diff options
Diffstat (limited to 'fs/xfs')
71 files changed, 2714 insertions, 2609 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index faca44997099..284a7c89697e 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,14 +16,11 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # -EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 +ccflags-y := -I$(src) -I$(src)/linux-2.6 +ccflags-$(CONFIG_XFS_DEBUG) += -g XFS_LINUX := linux-2.6 -ifeq ($(CONFIG_XFS_DEBUG),y) - EXTRA_CFLAGS += -g -endif - obj-$(CONFIG_XFS_FS) += xfs.o xfs-y += linux-2.6/xfs_trace.o @@ -105,11 +102,10 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ xfs_globals.o \ xfs_ioctl.o \ xfs_iops.o \ + xfs_message.o \ xfs_super.o \ xfs_sync.o \ xfs_xattr.o) # Objects in support/ -xfs-y += $(addprefix support/, \ - debug.o \ - uuid.o) +xfs-y += support/uuid.o diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 666c9db48eb6..a907de565db3 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -23,6 +23,7 @@ #include <linux/backing-dev.h> #include "time.h" #include "kmem.h" +#include "xfs_message.h" /* * Greedy allocation. May fail and may return vmalloced memory. 
@@ -56,8 +57,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags) if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) return ptr; if (!(++retries % 100)) - printk(KERN_ERR "XFS: possible memory allocation " - "deadlock in %s (mode:0x%x)\n", + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); @@ -112,8 +113,8 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) return ptr; if (!(++retries % 100)) - printk(KERN_ERR "XFS: possible memory allocation " - "deadlock in %s (mode:0x%x)\n", + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", __func__, lflags); congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index ec7bbb5645b6..79ce38be15a1 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -413,8 +413,7 @@ xfs_submit_ioend_bio( if (xfs_ioend_new_eof(ioend)) xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); - submit_bio(wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC_PLUG : WRITE, bio); + submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); } STATIC struct bio * @@ -854,7 +853,7 @@ xfs_aops_discard_page( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) goto out_invalidate; - xfs_fs_cmn_err(CE_ALERT, ip->i_mount, + xfs_alert(ip->i_mount, "page discard on page %p, inode 0x%llx, offset %llu.", page, ip->i_ino, offset); @@ -872,7 +871,7 @@ xfs_aops_discard_page( if (error) { /* something screwed, just bail */ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_fs_cmn_err(CE_ALERT, ip->i_mount, + xfs_alert(ip->i_mount, "page discard unable to remove delalloc mapping."); } break; @@ -1296,7 +1295,7 @@ xfs_get_blocks_direct( * If the private argument is non-NULL __xfs_get_blocks signals us that we * need to issue a transaction to convert the range from unwritten to written * extents. 
In case this is regular synchronous I/O we just call xfs_end_io - * to do this and we are done. But in case this was a successfull AIO + * to do this and we are done. But in case this was a successful AIO * request this handler is called from interrupt context, from which we * can't start transactions. In that case offload the I/O completion to * the workqueues we also use for buffered I/O completion. @@ -1411,7 +1410,7 @@ xfs_vm_write_failed( if (error) { /* something screwed, just bail */ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_fs_cmn_err(CE_ALERT, ip->i_mount, + xfs_alert(ip->i_mount, "xfs_vm_write_failed: unable to clean up ino %lld", ip->i_ino); } @@ -1495,7 +1494,6 @@ const struct address_space_operations xfs_address_space_operations = { .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, - .sync_page = block_sync_page, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ac1c7e8378dd..5e68099db2a5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -33,7 +33,6 @@ #include <linux/migrate.h> #include <linux/backing-dev.h> #include <linux/freezer.h> -#include <linux/list_sort.h> #include "xfs_sb.h" #include "xfs_inum.h" @@ -94,75 +93,6 @@ xfs_buf_vmap_len( } /* - * Page Region interfaces. - * - * For pages in filesystems where the blocksize is smaller than the - * pagesize, we use the page->private field (long) to hold a bitmap - * of uptodate regions within the page. - * - * Each such region is "bytes per page / bits per long" bytes long. 
- * - * NBPPR == number-of-bytes-per-page-region - * BTOPR == bytes-to-page-region (rounded up) - * BTOPRT == bytes-to-page-region-truncated (rounded down) - */ -#if (BITS_PER_LONG == 32) -#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */ -#elif (BITS_PER_LONG == 64) -#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */ -#else -#error BITS_PER_LONG must be 32 or 64 -#endif -#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG) -#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT) -#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT)) - -STATIC unsigned long -page_region_mask( - size_t offset, - size_t length) -{ - unsigned long mask; - int first, final; - - first = BTOPR(offset); - final = BTOPRT(offset + length - 1); - first = min(first, final); - - mask = ~0UL; - mask <<= BITS_PER_LONG - (final - first); - mask >>= BITS_PER_LONG - (final); - - ASSERT(offset + length <= PAGE_CACHE_SIZE); - ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0); - - return mask; -} - -STATIC void -set_page_region( - struct page *page, - size_t offset, - size_t length) -{ - set_page_private(page, - page_private(page) | page_region_mask(offset, length)); - if (page_private(page) == ~0UL) - SetPageUptodate(page); -} - -STATIC int -test_page_region( - struct page *page, - size_t offset, - size_t length) -{ - unsigned long mask = page_region_mask(offset, length); - - return (mask && (page_private(page) & mask) == mask); -} - -/* * xfs_buf_lru_add - add a buffer to the LRU. * * The LRU takes a new reference to the buffer so that it will only be freed @@ -189,7 +119,7 @@ xfs_buf_lru_add( * The unlocked check is safe here because it only occurs when there are not * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there * to optimise the shrinker removing the buffer from the LRU and calling - * xfs_buf_free(). i.e. it removes an unneccessary round trip on the + * xfs_buf_free(). i.e. it removes an unnecessary round trip on the * bt_lru_lock. 
*/ STATIC void @@ -332,7 +262,7 @@ xfs_buf_free( ASSERT(list_empty(&bp->b_lru)); - if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { + if (bp->b_flags & _XBF_PAGES) { uint i; if (xfs_buf_is_vmapped(bp)) @@ -342,56 +272,77 @@ xfs_buf_free( for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; - if (bp->b_flags & _XBF_PAGE_CACHE) - ASSERT(!PagePrivate(page)); - page_cache_release(page); + __free_page(page); } - } + } else if (bp->b_flags & _XBF_KMEM) + kmem_free(bp->b_addr); _xfs_buf_free_pages(bp); xfs_buf_deallocate(bp); } /* - * Finds all pages for buffer in question and builds it's page list. + * Allocates all the pages for buffer in question and builds its page list. */ STATIC int -_xfs_buf_lookup_pages( +xfs_buf_allocate_memory( xfs_buf_t *bp, uint flags) { - struct address_space *mapping = bp->b_target->bt_mapping; - size_t blocksize = bp->b_target->bt_bsize; size_t size = bp->b_count_desired; size_t nbytes, offset; gfp_t gfp_mask = xb_to_gfp(flags); unsigned short page_count, i; - pgoff_t first; xfs_off_t end; int error; + /* + * for buffers that are contained within a single page, just allocate + * the memory from the heap - there's no need for the complexity of + * page arrays to keep allocation down to order 0. 
+ */ + if (bp->b_buffer_length < PAGE_SIZE) { + bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags)); + if (!bp->b_addr) { + /* low memory - use alloc_page loop instead */ + goto use_alloc_page; + } + + if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) & + PAGE_MASK) != + ((unsigned long)bp->b_addr & PAGE_MASK)) { + /* b_addr spans two pages - use alloc_page instead */ + kmem_free(bp->b_addr); + bp->b_addr = NULL; + goto use_alloc_page; + } + bp->b_offset = offset_in_page(bp->b_addr); + bp->b_pages = bp->b_page_array; + bp->b_pages[0] = virt_to_page(bp->b_addr); + bp->b_page_count = 1; + bp->b_flags |= XBF_MAPPED | _XBF_KMEM; + return 0; + } + +use_alloc_page: end = bp->b_file_offset + bp->b_buffer_length; page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); - error = _xfs_buf_get_pages(bp, page_count, flags); if (unlikely(error)) return error; - bp->b_flags |= _XBF_PAGE_CACHE; offset = bp->b_offset; - first = bp->b_file_offset >> PAGE_CACHE_SHIFT; + bp->b_flags |= _XBF_PAGES; for (i = 0; i < bp->b_page_count; i++) { struct page *page; uint retries = 0; - - retry: - page = find_or_create_page(mapping, first + i, gfp_mask); +retry: + page = alloc_page(gfp_mask); if (unlikely(page == NULL)) { if (flags & XBF_READ_AHEAD) { bp->b_page_count = i; - for (i = 0; i < bp->b_page_count; i++) - unlock_page(bp->b_pages[i]); - return -ENOMEM; + error = ENOMEM; + goto out_free_pages; } /* @@ -401,9 +352,8 @@ _xfs_buf_lookup_pages( * handle buffer allocation failures we can't do much. 
*/ if (!(++retries % 100)) - printk(KERN_ERR - "XFS: possible memory allocation " - "deadlock in %s (mode:0x%x)\n", + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", __func__, gfp_mask); XFS_STATS_INC(xb_page_retries); @@ -413,52 +363,44 @@ _xfs_buf_lookup_pages( XFS_STATS_INC(xb_page_found); - nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); + nbytes = min_t(size_t, size, PAGE_SIZE - offset); size -= nbytes; - - ASSERT(!PagePrivate(page)); - if (!PageUptodate(page)) { - page_count--; - if (blocksize >= PAGE_CACHE_SIZE) { - if (flags & XBF_READ) - bp->b_flags |= _XBF_PAGE_LOCKED; - } else if (!PagePrivate(page)) { - if (test_page_region(page, offset, nbytes)) - page_count++; - } - } - bp->b_pages[i] = page; offset = 0; } + return 0; - if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { - for (i = 0; i < bp->b_page_count; i++) - unlock_page(bp->b_pages[i]); - } - - if (page_count == bp->b_page_count) - bp->b_flags |= XBF_DONE; - +out_free_pages: + for (i = 0; i < bp->b_page_count; i++) + __free_page(bp->b_pages[i]); return error; } /* - * Map buffer into kernel address-space if nessecary. + * Map buffer into kernel address-space if necessary. 
*/ STATIC int _xfs_buf_map_pages( xfs_buf_t *bp, uint flags) { - /* A single page buffer is always mappable */ + ASSERT(bp->b_flags & _XBF_PAGES); if (bp->b_page_count == 1) { + /* A single page buffer is always mappable */ bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { - bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, - -1, PAGE_KERNEL); - if (unlikely(bp->b_addr == NULL)) + int retried = 0; + + do { + bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, + -1, PAGE_KERNEL); + if (bp->b_addr) + break; + vm_unmap_aliases(); + } while (retried++ <= 1); + + if (!bp->b_addr) return -ENOMEM; bp->b_addr += bp->b_offset; bp->b_flags |= XBF_MAPPED; @@ -569,9 +511,14 @@ found: } } + /* + * if the buffer is stale, clear all the external state associated with + * it. We need to keep flags such as how we allocated the buffer memory + * intact here. + */ if (bp->b_flags & XBF_STALE) { ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); - bp->b_flags &= XBF_MAPPED; + bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; } trace_xfs_buf_find(bp, flags, _RET_IP_); @@ -592,7 +539,7 @@ xfs_buf_get( xfs_buf_flags_t flags) { xfs_buf_t *bp, *new_bp; - int error = 0, i; + int error = 0; new_bp = xfs_buf_allocate(flags); if (unlikely(!new_bp)) @@ -600,7 +547,7 @@ xfs_buf_get( bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); if (bp == new_bp) { - error = _xfs_buf_lookup_pages(bp, flags); + error = xfs_buf_allocate_memory(bp, flags); if (error) goto no_buffer; } else { @@ -609,14 +556,11 @@ xfs_buf_get( return NULL; } - for (i = 0; i < bp->b_page_count; i++) - mark_page_accessed(bp->b_pages[i]); - if (!(bp->b_flags & XBF_MAPPED)) { error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { - printk(KERN_WARNING "%s: failed to map pages\n", - __func__); + xfs_warn(target->bt_mount, + "%s: failed to map pages\n", __func__); goto no_buffer; } } @@ -710,10 +654,7 @@ xfs_buf_readahead( xfs_off_t ioff, size_t isize) 
{ - struct backing_dev_info *bdi; - - bdi = target->bt_mapping->backing_dev_info; - if (bdi_read_congested(bdi)) + if (bdi_read_congested(target->bt_bdi)) return; xfs_buf_read(target, ioff, isize, @@ -767,6 +708,27 @@ xfs_buf_get_empty( return bp; } +/* + * Return a buffer allocated as an empty buffer and associated to external + * memory via xfs_buf_associate_memory() back to its empty state. + */ +void +xfs_buf_set_empty( + struct xfs_buf *bp, + size_t len) +{ + if (bp->b_pages) + _xfs_buf_free_pages(bp); + + bp->b_pages = NULL; + bp->b_page_count = 0; + bp->b_addr = NULL; + bp->b_file_offset = 0; + bp->b_buffer_length = bp->b_count_desired = len; + bp->b_bn = XFS_BUF_DADDR_NULL; + bp->b_flags &= ~XBF_MAPPED; +} + static inline struct page * mem_to_page( void *addr) @@ -791,10 +753,10 @@ xfs_buf_associate_memory( size_t buflen; int page_count; - pageaddr = (unsigned long)mem & PAGE_CACHE_MASK; + pageaddr = (unsigned long)mem & PAGE_MASK; offset = (unsigned long)mem - pageaddr; - buflen = PAGE_CACHE_ALIGN(len + offset); - page_count = buflen >> PAGE_CACHE_SHIFT; + buflen = PAGE_ALIGN(len + offset); + page_count = buflen >> PAGE_SHIFT; /* Free any previous set of page pointers */ if (bp->b_pages) @@ -811,13 +773,12 @@ xfs_buf_associate_memory( for (i = 0; i < bp->b_page_count; i++) { bp->b_pages[i] = mem_to_page((void *)pageaddr); - pageaddr += PAGE_CACHE_SIZE; + pageaddr += PAGE_SIZE; } bp->b_count_desired = len; bp->b_buffer_length = buflen; bp->b_flags |= XBF_MAPPED; - bp->b_flags &= ~_XBF_PAGE_LOCKED; return 0; } @@ -850,8 +811,8 @@ xfs_buf_get_uncached( error = _xfs_buf_map_pages(bp, XBF_MAPPED); if (unlikely(error)) { - printk(KERN_WARNING "%s: failed to map pages\n", - __func__); + xfs_warn(target->bt_mount, + "%s: failed to map pages\n", __func__); goto fail_free_mem; } @@ -924,20 +885,7 @@ xfs_buf_rele( /* - * Mutual exclusion on buffers. 
Locking model: - * - * Buffers associated with inodes for which buffer locking - * is not enabled are not protected by semaphores, and are - * assumed to be exclusively owned by the caller. There is a - * spinlock in the buffer, used by the caller when concurrent - * access is possible. - */ - -/* - * Locks a buffer object, if it is not already locked. Note that this in - * no way locks the underlying pages, so it is only useful for - * synchronizing concurrent use of buffer objects, not for synchronizing - * independent access to the underlying pages. + * Lock a buffer object, if it is not already locked. * * If we come across a stale, pinned, locked buffer, we know that we are * being asked to lock a buffer that has been reallocated. Because it is @@ -971,10 +919,7 @@ xfs_buf_lock_value( } /* - * Locks a buffer object. - * Note that this in no way locks the underlying pages, so it is only - * useful for synchronizing concurrent use of buffer objects, not for - * synchronizing independent access to the underlying pages. + * Lock a buffer object. * * If we come across a stale, pinned, locked buffer, we know that we * are being asked to lock a buffer that has been reallocated. 
Because @@ -990,8 +935,6 @@ xfs_buf_lock( if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) xfs_log_force(bp->b_target->bt_mount, 0); - if (atomic_read(&bp->b_io_remaining)) - blk_run_address_space(bp->b_target->bt_mapping); down(&bp->b_sema); XB_SET_OWNER(bp); @@ -1035,9 +978,7 @@ xfs_buf_wait_unpin( set_current_state(TASK_UNINTERRUPTIBLE); if (atomic_read(&bp->b_pin_count) == 0) break; - if (atomic_read(&bp->b_io_remaining)) - blk_run_address_space(bp->b_target->bt_mapping); - schedule(); + io_schedule(); } remove_wait_queue(&bp->b_waiters, &wait); set_current_state(TASK_RUNNING); @@ -1249,10 +1190,8 @@ _xfs_buf_ioend( xfs_buf_t *bp, int schedule) { - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { - bp->b_flags &= ~_XBF_PAGE_LOCKED; + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) xfs_buf_ioend(bp, schedule); - } } STATIC void @@ -1261,35 +1200,12 @@ xfs_buf_bio_end_io( int error) { xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; - unsigned int blocksize = bp->b_target->bt_bsize; - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; xfs_buf_ioerror(bp, -error); if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); - do { - struct page *page = bvec->bv_page; - - ASSERT(!PagePrivate(page)); - if (unlikely(bp->b_error)) { - if (bp->b_flags & XBF_READ) - ClearPageUptodate(page); - } else if (blocksize >= PAGE_CACHE_SIZE) { - SetPageUptodate(page); - } else if (!PagePrivate(page) && - (bp->b_flags & _XBF_PAGE_CACHE)) { - set_page_region(page, bvec->bv_offset, bvec->bv_len); - } - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (bp->b_flags & _XBF_PAGE_LOCKED) - unlock_page(page); - } while (bvec >= bio->bi_io_vec); - _xfs_buf_ioend(bp, 1); bio_put(bio); } @@ -1303,7 +1219,6 @@ _xfs_buf_ioapply( int offset = bp->b_offset; int size = bp->b_count_desired; sector_t sector = bp->b_bn; - unsigned int blocksize = bp->b_target->bt_bsize; 
total_nr_pages = bp->b_page_count; map_i = 0; @@ -1324,29 +1239,6 @@ _xfs_buf_ioapply( (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; } - /* Special code path for reading a sub page size buffer in -- - * we populate up the whole page, and hence the other metadata - * in the same page. This optimization is only valid when the - * filesystem block size is not smaller than the page size. - */ - if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && - ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == - (XBF_READ|_XBF_PAGE_LOCKED)) && - (blocksize >= PAGE_CACHE_SIZE)) { - bio = bio_alloc(GFP_NOIO, 1); - - bio->bi_bdev = bp->b_target->bt_bdev; - bio->bi_sector = sector - (offset >> BBSHIFT); - bio->bi_end_io = xfs_buf_bio_end_io; - bio->bi_private = bp; - - bio_add_page(bio, bp->b_pages[0], PAGE_CACHE_SIZE, 0); - size = 0; - - atomic_inc(&bp->b_io_remaining); - - goto submit_io; - } next_chunk: atomic_inc(&bp->b_io_remaining); @@ -1360,8 +1252,9 @@ next_chunk: bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; + for (; size && nr_pages; nr_pages--, map_i++) { - int rbytes, nbytes = PAGE_CACHE_SIZE - offset; + int rbytes, nbytes = PAGE_SIZE - offset; if (nbytes > size) nbytes = size; @@ -1376,7 +1269,6 @@ next_chunk: total_nr_pages--; } -submit_io: if (likely(bio->bi_size)) { if (xfs_buf_is_vmapped(bp)) { flush_kernel_vmap_range(bp->b_addr, @@ -1386,18 +1278,7 @@ submit_io: if (size) goto next_chunk; } else { - /* - * if we get here, no pages were added to the bio. However, - * we can't just error out here - if the pages are locked then - * we have to unlock them otherwise we can hang on a later - * access to the page. 
- */ xfs_buf_ioerror(bp, EIO); - if (bp->b_flags & _XBF_PAGE_LOCKED) { - int i; - for (i = 0; i < bp->b_page_count; i++) - unlock_page(bp->b_pages[i]); - } bio_put(bio); } } @@ -1442,8 +1323,6 @@ xfs_buf_iowait( { trace_xfs_buf_iowait(bp, _RET_IP_); - if (atomic_read(&bp->b_io_remaining)) - blk_run_address_space(bp->b_target->bt_mapping); wait_for_completion(&bp->b_iowait); trace_xfs_buf_iowait_done(bp, _RET_IP_); @@ -1461,8 +1340,8 @@ xfs_buf_offset( return XFS_BUF_PTR(bp) + offset; offset += bp->b_offset; - page = bp->b_pages[offset >> PAGE_CACHE_SHIFT]; - return (xfs_caddr_t)page_address(page) + (offset & (PAGE_CACHE_SIZE-1)); + page = bp->b_pages[offset >> PAGE_SHIFT]; + return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); } /* @@ -1484,9 +1363,9 @@ xfs_buf_iomove( page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; cpoff = xfs_buf_poff(boff + bp->b_offset); csize = min_t(size_t, - PAGE_CACHE_SIZE-cpoff, bp->b_count_desired-boff); + PAGE_SIZE-cpoff, bp->b_count_desired-boff); - ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); + ASSERT(((csize + cpoff) <= PAGE_SIZE)); switch (mode) { case XBRW_ZERO: @@ -1543,12 +1422,12 @@ restart: int xfs_buftarg_shrink( struct shrinker *shrink, - int nr_to_scan, - gfp_t mask) + struct shrink_control *sc) { struct xfs_buftarg *btp = container_of(shrink, struct xfs_buftarg, bt_shrinker); struct xfs_buf *bp; + int nr_to_scan = sc->nr_to_scan; LIST_HEAD(dispose); if (!nr_to_scan) @@ -1599,7 +1478,6 @@ xfs_free_buftarg( xfs_flush_buftarg(btp, 1); if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_blkdev_issue_flush(btp); - iput(btp->bt_mapping->host); kthread_stop(btp->bt_task); kmem_free(btp); @@ -1617,21 +1495,12 @@ xfs_setsize_buftarg_flags( btp->bt_smask = sectorsize - 1; if (set_blocksize(btp->bt_bdev, sectorsize)) { - printk(KERN_WARNING - "XFS: Cannot set_blocksize to %u on device %s\n", + xfs_warn(btp->bt_mount, + "Cannot set_blocksize to %u on device %s\n", sectorsize, XFS_BUFTARG_NAME(btp)); return EINVAL; } - if 
(verbose && - (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) { - printk(KERN_WARNING - "XFS: %u byte sectors in use on device %s. " - "This is suboptimal; %u or greater is ideal.\n", - sectorsize, XFS_BUFTARG_NAME(btp), - (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG); - } - return 0; } @@ -1646,7 +1515,7 @@ xfs_setsize_buftarg_early( struct block_device *bdev) { return xfs_setsize_buftarg_flags(btp, - PAGE_CACHE_SIZE, bdev_logical_block_size(bdev), 0); + PAGE_SIZE, bdev_logical_block_size(bdev), 0); } int @@ -1659,41 +1528,6 @@ xfs_setsize_buftarg( } STATIC int -xfs_mapping_buftarg( - xfs_buftarg_t *btp, - struct block_device *bdev) -{ - struct backing_dev_info *bdi; - struct inode *inode; - struct address_space *mapping; - static const struct address_space_operations mapping_aops = { - .sync_page = block_sync_page, - .migratepage = fail_migrate_page, - }; - - inode = new_inode(bdev->bd_inode->i_sb); - if (!inode) { - printk(KERN_WARNING - "XFS: Cannot allocate mapping inode for device %s\n", - XFS_BUFTARG_NAME(btp)); - return ENOMEM; - } - inode->i_ino = get_next_ino(); - inode->i_mode = S_IFBLK; - inode->i_bdev = bdev; - inode->i_rdev = bdev->bd_dev; - bdi = blk_get_backing_dev_info(bdev); - if (!bdi) - bdi = &default_backing_dev_info; - mapping = &inode->i_data; - mapping->a_ops = &mapping_aops; - mapping->backing_dev_info = bdi; - mapping_set_gfp_mask(mapping, GFP_NOFS); - btp->bt_mapping = mapping; - return 0; -} - -STATIC int xfs_alloc_delwrite_queue( xfs_buftarg_t *btp, const char *fsname) @@ -1721,12 +1555,14 @@ xfs_alloc_buftarg( btp->bt_mount = mp; btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; + btp->bt_bdi = blk_get_backing_dev_info(bdev); + if (!btp->bt_bdi) + goto error; + INIT_LIST_HEAD(&btp->bt_lru); spin_lock_init(&btp->bt_lru_lock); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; - if (xfs_mapping_buftarg(btp, bdev)) - goto error; if (xfs_alloc_delwrite_queue(btp, fsname)) goto error; btp->bt_shrinker.shrink = xfs_buftarg_shrink; @@ 
-1923,8 +1759,8 @@ xfsbufd( do { long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); - int count = 0; struct list_head tmp; + struct blk_plug plug; if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); @@ -1940,16 +1776,15 @@ xfsbufd( xfs_buf_delwri_split(target, &tmp, age); list_sort(NULL, &tmp, xfs_buf_cmp); + + blk_start_plug(&plug); while (!list_empty(&tmp)) { struct xfs_buf *bp; bp = list_first_entry(&tmp, struct xfs_buf, b_list); list_del_init(&bp->b_list); xfs_bdstrat_cb(bp); - count++; } - if (count) - blk_run_address_space(target->bt_mapping); - + blk_finish_plug(&plug); } while (!kthread_should_stop()); return 0; @@ -1969,6 +1804,7 @@ xfs_flush_buftarg( int pincount = 0; LIST_HEAD(tmp_list); LIST_HEAD(wait_list); + struct blk_plug plug; xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); @@ -1983,6 +1819,8 @@ xfs_flush_buftarg( * we do that after issuing all the IO. */ list_sort(NULL, &tmp_list, xfs_buf_cmp); + + blk_start_plug(&plug); while (!list_empty(&tmp_list)) { bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); ASSERT(target == bp->b_target); @@ -1993,10 +1831,10 @@ xfs_flush_buftarg( } xfs_bdstrat_cb(bp); } + blk_finish_plug(&plug); if (wait) { - /* Expedite and wait for IO to complete. */ - blk_run_address_space(target->bt_mapping); + /* Wait for IO to complete. 
*/ while (!list_empty(&wait_list)) { bp = list_first_entry(&wait_list, struct xfs_buf, b_list); @@ -2022,11 +1860,12 @@ xfs_buf_init(void) if (!xfslogd_workqueue) goto out_free_buf_zone; - xfsdatad_workqueue = create_workqueue("xfsdatad"); + xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - xfsconvertd_workqueue = create_workqueue("xfsconvertd"); + xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", + WQ_MEM_RECLAIM, 1); if (!xfsconvertd_workqueue) goto out_destroy_xfsdatad_workqueue; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index cbe65950e524..50a7d5fb3b73 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -61,30 +61,11 @@ typedef enum { #define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ /* flags used only internally */ -#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */ #define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ #define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ +#define _XBF_KMEM (1 << 20)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */ -/* - * Special flag for supporting metadata blocks smaller than a FSB. - * - * In this case we can have multiple xfs_buf_t on a single page and - * need to lock out concurrent xfs_buf_t readers as they only - * serialise access to the buffer. - * - * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation - * between reads of the page. Hence we can have one thread read the - * page and modify it, but then race with another thread that thinks - * the page is not up-to-date and hence reads it again. - * - * The result is that the first modifcation to the page is lost. - * This sort of AGF/AGI reading race can happen when unlinking inodes - * that require truncation and results in the AGI unlinked list - * modifications being lost. 
- */ -#define _XBF_PAGE_LOCKED (1 << 22) - typedef unsigned int xfs_buf_flags_t; #define XFS_BUF_FLAGS \ @@ -100,12 +81,10 @@ typedef unsigned int xfs_buf_flags_t; { XBF_LOCK, "LOCK" }, /* should never be set */\ { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ - { _XBF_PAGE_CACHE, "PAGE_CACHE" }, \ { _XBF_PAGES, "PAGES" }, \ { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ - { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_PAGE_LOCKED, "PAGE_LOCKED" } - + { _XBF_KMEM, "KMEM" }, \ + { _XBF_DELWRI_Q, "DELWRI_Q" } typedef enum { XBT_FORCE_SLEEP = 0, @@ -120,7 +99,7 @@ typedef struct xfs_bufhash { typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; - struct address_space *bt_mapping; + struct backing_dev_info *bt_bdi; struct xfs_mount *bt_mount; unsigned int bt_bsize; unsigned int bt_sshift; @@ -139,17 +118,6 @@ typedef struct xfs_buftarg { unsigned int bt_lru_nr; } xfs_buftarg_t; -/* - * xfs_buf_t: Buffer structure for pagecache-based buffers - * - * This buffer structure is used by the pagecache buffer management routines - * to refer to an assembly of pages forming a logical buffer. - * - * The buffer structure is used on a temporary basis only, and discarded when - * released. The real data storage is recorded in the pagecache. Buffers are - * hashed to the block device on which the file system resides. 
- */ - struct xfs_buf; typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); @@ -210,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, xfs_buf_flags_t); extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); +extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); extern void xfs_buf_hold(xfs_buf_t *); diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c index 05201ae719e5..244e797dae32 100644 --- a/fs/xfs/linux-2.6/xfs_discard.c +++ b/fs/xfs/linux-2.6/xfs_discard.c @@ -152,6 +152,8 @@ xfs_ioc_trim( if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); + if (!blk_queue_discard(q)) + return -XFS_ERROR(EOPNOTSUPP); if (copy_from_user(&range, urange, sizeof(range))) return -XFS_ERROR(EFAULT); @@ -189,3 +191,32 @@ xfs_ioc_trim( return -XFS_ERROR(EFAULT); return 0; } + +int +xfs_discard_extents( + struct xfs_mount *mp, + struct list_head *list) +{ + struct xfs_busy_extent *busyp; + int error = 0; + + list_for_each_entry(busyp, list, list) { + trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, + busyp->length); + + error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), + XFS_FSB_TO_BB(mp, busyp->length), + GFP_NOFS, 0); + if (error && error != EOPNOTSUPP) { + xfs_info(mp, + "discard failed for extent [0x%llu,%u], error %d", + (unsigned long long)busyp->bno, + busyp->length, + error); + return error; + } + } + + return 0; +} diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h index e82b6dd3e127..344879aea646 100644 --- a/fs/xfs/linux-2.6/xfs_discard.h +++ b/fs/xfs/linux-2.6/xfs_discard.h @@ -2,7 +2,9 @@ #define XFS_DISCARD_H 1 struct fstrim_range; +struct list_head; extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); +extern int xfs_discard_extents(struct xfs_mount *, 
struct list_head *); #endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index fc0114da7fdd..f4f878fc0083 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -89,8 +89,10 @@ xfs_fs_encode_fh( * seven combinations work. The real answer is "don't use v2". */ len = xfs_fileid_length(fileid_type); - if (*max_len < len) + if (*max_len < len) { + *max_len = len; return 255; + } *max_len = len; switch (fileid_type) { diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index a55c1b46b219..f4213ba1ff85 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -381,7 +381,7 @@ xfs_aio_write_isize_update( /* * If this was a direct or synchronous I/O that failed (such as ENOSPC) then - * part of the I/O may have been written to disk before the error occured. In + * part of the I/O may have been written to disk before the error occurred. In * this case the on-disk file size may have been adjusted beyond the in-memory * file size and now needs to be truncated back. 
*/ @@ -896,6 +896,7 @@ xfs_file_fallocate( xfs_flock64_t bf; xfs_inode_t *ip = XFS_I(inode); int cmd = XFS_IOC_RESVSP; + int attr_flags = XFS_ATTR_NOLOCK; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -918,7 +919,10 @@ xfs_file_fallocate( goto out_unlock; } - error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); + if (file->f_flags & O_DSYNC) + attr_flags |= XFS_ATTR_SYNC; + + error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); if (error) goto out_unlock; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index f5e2a19e0f8e..acca2c5ca3fa 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -624,6 +624,10 @@ xfs_ioc_space( if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= XFS_ATTR_NONBLOCK; + + if (filp->f_flags & O_DSYNC) + attr_flags |= XFS_ATTR_SYNC; + if (ioflags & IO_INVIS) attr_flags |= XFS_ATTR_DMI; @@ -695,14 +699,19 @@ xfs_ioc_fsgeometry_v1( xfs_mount_t *mp, void __user *arg) { - xfs_fsop_geom_v1_t fsgeo; + xfs_fsop_geom_t fsgeo; int error; - error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); + error = xfs_fs_geometry(mp, &fsgeo, 3); if (error) return -error; - if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) + /* + * Caller should have passed an argument of type + * xfs_fsop_geom_v1_t. This is a proper subset of the + * xfs_fsop_geom_t that xfs_fs_geometry() fills in. 
+ */ + if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) return -XFS_ERROR(EFAULT); return 0; } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b3486dfa5520..54e623bfbb85 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -586,7 +586,8 @@ xfs_file_compat_ioctl( case XFS_IOC_RESVSP_32: case XFS_IOC_UNRESVSP_32: case XFS_IOC_RESVSP64_32: - case XFS_IOC_UNRESVSP64_32: { + case XFS_IOC_UNRESVSP64_32: + case XFS_IOC_ZERO_RANGE_32: { struct xfs_flock64 bf; if (xfs_compat_flock64_copyin(&bf, arg)) diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 08b605792a99..80f4060e8970 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 { #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) #define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) +#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) typedef struct compat_xfs_fsop_geom_v1 { __u32 blocksize; /* filesystem (data) block size */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index bd5727852fd6..dd21784525a8 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -70,7 +70,7 @@ xfs_synchronize_times( /* * If the linux inode is valid, mark it dirty. 
- * Used when commiting a dirty inode into a transaction so that + * Used when committing a dirty inode into a transaction so that * the inode will get written back by the linux code */ void @@ -102,7 +102,8 @@ xfs_mark_inode_dirty( STATIC int xfs_init_security( struct inode *inode, - struct inode *dir) + struct inode *dir, + const struct qstr *qstr) { struct xfs_inode *ip = XFS_I(inode); size_t length; @@ -110,7 +111,7 @@ xfs_init_security( unsigned char *name; int error; - error = security_inode_init_security(inode, dir, (char **)&name, + error = security_inode_init_security(inode, dir, qstr, (char **)&name, &value, &length); if (error) { if (error == -EOPNOTSUPP) @@ -194,7 +195,7 @@ xfs_vn_mknod( inode = VFS_I(ip); - error = xfs_init_security(inode, dir); + error = xfs_init_security(inode, dir, &dentry->d_name); if (unlikely(error)) goto out_cleanup_inode; @@ -367,7 +368,7 @@ xfs_vn_symlink( inode = VFS_I(cip); - error = xfs_init_security(inode, dir); + error = xfs_init_security(inode, dir, &dentry->d_name); if (unlikely(error)) goto out_cleanup_inode; diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 096494997747..8633521b3b2e 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -39,7 +39,6 @@ #include <mrlock.h> #include <time.h> -#include <support/debug.h> #include <support/uuid.h> #include <linux/semaphore.h> @@ -71,6 +70,7 @@ #include <linux/ctype.h> #include <linux/writeback.h> #include <linux/capability.h> +#include <linux/list_sort.h> #include <asm/page.h> #include <asm/div64.h> @@ -86,6 +86,7 @@ #include <xfs_aops.h> #include <xfs_super.h> #include <xfs_buf.h> +#include <xfs_message.h> /* * Feature macros (disable/enable) @@ -280,4 +281,25 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) #define __arch_pack #endif +#define ASSERT_ALWAYS(expr) \ + (unlikely(expr) ? 
(void)0 : assfail(#expr, __FILE__, __LINE__)) + +#ifndef DEBUG +#define ASSERT(expr) ((void)0) + +#ifndef STATIC +# define STATIC static noinline +#endif + +#else /* DEBUG */ + +#define ASSERT(expr) \ + (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + +#ifndef STATIC +# define STATIC noinline +#endif + +#endif /* DEBUG */ + #endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c new file mode 100644 index 000000000000..bd672def95ac --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_message.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" + +/* + * XFS logging functions + */ +static void +__xfs_printk( + const char *level, + const struct xfs_mount *mp, + struct va_format *vaf) +{ + if (mp && mp->m_fsname) { + printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); + return; + } + printk("%sXFS: %pV\n", level, vaf); +} + +#define define_xfs_printk_level(func, kern_level) \ +void func(const struct xfs_mount *mp, const char *fmt, ...) 
\ +{ \ + struct va_format vaf; \ + va_list args; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + __xfs_printk(kern_level, mp, &vaf); \ + va_end(args); \ +} \ + +define_xfs_printk_level(xfs_emerg, KERN_EMERG); +define_xfs_printk_level(xfs_alert, KERN_ALERT); +define_xfs_printk_level(xfs_crit, KERN_CRIT); +define_xfs_printk_level(xfs_err, KERN_ERR); +define_xfs_printk_level(xfs_warn, KERN_WARNING); +define_xfs_printk_level(xfs_notice, KERN_NOTICE); +define_xfs_printk_level(xfs_info, KERN_INFO); +#ifdef DEBUG +define_xfs_printk_level(xfs_debug, KERN_DEBUG); +#endif + +void +xfs_alert_tag( + const struct xfs_mount *mp, + int panic_tag, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + int do_panic = 0; + + if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { + xfs_alert(mp, "Transforming an alert into a BUG."); + do_panic = 1; + } + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + __xfs_printk(KERN_ALERT, mp, &vaf); + va_end(args); + + BUG_ON(do_panic); +} + +void +assfail(char *expr, char *file, int line) +{ + xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", + expr, file, line); + BUG(); +} + +void +xfs_hex_dump(void *p, int length) +{ + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); +} diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h new file mode 100644 index 000000000000..7fb7ea007672 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_message.h @@ -0,0 +1,39 @@ +#ifndef __XFS_MESSAGE_H +#define __XFS_MESSAGE_H 1 + +struct xfs_mount; + +extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, + const char *fmt, ...) 
+ __attribute__ ((format (printf, 3, 4))); +extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); + +#ifdef DEBUG +extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +#else +static inline void +__attribute__ ((format (printf, 2, 3))) +xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) +{ +} +#endif + +extern void assfail(char *expr, char *f, int l); + +extern void xfs_hex_dump(void *p, int length); + +#endif /* __XFS_MESSAGE_H */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9731898083ae..98b9c91fcdf1 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -110,8 +110,10 @@ mempool_t *xfs_ioend_pool; #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ -#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ -#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ +#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ +#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ +#define MNTOPT_DISCARD "discard" /* Discard unused blocks */ +#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ /* * Table driven mount option parser. 
@@ -173,6 +175,15 @@ xfs_parseargs( __uint8_t iosizelog = 0; /* + * set up the mount name first so all the errors will refer to the + * correct device. + */ + mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_fsname) + return ENOMEM; + mp->m_fsname_len = strlen(mp->m_fsname) + 1; + + /* * Copy binary VFS mount flags we are interested in. */ if (sb->s_flags & MS_RDONLY) @@ -189,6 +200,7 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_BARRIER; mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + mp->m_flags |= XFS_MOUNT_DELAYLOG; /* * These can be overridden by the mount option parsing. @@ -207,24 +219,21 @@ xfs_parseargs( if (!strcmp(this_char, MNTOPT_LOGBUFS)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } mp->m_logbufs = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } mp->m_logbsize = suffix_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -232,14 +241,12 @@ xfs_parseargs( if (!mp->m_logname) return ENOMEM; } else if (!strcmp(this_char, MNTOPT_MTPT)) { - cmn_err(CE_WARN, - "XFS: %s option not allowed on this system", + xfs_warn(mp, "%s option not allowed on this system", this_char); return EINVAL; } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -248,8 +255,7 @@ xfs_parseargs( return ENOMEM; } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) 
{ - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -257,8 +263,7 @@ xfs_parseargs( iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -280,16 +285,14 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_SWALLOC; } else if (!strcmp(this_char, MNTOPT_SUNIT)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } dsunit = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { if (!value || !*value) { - cmn_err(CE_WARN, - "XFS: %s option requires an argument", + xfs_warn(mp, "%s option requires an argument", this_char); return EINVAL; } @@ -297,8 +300,7 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; #if !XFS_BIG_INUMS - cmn_err(CE_WARN, - "XFS: %s option not allowed on this system", + xfs_warn(mp, "%s option not allowed on this system", this_char); return EINVAL; #endif @@ -355,21 +357,24 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_DELAYLOG; } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { mp->m_flags &= ~XFS_MOUNT_DELAYLOG; + } else if (!strcmp(this_char, MNTOPT_DISCARD)) { + mp->m_flags |= XFS_MOUNT_DISCARD; + } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { + mp->m_flags &= ~XFS_MOUNT_DISCARD; } else if (!strcmp(this_char, "ihashsize")) { - cmn_err(CE_WARN, - "XFS: ihashsize no longer used, option is deprecated."); + xfs_warn(mp, + "ihashsize no longer used, option is deprecated."); } else if (!strcmp(this_char, "osyncisdsync")) { - cmn_err(CE_WARN, - "XFS: osyncisdsync has no effect, option is deprecated."); + xfs_warn(mp, + "osyncisdsync has no effect, option is deprecated."); } else if 
(!strcmp(this_char, "osyncisosync")) { - cmn_err(CE_WARN, - "XFS: osyncisosync has no effect, option is deprecated."); + xfs_warn(mp, + "osyncisosync has no effect, option is deprecated."); } else if (!strcmp(this_char, "irixsgid")) { - cmn_err(CE_WARN, - "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); + xfs_warn(mp, + "irixsgid is now a sysctl(2) variable, option is deprecated."); } else { - cmn_err(CE_WARN, - "XFS: unknown mount option [%s].", this_char); + xfs_warn(mp, "unknown mount option [%s].", this_char); return EINVAL; } } @@ -379,40 +384,44 @@ xfs_parseargs( */ if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && !(mp->m_flags & XFS_MOUNT_RDONLY)) { - cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); + xfs_warn(mp, "no-recovery mounts must be read-only."); return EINVAL; } if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { - cmn_err(CE_WARN, - "XFS: sunit and swidth options incompatible with the noalign option"); + xfs_warn(mp, + "sunit and swidth options incompatible with the noalign option"); + return EINVAL; + } + + if ((mp->m_flags & XFS_MOUNT_DISCARD) && + !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { + xfs_warn(mp, + "the discard option is incompatible with the nodelaylog option"); return EINVAL; } #ifndef CONFIG_XFS_QUOTA if (XFS_IS_QUOTA_RUNNING(mp)) { - cmn_err(CE_WARN, - "XFS: quota support not available in this kernel."); + xfs_warn(mp, "quota support not available in this kernel."); return EINVAL; } #endif if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { - cmn_err(CE_WARN, - "XFS: cannot mount with both project and group quota"); + xfs_warn(mp, "cannot mount with both project and group quota"); return EINVAL; } if ((dsunit && !dswidth) || (!dsunit && dswidth)) { - cmn_err(CE_WARN, - "XFS: sunit and swidth must be specified together"); + xfs_warn(mp, "sunit and swidth must be specified together"); return EINVAL; } if (dsunit && (dswidth % 
dsunit != 0)) { - cmn_err(CE_WARN, - "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", + xfs_warn(mp, + "stripe width (%d) must be a multiple of the stripe unit (%d)", dswidth, dsunit); return EINVAL; } @@ -438,8 +447,7 @@ done: mp->m_logbufs != 0 && (mp->m_logbufs < XLOG_MIN_ICLOGS || mp->m_logbufs > XLOG_MAX_ICLOGS)) { - cmn_err(CE_WARN, - "XFS: invalid logbufs value: %d [not %d-%d]", + xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); return XFS_ERROR(EINVAL); } @@ -448,22 +456,16 @@ done: (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || !is_power_of_2(mp->m_logbsize))) { - cmn_err(CE_WARN, - "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + xfs_warn(mp, + "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", mp->m_logbsize); return XFS_ERROR(EINVAL); } - mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_fsname) - return ENOMEM; - mp->m_fsname_len = strlen(mp->m_fsname) + 1; - if (iosizelog) { if (iosizelog > XFS_MAX_IO_LOG || iosizelog < XFS_MIN_IO_LOG) { - cmn_err(CE_WARN, - "XFS: invalid log iosize: %d [not %d-%d]", + xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", iosizelog, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); return XFS_ERROR(EINVAL); @@ -499,6 +501,7 @@ xfs_showargs( { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, + { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, { 0, NULL } }; static struct proc_xfs_info xfs_info_unset[] = { @@ -610,7 +613,7 @@ xfs_blkdev_get( mp); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); - printk("XFS: Invalid device [%s], error=%d\n", name, error); + xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); } return -error; @@ -664,23 +667,23 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp) int error; if (mp->m_logdev_targp != mp->m_ddev_targp) { - xfs_fs_cmn_err(CE_NOTE, mp, + xfs_notice(mp, "Disabling 
barriers, not supported with external log device"); mp->m_flags &= ~XFS_MOUNT_BARRIER; return; } if (xfs_readonly_buftarg(mp->m_ddev_targp)) { - xfs_fs_cmn_err(CE_NOTE, mp, - "Disabling barriers, underlying device is readonly"); + xfs_notice(mp, + "Disabling barriers, underlying device is readonly"); mp->m_flags &= ~XFS_MOUNT_BARRIER; return; } error = xfs_barrier_test(mp); if (error) { - xfs_fs_cmn_err(CE_NOTE, mp, - "Disabling barriers, trial barrier write failed"); + xfs_notice(mp, + "Disabling barriers, trial barrier write failed"); mp->m_flags &= ~XFS_MOUNT_BARRIER; return; } @@ -743,8 +746,8 @@ xfs_open_devices( goto out_close_logdev; if (rtdev == ddev || rtdev == logdev) { - cmn_err(CE_WARN, - "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); + xfs_warn(mp, + "Cannot mount filesystem with identical rtdev and ddev/logdev."); error = EINVAL; goto out_close_rtdev; } @@ -827,75 +830,6 @@ xfs_setup_devices( return 0; } -/* - * XFS AIL push thread support - */ -void -xfsaild_wakeup( - struct xfs_ail *ailp, - xfs_lsn_t threshold_lsn) -{ - /* only ever move the target forwards */ - if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) { - ailp->xa_target = threshold_lsn; - wake_up_process(ailp->xa_task); - } -} - -STATIC int -xfsaild( - void *data) -{ - struct xfs_ail *ailp = data; - xfs_lsn_t last_pushed_lsn = 0; - long tout = 0; /* milliseconds */ - - while (!kthread_should_stop()) { - /* - * for short sleeps indicating congestion, don't allow us to - * get woken early. Otherwise all we do is bang on the AIL lock - * without making progress. - */ - if (tout && tout <= 20) - __set_current_state(TASK_KILLABLE); - else - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(tout ? 
- msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); - - /* swsusp */ - try_to_freeze(); - - ASSERT(ailp->xa_mount->m_log); - if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) - continue; - - tout = xfsaild_push(ailp, &last_pushed_lsn); - } - - return 0; -} /* xfsaild */ - -int -xfsaild_start( - struct xfs_ail *ailp) -{ - ailp->xa_target = 0; - ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", - ailp->xa_mount->m_fsname); - if (IS_ERR(ailp->xa_task)) - return -PTR_ERR(ailp->xa_task); - return 0; -} - -void -xfsaild_stop( - struct xfs_ail *ailp) -{ - kthread_stop(ailp->xa_task); -} - - /* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( @@ -1089,7 +1023,7 @@ xfs_fs_write_inode( error = 0; goto out_unlock; } - error = xfs_iflush(ip, 0); + error = xfs_iflush(ip, SYNC_TRYLOCK); } out_unlock: @@ -1202,22 +1136,12 @@ xfs_fs_sync_fs( return -error; if (laptop_mode) { - int prev_sync_seq = mp->m_sync_seq; - /* * The disk must be active because we're syncing. * We schedule xfssyncd now (now that the disk is * active) instead of later (when it might not be). */ - wake_up_process(mp->m_sync_task); - /* - * We have to wait for the sync iteration to complete. - * If we don't, the disk activity caused by the sync - * will come after the sync is completed, and that - * triggers another sync from laptop mode. - */ - wait_event(mp->m_wait_single_sync_task, - mp->m_sync_seq != prev_sync_seq); + flush_delayed_work_sync(&mp->m_sync_work); } return 0; @@ -1345,8 +1269,8 @@ xfs_fs_remount( * options that we can't actually change. 
*/ #if 0 - printk(KERN_INFO - "XFS: mount option \"%s\" not supported for remount\n", p); + xfs_info(mp, + "mount option \"%s\" not supported for remount\n", p); return -EINVAL; #else break; @@ -1367,8 +1291,7 @@ xfs_fs_remount( if (mp->m_update_flags) { error = xfs_mount_log_sb(mp, mp->m_update_flags); if (error) { - cmn_err(CE_WARN, - "XFS: failed to write sb changes"); + xfs_warn(mp, "failed to write sb changes"); return error; } mp->m_update_flags = 0; @@ -1452,15 +1375,15 @@ xfs_finish_flags( mp->m_logbsize = mp->m_sb.sb_logsunit; } else if (mp->m_logbsize > 0 && mp->m_logbsize < mp->m_sb.sb_logsunit) { - cmn_err(CE_WARN, - "XFS: logbuf size must be greater than or equal to log stripe size"); + xfs_warn(mp, + "logbuf size must be greater than or equal to log stripe size"); return XFS_ERROR(EINVAL); } } else { /* Fail a mount if the logbuf is larger than 32K */ if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { - cmn_err(CE_WARN, - "XFS: logbuf size for version 1 logs must be 16K or 32K"); + xfs_warn(mp, + "logbuf size for version 1 logs must be 16K or 32K"); return XFS_ERROR(EINVAL); } } @@ -1477,8 +1400,8 @@ xfs_finish_flags( * prohibit r/w mounts of read-only filesystems */ if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { - cmn_err(CE_WARN, - "XFS: cannot mount a read-only filesystem as read-write"); + xfs_warn(mp, + "cannot mount a read-only filesystem as read-write"); return XFS_ERROR(EROFS); } @@ -1502,9 +1425,6 @@ xfs_fs_fill_super( spin_lock_init(&mp->m_sb_lock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); - INIT_LIST_HEAD(&mp->m_sync_list); - spin_lock_init(&mp->m_sync_lock); - init_waitqueue_head(&mp->m_wait_single_sync_task); mp->m_super = sb; sb->s_fs_info = mp; @@ -1551,10 +1471,14 @@ xfs_fs_fill_super( if (error) goto out_free_sb; - error = xfs_mountfs(mp); - if (error) - goto out_filestream_unmount; - + /* + * we must configure the block size in the superblock before we run the + * full mount process as the mount process 
can lookup and cache inodes. + * For the same reason we must also initialise the syncd and register + * the inode cache shrinker so that inodes can be reclaimed during + * operations like a quotacheck that iterate all inodes in the + * filesystem. + */ sb->s_magic = XFS_SB_MAGIC; sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; @@ -1562,6 +1486,16 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); + error = xfs_syncd_init(mp); + if (error) + goto out_filestream_unmount; + + xfs_inode_shrinker_register(mp); + + error = xfs_mountfs(mp); + if (error) + goto out_syncd_stop; + root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = ENOENT; @@ -1577,14 +1511,11 @@ xfs_fs_fill_super( goto fail_vnrele; } - error = xfs_syncd_init(mp); - if (error) - goto fail_vnrele; - - xfs_inode_shrinker_register(mp); - return 0; + out_syncd_stop: + xfs_inode_shrinker_unregister(mp); + xfs_syncd_stop(mp); out_filestream_unmount: xfs_filestream_unmount(mp); out_free_sb: @@ -1608,6 +1539,9 @@ xfs_fs_fill_super( } fail_unmount: + xfs_inode_shrinker_unregister(mp); + xfs_syncd_stop(mp); + /* * Blow away any referenced inode in the filestreams cache. * This can and will cause log traffic as inodes go inactive @@ -1797,6 +1731,38 @@ xfs_destroy_zones(void) } STATIC int __init +xfs_init_workqueues(void) +{ + /* + * max_active is set to 8 to give enough concurrency to allow + * multiple work operations on each CPU to run. This allows multiple + * filesystems to be running sync work concurrently, and scales with + * the number of CPUs in the system. 
+ */ + xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); + if (!xfs_syncd_wq) + goto out; + + xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); + if (!xfs_ail_wq) + goto out_destroy_syncd; + + return 0; + +out_destroy_syncd: + destroy_workqueue(xfs_syncd_wq); +out: + return -ENOMEM; +} + +STATIC void +xfs_destroy_workqueues(void) +{ + destroy_workqueue(xfs_ail_wq); + destroy_workqueue(xfs_syncd_wq); +} + +STATIC int __init init_xfs_fs(void) { int error; @@ -1811,10 +1777,14 @@ init_xfs_fs(void) if (error) goto out; - error = xfs_mru_cache_init(); + error = xfs_init_workqueues(); if (error) goto out_destroy_zones; + error = xfs_mru_cache_init(); + if (error) + goto out_destroy_wq; + error = xfs_filestream_init(); if (error) goto out_mru_cache_uninit; @@ -1848,6 +1818,8 @@ init_xfs_fs(void) xfs_filestream_uninit(); out_mru_cache_uninit: xfs_mru_cache_uninit(); + out_destroy_wq: + xfs_destroy_workqueues(); out_destroy_zones: xfs_destroy_zones(); out: @@ -1864,6 +1836,7 @@ exit_xfs_fs(void) xfs_buf_terminate(); xfs_filestream_uninit(); xfs_mru_cache_uninit(); + xfs_destroy_workqueues(); xfs_destroy_zones(); } diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index e22f0057d21f..8ecad5ff9f9b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -22,6 +22,7 @@ #include "xfs_log.h" #include "xfs_inum.h" #include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" @@ -39,6 +40,8 @@ #include <linux/kthread.h> #include <linux/freezer.h> +struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + /* * The inode lookup is done in batches to keep the amount of lock traffic and * radix tree lookups to a minimum. The batch size is a trade off between @@ -264,6 +267,16 @@ xfs_sync_inode_attr( error = xfs_iflush(ip, flags); + /* + * We don't want to try again on non-blocking flushes that can't run + * again immediately. 
If an inode really must be written, then that's + * what the SYNC_WAIT flag is for. + */ + if (error == EAGAIN) { + ASSERT(!(flags & SYNC_WAIT)); + error = 0; + } + out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); return error; @@ -401,7 +414,7 @@ xfs_quiesce_fs( /* * Second stage of a quiesce. The data is already synced, now we have to take * care of the metadata. New transactions are already blocked, so we need to - * wait for any remaining transactions to drain out before proceding. + * wait for any remaining transactions to drain out before proceeding. */ void xfs_quiesce_attr( @@ -425,69 +438,18 @@ xfs_quiesce_attr( /* Push the superblock and write an unmount record */ error = xfs_log_sbcount(mp, 1); if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_attr_quiesce: failed to log sb changes. " + xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " "Frozen image may not be consistent."); xfs_log_unmount_write(mp); xfs_unmountfs_writesb(mp); } -/* - * Enqueue a work item to be picked up by the vfs xfssyncd thread. - * Doing this has two advantages: - * - It saves on stack space, which is tight in certain situations - * - It can be used (with care) as a mechanism to avoid deadlocks. - * Flushing while allocating in a full filesystem requires both. - */ -STATIC void -xfs_syncd_queue_work( - struct xfs_mount *mp, - void *data, - void (*syncer)(struct xfs_mount *, void *), - struct completion *completion) -{ - struct xfs_sync_work *work; - - work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); - INIT_LIST_HEAD(&work->w_list); - work->w_syncer = syncer; - work->w_data = data; - work->w_mount = mp; - work->w_completion = completion; - spin_lock(&mp->m_sync_lock); - list_add_tail(&work->w_list, &mp->m_sync_list); - spin_unlock(&mp->m_sync_lock); - wake_up_process(mp->m_sync_task); -} - -/* - * Flush delayed allocate data, attempting to free up reserved space - * from existing allocations. 
At this point a new allocation attempt - * has failed with ENOSPC and we are in the process of scratching our - * heads, looking about for more room... - */ -STATIC void -xfs_flush_inodes_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - xfs_sync_data(mp, SYNC_TRYLOCK); - xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); - iput(inode); -} - -void -xfs_flush_inodes( - xfs_inode_t *ip) +static void +xfs_syncd_queue_sync( + struct xfs_mount *mp) { - struct inode *inode = VFS_I(ip); - DECLARE_COMPLETION_ONSTACK(completion); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); - wait_for_completion(&completion); - xfs_log_force(ip->i_mount, XFS_LOG_SYNC); + queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, + msecs_to_jiffies(xfs_syncd_centisecs * 10)); } /* @@ -497,9 +459,10 @@ xfs_flush_inodes( */ STATIC void xfs_sync_worker( - struct xfs_mount *mp, - void *unused) + struct work_struct *work) { + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_sync_work); int error; if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { @@ -509,73 +472,106 @@ xfs_sync_worker( error = xfs_fs_log_dummy(mp); else xfs_log_force(mp, 0); - xfs_reclaim_inodes(mp, 0); error = xfs_qm_sync(mp, SYNC_TRYLOCK); + + /* start pushing all the metadata that is currently dirty */ + xfs_ail_push_all(mp->m_ail); } - mp->m_sync_seq++; - wake_up(&mp->m_wait_single_sync_task); + + /* queue us up again */ + xfs_syncd_queue_sync(mp); } -STATIC int -xfssyncd( - void *arg) +/* + * Queue a new inode reclaim pass if there are reclaimable inodes and there + * isn't a reclaim pass already in progress. By default it runs every 5s based + * on the xfs syncd work default of 30s. Perhaps this should have its own + * tunable, but that can be done if this method proves to be ineffective or too + * aggressive. 
+ */ +static void +xfs_syncd_queue_reclaim( + struct xfs_mount *mp) { - struct xfs_mount *mp = arg; - long timeleft; - xfs_sync_work_t *work, *n; - LIST_HEAD (tmp); - - set_freezable(); - timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); - for (;;) { - if (list_empty(&mp->m_sync_list)) - timeleft = schedule_timeout_interruptible(timeleft); - /* swsusp */ - try_to_freeze(); - if (kthread_should_stop() && list_empty(&mp->m_sync_list)) - break; - spin_lock(&mp->m_sync_lock); - /* - * We can get woken by laptop mode, to do a sync - - * that's the (only!) case where the list would be - * empty with time remaining. - */ - if (!timeleft || list_empty(&mp->m_sync_list)) { - if (!timeleft) - timeleft = xfs_syncd_centisecs * - msecs_to_jiffies(10); - INIT_LIST_HEAD(&mp->m_sync_work.w_list); - list_add_tail(&mp->m_sync_work.w_list, - &mp->m_sync_list); - } - list_splice_init(&mp->m_sync_list, &tmp); - spin_unlock(&mp->m_sync_lock); + /* + * We can have inodes enter reclaim after we've shut down the syncd + * workqueue during unmount, so don't allow reclaim work to be queued + * during unmount. + */ + if (!(mp->m_super->s_flags & MS_ACTIVE)) + return; - list_for_each_entry_safe(work, n, &tmp, w_list) { - (*work->w_syncer)(mp, work->w_data); - list_del(&work->w_list); - if (work == &mp->m_sync_work) - continue; - if (work->w_completion) - complete(work->w_completion); - kmem_free(work); - } + rcu_read_lock(); + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, + msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); } + rcu_read_unlock(); +} - return 0; +/* + * This is a fast pass over the inode cache to try to get reclaim moving on as + * many inodes as possible in a short period of time. It kicks itself every few + * seconds, as well as being kicked by the inode cache shrinker when memory + * goes low. 
It scans as quickly as possible avoiding locked inodes or those + * already being flushed, and once done schedules a future pass. + */ +STATIC void +xfs_reclaim_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_reclaim_work); + + xfs_reclaim_inodes(mp, SYNC_TRYLOCK); + xfs_syncd_queue_reclaim(mp); +} + +/* + * Flush delayed allocate data, attempting to free up reserved space + * from existing allocations. At this point a new allocation attempt + * has failed with ENOSPC and we are in the process of scratching our + * heads, looking about for more room. + * + * Queue a new data flush if there isn't one already in progress and + * wait for completion of the flush. This means that we only ever have one + * inode flush in progress no matter how many ENOSPC events are occurring and + * so will prevent the system from bogging down due to every concurrent + * ENOSPC event scanning all the active inodes in the system for writeback. 
+ */ +void +xfs_flush_inodes( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + queue_work(xfs_syncd_wq, &mp->m_flush_work); + flush_work_sync(&mp->m_flush_work); +} + +STATIC void +xfs_flush_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(work, + struct xfs_mount, m_flush_work); + + xfs_sync_data(mp, SYNC_TRYLOCK); + xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); } int xfs_syncd_init( struct xfs_mount *mp) { - mp->m_sync_work.w_syncer = xfs_sync_worker; - mp->m_sync_work.w_mount = mp; - mp->m_sync_work.w_completion = NULL; - mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); - if (IS_ERR(mp->m_sync_task)) - return -PTR_ERR(mp->m_sync_task); + INIT_WORK(&mp->m_flush_work, xfs_flush_worker); + INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + + xfs_syncd_queue_sync(mp); + xfs_syncd_queue_reclaim(mp); + return 0; } @@ -583,7 +579,9 @@ void xfs_syncd_stop( struct xfs_mount *mp) { - kthread_stop(mp->m_sync_task); + cancel_delayed_work_sync(&mp->m_sync_work); + cancel_delayed_work_sync(&mp->m_reclaim_work); + cancel_work_sync(&mp->m_flush_work); } void @@ -602,6 +600,10 @@ __xfs_inode_set_reclaim_tag( XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), XFS_ICI_RECLAIM_TAG); spin_unlock(&ip->i_mount->m_perag_lock); + + /* schedule periodic background inode reclaim */ + xfs_syncd_queue_reclaim(ip->i_mount); + trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, -1, _RET_IP_); } @@ -762,8 +764,10 @@ xfs_reclaim_inode( struct xfs_perag *pag, int sync_mode) { - int error = 0; + int error; +restart: + error = 0; xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iflock_nowait(ip)) { if (!(sync_mode & SYNC_WAIT)) @@ -789,9 +793,31 @@ xfs_reclaim_inode( if (xfs_inode_clean(ip)) goto reclaim; - /* Now we have an inode that needs flushing */ - error = xfs_iflush(ip, sync_mode); + /* + * Now we have an inode that needs flushing. 
+ * + * We do a nonblocking flush here even if we are doing a SYNC_WAIT + * reclaim as we can deadlock with inode cluster removal. + * xfs_ifree_cluster() can lock the inode buffer before it locks the + * ip->i_lock, and we are doing the exact opposite here. As a result, + * doing a blocking xfs_itobp() to get the cluster buffer will result + * in an ABBA deadlock with xfs_ifree_cluster(). + * + * As xfs_ifree_cluser() must gather all inodes that are active in the + * cache to mark them stale, if we hit this case we don't actually want + * to do IO here - we want the inode marked stale so we can simply + * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, + * just unlock the inode, back off and try again. Hopefully the next + * pass through will see the stale flag set on the inode. + */ + error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); if (sync_mode & SYNC_WAIT) { + if (error == EAGAIN) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* backoff longer than in xfs_ifree_cluster */ + delay(2); + goto restart; + } xfs_iflock(ip); goto reclaim; } @@ -806,7 +832,7 @@ xfs_reclaim_inode( * pass on the error. */ if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_fs_cmn_err(CE_WARN, ip->i_mount, + xfs_warn(ip->i_mount, "inode 0x%llx background reclaim flush failed with %d", (long long)ip->i_ino, error); } @@ -910,6 +936,7 @@ restart: XFS_LOOKUP_BATCH, XFS_ICI_RECLAIM_TAG); if (!nr_found) { + done = 1; rcu_read_unlock(); break; } @@ -994,25 +1021,37 @@ xfs_reclaim_inodes( } /* - * Shrinker infrastructure. + * Inode cache shrinker. + * + * When called we make sure that there is a background (fast) inode reclaim in + * progress, while we will throttle the speed of reclaim via doiing synchronous + * reclaim of inodes. That means if we come across dirty inodes, we wait for + * them to be cleaned, which we hope will not be very long due to the + * background walker having already kicked the IO off on those dirty inodes. 
*/ static int xfs_reclaim_inode_shrink( struct shrinker *shrink, - int nr_to_scan, - gfp_t gfp_mask) + struct shrink_control *sc) { struct xfs_mount *mp; struct xfs_perag *pag; xfs_agnumber_t ag; int reclaimable; + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; mp = container_of(shrink, struct xfs_mount, m_inode_shrink); if (nr_to_scan) { + /* kick background reclaimer and push the AIL */ + xfs_syncd_queue_reclaim(mp); + xfs_ail_push_all(mp->m_ail); + if (!(gfp_mask & __GFP_FS)) return -1; - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, + &nr_to_scan); /* terminate if we don't exhaust the scan */ if (nr_to_scan > 0) return -1; diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 32ba6628290c..e3a6ad27415f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -32,6 +32,8 @@ typedef struct xfs_sync_work { #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ +extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index ee3cee097e7e..ee2d2adaa438 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -37,7 +37,7 @@ xfs_stats_clear_proc_handler( ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); if (!ret && write && *valp) { - printk("XFS Clearing xfsstats\n"); + xfs_notice(NULL, "Clearing xfsstats"); for_each_possible_cpu(c) { preempt_disable(); /* save vn_active, it's a universal truth! 
*/ diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 2d0bcb479075..d48b7a579ae1 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap, ); -#define XFS_BUSY_SYNC \ - { 0, "async" }, \ - { 1, "sync" } - -TRACE_EVENT(xfs_alloc_busy, - TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t len, int sync), - TP_ARGS(trans, agno, agbno, len, sync), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(struct xfs_trans *, tp) - __field(int, tid) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, len) - __field(int, sync) - ), - TP_fast_assign( - __entry->dev = trans->t_mountp->m_super->s_dev; - __entry->tp = trans; - __entry->tid = trans->t_ticket->t_tid; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->len = len; - __entry->sync = sync; - ), - TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->tp, - __entry->tid, - __entry->agno, - __entry->agbno, - __entry->len, - __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) - -); - -TRACE_EVENT(xfs_alloc_unbusy, +DECLARE_EVENT_CLASS(xfs_busy_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t len), TP_ARGS(mp, agno, agbno, len), @@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy, __entry->agbno, __entry->len) ); +#define DEFINE_BUSY_EVENT(name) \ +DEFINE_EVENT(xfs_busy_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + xfs_agblock_t agbno, xfs_extlen_t len), \ + TP_ARGS(mp, agno, agbno, len)) +DEFINE_BUSY_EVENT(xfs_alloc_busy); +DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); +DEFINE_BUSY_EVENT(xfs_alloc_busy_force); +DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); +DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); -#define XFS_BUSY_STATES \ - { 0, "missing" }, \ - { 1, "found" } - -TRACE_EVENT(xfs_alloc_busysearch, 
+TRACE_EVENT(xfs_alloc_busy_trim, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t len, int found), - TP_ARGS(mp, agno, agbno, len, found), + xfs_agblock_t agbno, xfs_extlen_t len, + xfs_agblock_t tbno, xfs_extlen_t tlen), + TP_ARGS(mp, agno, agbno, len, tbno, tlen), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) - __field(int, found) + __field(xfs_agblock_t, tbno) + __field(xfs_extlen_t, tlen) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; __entry->agbno = agbno; __entry->len = len; - __entry->found = found; + __entry->tbno = tbno; + __entry->tlen = tlen; ), - TP_printk("dev %d:%d agno %u agbno %u len %u %s", + TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, __entry->len, - __print_symbolic(__entry->found, XFS_BUSY_STATES)) + __entry->tbno, + __entry->tlen) ); TRACE_EVENT(xfs_trans_commit_lsn, @@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, __entry->wasfromfl, __entry->isfl, __entry->userdata, - __entry->firstblock) + (unsigned long long)__entry->firstblock) ) #define DEFINE_ALLOC_EVENT(name) \ @@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first); DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); DEFINE_ALLOC_EVENT(xfs_alloc_near_error); +DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); +DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); DEFINE_ALLOC_EVENT(xfs_alloc_size_done); DEFINE_ALLOC_EVENT(xfs_alloc_size_error); +DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); DEFINE_ALLOC_EVENT(xfs_alloc_small_done); diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c 
index d22aa3103106..6fa214603819 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -544,9 +544,10 @@ xfs_qm_dqtobp( /* * A simple sanity check in case we got a corrupted dquot... */ - if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, + error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), - "dqtobp")) { + "dqtobp"); + if (error) { if (!(flags & XFS_QMOPT_DQREPAIR)) { xfs_trans_brelse(tp, bp); return XFS_ERROR(EIO); @@ -599,7 +600,7 @@ xfs_qm_dqread( /* * Reservation counters are defined as reservation plus current usage - * to avoid having to add everytime. + * to avoid having to add every time. */ dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); @@ -827,7 +828,7 @@ xfs_qm_dqget( if (xfs_do_dqerror) { if ((xfs_dqerror_target == mp->m_ddev_targp) && (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { - cmn_err(CE_DEBUG, "Returning error in dqget"); + xfs_debug(mp, "Returning error in dqget"); return (EIO); } } @@ -1207,8 +1208,9 @@ xfs_qm_dqflush( /* * A simple sanity check in case we got a corrupted dquot.. 
*/ - if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0, - XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { + error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, + XFS_QMOPT_DOWARN, "dqflush (incore copy)"); + if (error) { xfs_buf_relse(bp); xfs_dqfunlock(dqp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); @@ -1391,8 +1393,8 @@ xfs_qm_dqpurge( */ error = xfs_qm_dqflush(dqp, SYNC_WAIT); if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_qm_dqpurge: dquot %p flush failed", dqp); + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); xfs_dqflock(dqp); } ASSERT(atomic_read(&dqp->q_pincount) == 0); @@ -1425,36 +1427,38 @@ xfs_qm_dqpurge( void xfs_qm_dqprint(xfs_dquot_t *dqp) { - cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------"); - cmn_err(CE_DEBUG, "---- dquotID = %d", + struct xfs_mount *mp = dqp->q_mount; + + xfs_debug(mp, "-----------KERNEL DQUOT----------------"); + xfs_debug(mp, "---- dquotID = %d", (int)be32_to_cpu(dqp->q_core.d_id)); - cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); - cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount); - cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno); - cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset); - cmn_err(CE_DEBUG, "---- blkhlimit = %Lu (0x%x)", + xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); + xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount); + xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno); + xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset); + xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)", be64_to_cpu(dqp->q_core.d_blk_hardlimit), (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); - cmn_err(CE_DEBUG, "---- blkslimit = %Lu (0x%x)", + xfs_debug(mp, "---- blkslimit = %Lu (0x%x)", be64_to_cpu(dqp->q_core.d_blk_softlimit), (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); - cmn_err(CE_DEBUG, "---- inohlimit = %Lu (0x%x)", + xfs_debug(mp, "---- inohlimit = %Lu (0x%x)", be64_to_cpu(dqp->q_core.d_ino_hardlimit), 
(int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); - cmn_err(CE_DEBUG, "---- inoslimit = %Lu (0x%x)", + xfs_debug(mp, "---- inoslimit = %Lu (0x%x)", be64_to_cpu(dqp->q_core.d_ino_softlimit), (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); - cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", + xfs_debug(mp, "---- bcount = %Lu (0x%x)", be64_to_cpu(dqp->q_core.d_bcount), (int)be64_to_cpu(dqp->q_core.d_bcount)); - cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", + xfs_debug(mp, "---- icount = %Lu (0x%x)", be64_to_cpu(dqp->q_core.d_icount), (int)be64_to_cpu(dqp->q_core.d_icount)); - cmn_err(CE_DEBUG, "---- btimer = %d", + xfs_debug(mp, "---- btimer = %d", (int)be32_to_cpu(dqp->q_core.d_btimer)); - cmn_err(CE_DEBUG, "---- itimer = %d", + xfs_debug(mp, "---- itimer = %d", (int)be32_to_cpu(dqp->q_core.d_itimer)); - cmn_err(CE_DEBUG, "---------------------------"); + xfs_debug(mp, "---------------------------"); } #endif diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 2a1f3dc10a02..9e0e2fa3f2c8 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -136,9 +136,8 @@ xfs_qm_dquot_logitem_push( */ error = xfs_qm_dqflush(dqp, 0); if (error) - xfs_fs_cmn_err(CE_WARN, dqp->q_mount, - "xfs_qm_dquot_logitem_push: push error %d on dqp %p", - error, dqp); + xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", + __func__, error, dqp); xfs_dqunlock(dqp); } diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 206a2815ced6..b94dace4e785 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); +STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); static struct shrinker xfs_qm_shaker = { .shrink = xfs_qm_shake, @@ -80,7 +80,7 @@ xfs_qm_dquot_list_print( int i = 0; list_for_each_entry(dqp, 
&mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { - cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " + xfs_debug(mp, " %d. \"%d (%s)\" " "bcnt = %lld, icnt = %lld, refs = %d", i++, be32_to_cpu(dqp->q_core.d_id), DQFLAGTO_TYPESTR(dqp), @@ -205,7 +205,7 @@ xfs_qm_destroy( list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { xfs_dqlock(dqp); #ifdef QUOTADEBUG - cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp); + xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp); #endif list_del_init(&dqp->q_freelist); xfs_Gqm->qm_dqfrlist_cnt--; @@ -341,9 +341,7 @@ xfs_qm_mount_quotas( * quotas immediately. */ if (mp->m_sb.sb_rextents) { - cmn_err(CE_NOTE, - "Cannot turn on quotas for realtime filesystem %s", - mp->m_fsname); + xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); mp->m_qflags = 0; goto write_changes; } @@ -402,14 +400,13 @@ xfs_qm_mount_quotas( * off, but the on disk superblock doesn't know that ! */ ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); - xfs_fs_cmn_err(CE_ALERT, mp, - "XFS mount_quotas: Superblock update failed!"); + xfs_alert(mp, "%s: Superblock update failed!", + __func__); } } if (error) { - xfs_fs_cmn_err(CE_WARN, mp, - "Failed to initialize disk quotas."); + xfs_warn(mp, "Failed to initialize disk quotas."); return; } @@ -464,12 +461,10 @@ xfs_qm_dqflush_all( struct xfs_quotainfo *q = mp->m_quotainfo; int recl; struct xfs_dquot *dqp; - int niters; int error; if (!q) return 0; - niters = 0; again: mutex_lock(&q->qi_dqlist_lock); list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { @@ -1230,13 +1225,6 @@ xfs_qm_qino_alloc( } /* - * Keep an extra reference to this quota inode. This inode is - * locked exclusively and joined to the transaction already. - */ - ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL)); - IHOLD(*ip); - - /* * Make the changes in the superblock, and log those too. * sbfields arg may contain fields other than *QUOTINO; * VERSIONNUM for example. 
@@ -1264,7 +1252,7 @@ xfs_qm_qino_alloc( xfs_mod_sb(tp, sbfields); if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { - xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); + xfs_alert(mp, "%s failed (error %d)!", __func__, error); return error; } return 0; @@ -1299,7 +1287,7 @@ xfs_qm_reset_dqcounts( * output any warnings because it's perfectly possible to * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. */ - (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR, + (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, "xfs_quotacheck"); ddq->d_bcount = 0; ddq->d_icount = 0; @@ -1324,14 +1312,9 @@ xfs_qm_dqiter_bufs( { xfs_buf_t *bp; int error; - int notcommitted; - int incr; int type; ASSERT(blkcnt > 0); - notcommitted = 0; - incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ? - XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt; type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); error = 0; @@ -1676,7 +1659,7 @@ xfs_qm_quotacheck( */ ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); - cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); + xfs_notice(mp, "Quotacheck needed: Please wait."); /* * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset @@ -1754,9 +1737,9 @@ xfs_qm_quotacheck( error_return: if (error) { - cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): " - "Disabling quotas.", - mp->m_fsname, error); + xfs_warn(mp, + "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", + error); /* * We must turn off quotas. 
*/ @@ -1764,12 +1747,11 @@ xfs_qm_quotacheck( ASSERT(xfs_Gqm != NULL); xfs_qm_destroy_quotainfo(mp); if (xfs_mount_reset_sbqflags(mp)) { - cmn_err(CE_WARN, "XFS quotacheck %s: " - "Failed to reset quota flags.", mp->m_fsname); + xfs_warn(mp, + "Quotacheck: Failed to reset quota flags."); } - } else { - cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); - } + } else + xfs_notice(mp, "Quotacheck: Done."); return (error); } @@ -1937,8 +1919,8 @@ again: */ error = xfs_qm_dqflush(dqp, 0); if (error) { - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_qm_dqreclaim: dquot %p flush failed", dqp); + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); } goto dqunlock; } @@ -2027,10 +2009,10 @@ xfs_qm_shake_freelist( STATIC int xfs_qm_shake( struct shrinker *shrink, - int nr_to_scan, - gfp_t gfp_mask) + struct shrink_control *sc) { int ndqused, nfree, n; + gfp_t gfp_mask = sc->gfp_mask; if (!kmem_shake_allow(gfp_mask)) return 0; @@ -2115,7 +2097,7 @@ xfs_qm_write_sb_changes( int error; #ifdef QUOTADEBUG - cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname); + xfs_notice(mp, "Writing superblock quota changes"); #endif tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); if ((error = xfs_trans_reserve(tp, 0, diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index c9446f1c726d..567b29b9f1b3 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -65,11 +65,6 @@ extern kmem_zone_t *qm_dqtrxzone; * block in the dquot/xqm code. */ #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 -/* - * When doing a quotacheck, we log dquot clusters of this many FSBs at most - * in a single transaction. We don't want to ask for too huge a log reservation. 
- */ -#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 typedef xfs_dqhash_t xfs_dqlist_t; diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 45b5cb1788ab..a0a829addca9 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -119,8 +119,7 @@ xfs_qm_newmount( (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && xfs_dev_is_read_only(mp, "changing quota state")) { - cmn_err(CE_WARN, - "XFS: please mount with%s%s%s%s.", + xfs_warn(mp, "please mount with%s%s%s%s.", (!quotaondisk ? "out quota" : ""), (uquotaondisk ? " usrquota" : ""), (pquotaondisk ? " prjquota" : ""), @@ -135,7 +134,7 @@ xfs_qm_newmount( */ if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { /* - * If an error occured, qm_mount_quotas code + * If an error occurred, qm_mount_quotas code * has already disabled quotas. So, just finish * mounting, and get on with the boring life * without disk quotas. diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index bdebc183223e..2dadb15d5ca9 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -41,12 +41,6 @@ #include "xfs_qm.h" #include "xfs_trace.h" -#ifdef DEBUG -# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args) -#else -# define qdprintk(s, args...) do { } while (0) -#endif - STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, uint); @@ -178,7 +172,7 @@ xfs_qm_scall_quotaoff( /* * Next we make the changes in the quota flag in the mount struct. * This isn't protected by a particular lock directly, because we - * don't want to take a mrlock everytime we depend on quotas being on. + * don't want to take a mrlock every time we depend on quotas being on. 
*/ mp->m_qflags &= ~(flags); @@ -294,7 +288,8 @@ xfs_qm_scall_trunc_qfiles( int error = 0, error2 = 0; if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { - qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); + xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", + __func__, flags, mp->m_qflags); return XFS_ERROR(EINVAL); } @@ -318,20 +313,19 @@ xfs_qm_scall_quotaon( { int error; uint qf; - uint accflags; __int64_t sbflags; flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); /* * Switching on quota accounting must be done at mount time. */ - accflags = flags & XFS_ALL_QUOTA_ACCT; flags &= ~(XFS_ALL_QUOTA_ACCT); sbflags = 0; if (flags == 0) { - qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags); + xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", + __func__, mp->m_qflags); return XFS_ERROR(EINVAL); } @@ -352,12 +346,13 @@ xfs_qm_scall_quotaon( (flags & XFS_GQUOTA_ACCT) == 0 && (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && (flags & XFS_OQUOTA_ENFD))) { - qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", - flags, mp->m_sb.sb_qflags); + xfs_debug(mp, + "%s: Can't enforce without acct, flags=%x sbflags=%x\n", + __func__, flags, mp->m_sb.sb_qflags); return XFS_ERROR(EINVAL); } /* - * If everything's upto-date incore, then don't waste time. + * If everything's up to-date incore, then don't waste time. */ if ((mp->m_qflags & flags) == flags) return XFS_ERROR(EEXIST); @@ -541,7 +536,7 @@ xfs_qm_scall_setqlim( q->qi_bsoftlimit = soft; } } else { - qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); + xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : @@ -557,7 +552,7 @@ xfs_qm_scall_setqlim( q->qi_rtbsoftlimit = soft; } } else { - qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); + xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? 
@@ -574,7 +569,7 @@ xfs_qm_scall_setqlim( q->qi_isoftlimit = soft; } } else { - qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); + xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); } /* @@ -939,10 +934,11 @@ struct mutex qcheck_lock; #define DQTEST_LIST_PRINT(l, NXT, title) \ { \ xfs_dqtest_t *dqp; int i = 0;\ - cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ + xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \ for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ dqp = (xfs_dqtest_t *)dqp->NXT) { \ - cmn_err(CE_DEBUG, " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ + xfs_debug(dqp->q_mount, \ + " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ dqp->d_bcount, dqp->d_icount); } \ } @@ -966,16 +962,17 @@ xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) } STATIC void xfs_qm_dqtest_print( - xfs_dqtest_t *d) + struct xfs_mount *mp, + struct dqtest *d) { - cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------"); - cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id); - cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount); - cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", + xfs_debug(mp, "-----------DQTEST DQUOT----------------"); + xfs_debug(mp, "---- dquot ID = %d", d->d_id); + xfs_debug(mp, "---- fs = 0x%p", d->q_mount); + xfs_debug(mp, "---- bcount = %Lu (0x%x)", d->d_bcount, (int)d->d_bcount); - cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", + xfs_debug(mp, "---- icount = %Lu (0x%x)", d->d_icount, (int)d->d_icount); - cmn_err(CE_DEBUG, "---------------------------"); + xfs_debug(mp, "---------------------------"); } STATIC void @@ -989,12 +986,14 @@ xfs_qm_dqtest_failed( { qmtest_nfails++; if (error) - cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s", - d->d_id, error, reason); + xfs_debug(dqp->q_mount, + "quotacheck failed id=%d, err=%d\nreason: %s", + d->d_id, error, reason); else - cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]", - d->d_id, reason, (int)a, (int)b); - 
xfs_qm_dqtest_print(d); + xfs_debug(dqp->q_mount, + "quotacheck failed id=%d (%s) [%d != %d]", + d->d_id, reason, (int)a, (int)b); + xfs_qm_dqtest_print(dqp->q_mount, d); if (dqp) xfs_qm_dqprint(dqp); } @@ -1021,9 +1020,9 @@ xfs_dqtest_cmp2( be64_to_cpu(dqp->q_core.d_bcount) >= be64_to_cpu(dqp->q_core.d_blk_softlimit)) { if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { - cmn_err(CE_DEBUG, - "%d [%s] [0x%p] BLK TIMER NOT STARTED", - d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); + xfs_debug(dqp->q_mount, + "%d [%s] BLK TIMER NOT STARTED", + d->d_id, DQFLAGTO_TYPESTR(d)); err++; } } @@ -1031,16 +1030,16 @@ xfs_dqtest_cmp2( be64_to_cpu(dqp->q_core.d_icount) >= be64_to_cpu(dqp->q_core.d_ino_softlimit)) { if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { - cmn_err(CE_DEBUG, - "%d [%s] [0x%p] INO TIMER NOT STARTED", - d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); + xfs_debug(dqp->q_mount, + "%d [%s] INO TIMER NOT STARTED", + d->d_id, DQFLAGTO_TYPESTR(d)); err++; } } #ifdef QUOTADEBUG if (!err) { - cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked", - d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); + xfs_debug(dqp->q_mount, "%d [%s] qchecked", + d->d_id, DQFLAGTO_TYPESTR(d)); } #endif return (err); @@ -1137,8 +1136,8 @@ xfs_qm_internalqcheck_adjust( if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { *res = BULKSTAT_RV_NOTHING; - qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n", - (unsigned long long) ino, + xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n", + __func__, (unsigned long long) ino, (unsigned long long) mp->m_sb.sb_uquotino, (unsigned long long) mp->m_sb.sb_gquotino); return XFS_ERROR(EINVAL); @@ -1223,12 +1222,12 @@ xfs_qm_internalqcheck( xfs_qm_internalqcheck_adjust, 0, NULL, &done); if (error) { - cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); + xfs_debug(mp, "Bulkstat returned error 0x%x", error); break; } } while (!done); - cmn_err(CE_DEBUG, "Checking results against system dquots"); + xfs_debug(mp, "Checking results against system 
dquots"); for (i = 0; i < qmtest_hashmask; i++) { xfs_dqtest_t *d, *n; xfs_dqhash_t *h; @@ -1246,10 +1245,10 @@ xfs_qm_internalqcheck( } if (qmtest_nfails) { - cmn_err(CE_DEBUG, "******** quotacheck failed ********"); - cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails); + xfs_debug(mp, "******** quotacheck failed ********"); + xfs_debug(mp, "failures = %d", qmtest_nfails); } else { - cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); + xfs_debug(mp, "******** quotacheck successful! ********"); } kmem_free(qmtest_udqtab); kmem_free(qmtest_gdqtab); diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 7de91d1b75c0..2a3648731331 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -643,8 +643,9 @@ xfs_trans_dqresv( (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { #ifdef QUOTADEBUG - cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld" - " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit); + xfs_debug(mp, + "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?", + nblks, *resbcountp, hardlimit); #endif if (nblks > 0) { /* diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c deleted file mode 100644 index 0df88897ef84..000000000000 --- a/fs/xfs/support/debug.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include <xfs.h> -#include "debug.h" - -/* xfs_mount.h drags a lot of crap in, sorry.. */ -#include "xfs_sb.h" -#include "xfs_inum.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_error.h" - -void -cmn_err( - const char *lvl, - const char *fmt, - ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - - printk("%s%pV", lvl, &vaf); - va_end(args); - - BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0); -} - -void -xfs_fs_cmn_err( - const char *lvl, - struct xfs_mount *mp, - const char *fmt, - ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - - printk("%sFilesystem %s: %pV", lvl, mp->m_fsname, &vaf); - va_end(args); - - BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0); -} - -/* All callers to xfs_cmn_err use CE_ALERT, so don't bother testing lvl */ -void -xfs_cmn_err( - int panic_tag, - const char *lvl, - struct xfs_mount *mp, - const char *fmt, - ...) 
-{ - struct va_format vaf; - va_list args; - int do_panic = 0; - - if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { - printk(KERN_ALERT "XFS: Transforming an alert into a BUG."); - do_panic = 1; - } - - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - - printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf); - va_end(args); - - BUG_ON(do_panic); -} - -void -assfail(char *expr, char *file, int line) -{ - printk(KERN_CRIT "Assertion failed: %s, file: %s, line: %d\n", expr, - file, line); - BUG(); -} - -void -xfs_hex_dump(void *p, int length) -{ - print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); -} diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h deleted file mode 100644 index 05699f67d475..000000000000 --- a/fs/xfs/support/debug.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_DEBUG_H__ -#define __XFS_SUPPORT_DEBUG_H__ - -#include <stdarg.h> - -struct xfs_mount; - -#define CE_DEBUG KERN_DEBUG -#define CE_CONT KERN_INFO -#define CE_NOTE KERN_NOTICE -#define CE_WARN KERN_WARNING -#define CE_ALERT KERN_ALERT -#define CE_PANIC KERN_EMERG - -void cmn_err(const char *lvl, const char *fmt, ...) 
- __attribute__ ((format (printf, 2, 3))); -void xfs_fs_cmn_err( const char *lvl, struct xfs_mount *mp, - const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); -void xfs_cmn_err( int panic_tag, const char *lvl, struct xfs_mount *mp, - const char *fmt, ...) __attribute__ ((format (printf, 4, 5))); - -extern void assfail(char *expr, char *f, int l); - -#define ASSERT_ALWAYS(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) - -#ifndef DEBUG -#define ASSERT(expr) ((void)0) - -#ifndef STATIC -# define STATIC static noinline -#endif - -#else /* DEBUG */ - -#define ASSERT(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) - -#ifndef STATIC -# define STATIC noinline -#endif - -#endif /* DEBUG */ -#endif /* __XFS_SUPPORT_DEBUG_H__ */ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 58632cc17f2d..6530769a999b 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -187,7 +187,9 @@ struct xfs_busy_extent { xfs_agnumber_t agno; xfs_agblock_t bno; xfs_extlen_t length; - xlog_tid_t tid; /* transaction that created this */ + unsigned int flags; +#define XFS_ALLOC_BUSY_DISCARDED 0x01 /* undergoing a discard op. */ +#define XFS_ALLOC_BUSY_SKIP_DISCARD 0x02 /* do not discard */ }; /* diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index f3227984a9bf..95862bbff56b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -41,19 +41,13 @@ #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 -/* - * Prototypes for per-ag allocation routines - */ - STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, - xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); - -/* - * Internal functions. 
- */ + xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); +STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *, + xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *); /* * Lookup the record equal to [bno, len] in the btree given by cur. @@ -147,27 +141,28 @@ xfs_alloc_get_rec( */ STATIC void xfs_alloc_compute_aligned( + xfs_alloc_arg_t *args, /* allocation argument structure */ xfs_agblock_t foundbno, /* starting block in found extent */ xfs_extlen_t foundlen, /* length in found extent */ - xfs_extlen_t alignment, /* alignment for allocation */ - xfs_extlen_t minlen, /* minimum length for allocation */ xfs_agblock_t *resbno, /* result block number */ xfs_extlen_t *reslen) /* result length */ { xfs_agblock_t bno; - xfs_extlen_t diff; xfs_extlen_t len; - if (alignment > 1 && foundlen >= minlen) { - bno = roundup(foundbno, alignment); - diff = bno - foundbno; - len = diff >= foundlen ? 0 : foundlen - diff; + /* Trim busy sections out of found extent */ + xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len); + + if (args->alignment > 1 && len >= args->minlen) { + xfs_agblock_t aligned_bno = roundup(bno, args->alignment); + xfs_extlen_t diff = aligned_bno - bno; + + *resbno = aligned_bno; + *reslen = diff >= len ? 
0 : len - diff; } else { - bno = foundbno; - len = foundlen; + *resbno = bno; + *reslen = len; } - *resbno = bno; - *reslen = len; } /* @@ -281,7 +276,6 @@ xfs_alloc_fix_minleft( return 1; agf = XFS_BUF_TO_AGF(args->agbp); diff = be32_to_cpu(agf->agf_freeblks) - + be32_to_cpu(agf->agf_flcount) - args->len - args->minleft; if (diff >= 0) return 1; @@ -464,6 +458,27 @@ xfs_alloc_read_agfl( return 0; } +STATIC int +xfs_alloc_update_counters( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_buf *agbp, + long len) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + + pag->pagf_freeblks += len; + be32_add_cpu(&agf->agf_freeblks, len); + + xfs_trans_agblocks_delta(tp, len); + if (unlikely(be32_to_cpu(agf->agf_freeblks) > + be32_to_cpu(agf->agf_length))) + return EFSCORRUPTED; + + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); + return 0; +} + /* * Allocation group level functions. */ @@ -505,49 +520,36 @@ xfs_alloc_ag_vextent( ASSERT(0); /* NOTREACHED */ } - if (error) + + if (error || args->agbno == NULLAGBLOCK) return error; - /* - * If the allocation worked, need to change the agf structure - * (and log it), and the superblock. - */ - if (args->agbno != NULLAGBLOCK) { - xfs_agf_t *agf; /* allocation group freelist header */ - long slen = (long)args->len; - ASSERT(args->len >= args->minlen && args->len <= args->maxlen); - ASSERT(!(args->wasfromfl) || !args->isfl); - ASSERT(args->agbno % args->alignment == 0); - if (!(args->wasfromfl)) { - - agf = XFS_BUF_TO_AGF(args->agbp); - be32_add_cpu(&agf->agf_freeblks, -(args->len)); - xfs_trans_agblocks_delta(args->tp, - -((long)(args->len))); - args->pag->pagf_freeblks -= args->len; - ASSERT(be32_to_cpu(agf->agf_freeblks) <= - be32_to_cpu(agf->agf_length)); - xfs_alloc_log_agf(args->tp, args->agbp, - XFS_AGF_FREEBLKS); - /* - * Search the busylist for these blocks and mark the - * transaction as synchronous if blocks are found. 
This - * avoids the need to block due to a synchronous log - * force to ensure correct ordering as the synchronous - * transaction will guarantee that for us. - */ - if (xfs_alloc_busy_search(args->mp, args->agno, - args->agbno, args->len)) - xfs_trans_set_sync(args->tp); - } - if (!args->isfl) - xfs_trans_mod_sb(args->tp, - args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : - XFS_TRANS_SB_FDBLOCKS, -slen); - XFS_STATS_INC(xs_allocx); - XFS_STATS_ADD(xs_allocb, args->len); + ASSERT(args->len >= args->minlen); + ASSERT(args->len <= args->maxlen); + ASSERT(!args->wasfromfl || !args->isfl); + ASSERT(args->agbno % args->alignment == 0); + + if (!args->wasfromfl) { + error = xfs_alloc_update_counters(args->tp, args->pag, + args->agbp, + -((long)(args->len))); + if (error) + return error; + + ASSERT(!xfs_alloc_busy_search(args->mp, args->agno, + args->agbno, args->len)); } - return 0; + + if (!args->isfl) { + xfs_trans_mod_sb(args->tp, args->wasdel ? + XFS_TRANS_SB_RES_FDBLOCKS : + XFS_TRANS_SB_FDBLOCKS, + -((long)(args->len))); + } + + XFS_STATS_INC(xs_allocx); + XFS_STATS_ADD(xs_allocb, args->len); + return error; } /* @@ -562,14 +564,14 @@ xfs_alloc_ag_vextent_exact( { xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ - xfs_agblock_t end; /* end of allocated extent */ int error; xfs_agblock_t fbno; /* start block of found extent */ - xfs_agblock_t fend; /* end block of found extent */ xfs_extlen_t flen; /* length of found extent */ + xfs_agblock_t tbno; /* start block of trimmed extent */ + xfs_extlen_t tlen; /* length of trimmed extent */ + xfs_agblock_t tend; /* end block of trimmed extent */ + xfs_agblock_t end; /* end of allocated extent */ int i; /* success/failure of operation */ - xfs_agblock_t maxend; /* end of maximal extent */ - xfs_agblock_t minend; /* end of minimal extent */ xfs_extlen_t rlen; /* length of returned extent */ ASSERT(args->alignment == 1); @@ -599,14 +601,22 @@ 
xfs_alloc_ag_vextent_exact( goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); ASSERT(fbno <= args->agbno); - minend = args->agbno + args->minlen; - maxend = args->agbno + args->maxlen; - fend = fbno + flen; /* - * Give up if the freespace isn't long enough for the minimum request. + * Check for overlapping busy extents. */ - if (fend < minend) + xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen); + + /* + * Give up if the start of the extent is busy, or the freespace isn't + * long enough for the minimum request. + */ + if (tbno > args->agbno) + goto not_found; + if (tlen < args->minlen) + goto not_found; + tend = tbno + tlen; + if (tend < args->agbno + args->minlen) goto not_found; /* @@ -615,14 +625,14 @@ xfs_alloc_ag_vextent_exact( * * Fix the length according to mod and prod if given. */ - end = XFS_AGBLOCK_MIN(fend, maxend); + end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); args->len = end - args->agbno; xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) goto not_found; rlen = args->len; - ASSERT(args->agbno + rlen <= fend); + ASSERT(args->agbno + rlen <= tend); end = args->agbno + rlen; /* @@ -671,11 +681,11 @@ xfs_alloc_find_best_extent( struct xfs_btree_cur **scur, /* searching cursor */ xfs_agblock_t gdiff, /* difference for search comparison */ xfs_agblock_t *sbno, /* extent found by search */ - xfs_extlen_t *slen, - xfs_extlen_t *slena, /* aligned length */ + xfs_extlen_t *slen, /* extent length */ + xfs_agblock_t *sbnoa, /* aligned extent found by search */ + xfs_extlen_t *slena, /* aligned extent length */ int dir) /* 0 = search right, 1 = search left */ { - xfs_agblock_t bno; xfs_agblock_t new; xfs_agblock_t sdiff; int error; @@ -693,17 +703,16 @@ xfs_alloc_find_best_extent( if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(*sbno, *slen, args->alignment, - args->minlen, &bno, slena); + xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); /* * The good extent is closer than this 
one. */ if (!dir) { - if (bno >= args->agbno + gdiff) + if (*sbnoa >= args->agbno + gdiff) goto out_use_good; } else { - if (bno <= args->agbno - gdiff) + if (*sbnoa <= args->agbno - gdiff) goto out_use_good; } @@ -715,8 +724,8 @@ xfs_alloc_find_best_extent( xfs_alloc_fix_len(args); sdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, *sbno, - *slen, &new); + args->alignment, *sbnoa, + *slena, &new); /* * Choose closer size and invalidate other cursor. @@ -766,7 +775,7 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t gtbnoa; /* aligned ... */ xfs_extlen_t gtdiff; /* difference to right side entry */ xfs_extlen_t gtlen; /* length of right side entry */ - xfs_extlen_t gtlena = 0; /* aligned ... */ + xfs_extlen_t gtlena; /* aligned ... */ xfs_agblock_t gtnew; /* useful start bno of right side */ int error; /* error code */ int i; /* result code, temporary */ @@ -775,9 +784,10 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t ltbnoa; /* aligned ... */ xfs_extlen_t ltdiff; /* difference to left side entry */ xfs_extlen_t ltlen; /* length of left side entry */ - xfs_extlen_t ltlena = 0; /* aligned ... */ + xfs_extlen_t ltlena; /* aligned ... */ xfs_agblock_t ltnew; /* useful start bno of left side */ xfs_extlen_t rlen; /* length of returned extent */ + int forced = 0; #if defined(DEBUG) && defined(__KERNEL__) /* * Randomly don't execute the first algorithm. @@ -786,13 +796,20 @@ xfs_alloc_ag_vextent_near( dofirst = random32() & 1; #endif + +restart: + bno_cur_lt = NULL; + bno_cur_gt = NULL; + ltlen = 0; + gtlena = 0; + ltlena = 0; + /* * Get a cursor for the by-size btree. */ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); - ltlen = 0; - bno_cur_lt = bno_cur_gt = NULL; + /* * See if there are any free extents as big as maxlen. 
*/ @@ -808,11 +825,13 @@ xfs_alloc_ag_vextent_near( goto error0; if (i == 0 || ltlen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_near_noentry(args); return 0; } ASSERT(i == 1); } args->wasfromfl = 0; + /* * First algorithm. * If the requested extent is large wrt the freespaces available @@ -866,8 +885,8 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, - args->minlen, <bnoa, <lena); + xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena); if (ltlena < args->minlen) continue; args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); @@ -876,7 +895,7 @@ xfs_alloc_ag_vextent_near( if (args->len < blen) continue; ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, ltbno, ltlen, <new); + args->alignment, ltbnoa, ltlena, <new); if (ltnew != NULLAGBLOCK && (args->len > blen || ltdiff < bdiff)) { bdiff = ltdiff; @@ -987,8 +1006,8 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, - args->minlen, <bnoa, <lena); + xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena); if (ltlena >= args->minlen) break; if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) @@ -1003,8 +1022,8 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, - args->minlen, >bnoa, >lena); + xfs_alloc_compute_aligned(args, gtbno, gtlen, + >bnoa, >lena); if (gtlena >= args->minlen) break; if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) @@ -1028,11 +1047,12 @@ xfs_alloc_ag_vextent_near( args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); ltdiff = 
xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, ltbno, ltlen, <new); + args->alignment, ltbnoa, ltlena, <new); error = xfs_alloc_find_best_extent(args, &bno_cur_lt, &bno_cur_gt, - ltdiff, >bno, >len, >lena, + ltdiff, >bno, >len, + >bnoa, >lena, 0 /* search right */); } else { ASSERT(gtlena >= args->minlen); @@ -1043,11 +1063,12 @@ xfs_alloc_ag_vextent_near( args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); xfs_alloc_fix_len(args); gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, gtbno, gtlen, >new); + args->alignment, gtbnoa, gtlena, >new); error = xfs_alloc_find_best_extent(args, &bno_cur_gt, &bno_cur_lt, - gtdiff, <bno, <len, <lena, + gtdiff, <bno, <len, + <bnoa, <lena, 1 /* search left */); } @@ -1059,6 +1080,12 @@ xfs_alloc_ag_vextent_near( * If we couldn't get anything, give up. */ if (bno_cur_lt == NULL && bno_cur_gt == NULL) { + if (!forced++) { + trace_xfs_alloc_near_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; return 0; @@ -1093,12 +1120,13 @@ xfs_alloc_ag_vextent_near( return 0; } rlen = args->len; - (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, - ltlen, <new); + (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, + ltbnoa, ltlena, <new); ASSERT(ltnew >= ltbno); - ASSERT(ltnew + rlen <= ltbno + ltlen); + ASSERT(ltnew + rlen <= ltbnoa + ltlena); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->agbno = ltnew; + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, ltnew, rlen, XFSA_FIXUP_BNO_OK))) goto error0; @@ -1141,26 +1169,35 @@ xfs_alloc_ag_vextent_size( int i; /* temp status variable */ xfs_agblock_t rbno; /* returned block number */ xfs_extlen_t rlen; /* length of returned extent */ + int forced = 0; +restart: /* * Allocate and initialize a cursor for the by-size btree. 
*/ cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); bno_cur = NULL; + /* * Look for an entry >= maxlen+alignment-1 blocks. */ if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen + args->alignment - 1, &i))) goto error0; + /* - * If none, then pick up the last entry in the tree unless the - * tree is empty. + * If none or we have busy extents that we cannot allocate from, then + * we have to settle for a smaller extent. In the case that there are + * no large extents, this will return the last entry in the tree unless + * the tree is empty. In the case that there are only busy large + * extents, this will return the largest small extent unless there + * are no smaller extents available. */ - if (!i) { - if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, - &flen, &i))) + if (!i || forced > 1) { + error = xfs_alloc_ag_vextent_small(args, cnt_cur, + &fbno, &flen, &i); + if (error) goto error0; if (i == 0 || flen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); @@ -1168,23 +1205,56 @@ xfs_alloc_ag_vextent_size( return 0; } ASSERT(i == 1); + xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); + } else { + /* + * Search for a non-busy extent that is large enough. + * If we are at low space, don't check, or if we fall of + * the end of the btree, turn off the busy check and + * restart. + */ + for (;;) { + error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen); + + if (rlen >= args->maxlen) + break; + + error = xfs_btree_increment(cnt_cur, 0, &i); + if (error) + goto error0; + if (i == 0) { + /* + * Our only valid extents must have been busy. + * Make it unbusy by forcing the log out and + * retrying. If we've been here before, forcing + * the log isn't making the extents available, + * which means they have probably been freed in + * this transaction. 
In that case, we have to + * give up on them and we'll attempt a minlen + * allocation the next time around. + */ + xfs_btree_del_cursor(cnt_cur, + XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + if (!forced++) + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + } } - /* - * There's a freespace as big as maxlen+alignment-1, get it. - */ - else { - if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } + /* * In the first case above, we got the last entry in the * by-size btree. Now we check to see if the space hits maxlen * once aligned; if not, we search left for something better. * This can't happen in the second case above. */ - xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, - &rbno, &rlen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); XFS_WANT_CORRUPTED_GOTO(rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), error0); @@ -1209,8 +1279,8 @@ xfs_alloc_ag_vextent_size( XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (flen < bestrlen) break; - xfs_alloc_compute_aligned(fbno, flen, args->alignment, - args->minlen, &rbno, &rlen); + xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); XFS_WANT_CORRUPTED_GOTO(rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), @@ -1238,13 +1308,19 @@ xfs_alloc_ag_vextent_size( * Fix up the length. 
*/ args->len = rlen; - xfs_alloc_fix_len(args); - if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - trace_xfs_alloc_size_nominleft(args); - args->agbno = NULLAGBLOCK; - return 0; + if (rlen < args->minlen) { + if (!forced++) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + goto out_nominleft; } + xfs_alloc_fix_len(args); + + if (!xfs_alloc_fix_minleft(args)) + goto out_nominleft; rlen = args->len; XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); /* @@ -1274,6 +1350,12 @@ error0: if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); return error; + +out_nominleft: + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_nominleft(args); + args->agbno = NULLAGBLOCK; + return 0; } /* @@ -1313,6 +1395,9 @@ xfs_alloc_ag_vextent_small( if (error) goto error0; if (fbno != NULLAGBLOCK) { + xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1, + args->userdata); + if (args->userdata) { xfs_buf_t *bp; @@ -1388,6 +1473,7 @@ xfs_free_ag_extent( xfs_mount_t *mp; /* mount point struct for filesystem */ xfs_agblock_t nbno; /* new starting block of freespace */ xfs_extlen_t nlen; /* new length of freespace */ + xfs_perag_t *pag; /* per allocation group data */ mp = tp->t_mountp; /* @@ -1586,45 +1672,23 @@ xfs_free_ag_extent( XFS_WANT_CORRUPTED_GOTO(i == 1, error0); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); cnt_cur = NULL; + /* * Update the freespace totals in the ag and superblock. 
*/ - { - xfs_agf_t *agf; - xfs_perag_t *pag; /* per allocation group data */ - - pag = xfs_perag_get(mp, agno); - pag->pagf_freeblks += len; - xfs_perag_put(pag); + pag = xfs_perag_get(mp, agno); + error = xfs_alloc_update_counters(tp, pag, agbp, len); + xfs_perag_put(pag); + if (error) + goto error0; - agf = XFS_BUF_TO_AGF(agbp); - be32_add_cpu(&agf->agf_freeblks, len); - xfs_trans_agblocks_delta(tp, len); - XFS_WANT_CORRUPTED_GOTO( - be32_to_cpu(agf->agf_freeblks) <= - be32_to_cpu(agf->agf_length), - error0); - xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); - if (!isfl) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); - XFS_STATS_INC(xs_freex); - XFS_STATS_ADD(xs_freeb, len); - } + if (!isfl) + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); + XFS_STATS_INC(xs_freex); + XFS_STATS_ADD(xs_freeb, len); trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); - /* - * Since blocks move to the free list without the coordination - * used in xfs_bmap_finish, we can't allow block to be available - * for reallocation and non-transaction writing (user data) - * until we know that the transaction that moved it to the free - * list is permanently on disk. We track the blocks by declaring - * these blocks as "busy"; the busy list is maintained on a per-ag - * basis and each transaction records which entries should be removed - * when the iclog commits to disk. If a busy block is allocated, - * the iclog is pushed up to the LSN that freed the block. - */ - xfs_alloc_busy_insert(tp, agno, bno, len); return 0; error0: @@ -1919,21 +1983,6 @@ xfs_alloc_get_freelist( xfs_alloc_log_agf(tp, agbp, logflags); *bnop = bno; - /* - * As blocks are freed, they are added to the per-ag busy list and - * remain there until the freeing transaction is committed to disk. - * Now that we have allocated blocks, this list must be searched to see - * if a block is being reused. If one is, then the freeing transaction - * must be pushed to disk before this transaction. 
- * - * We do this by setting the current transaction to a sync transaction - * which guarantees that the freeing transaction is on disk before this - * transaction. This is done instead of a synchronous log force here so - * that we don't sit and wait with the AGF locked in the transaction - * during the log force. - */ - if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) - xfs_trans_set_sync(tp); return 0; } @@ -2391,131 +2440,54 @@ xfs_free_extent( memset(&args, 0, sizeof(xfs_alloc_arg_t)); args.tp = tp; args.mp = tp->t_mountp; + + /* + * validate that the block number is legal - the enables us to detect + * and handle a silent filesystem corruption rather than crashing. + */ args.agno = XFS_FSB_TO_AGNO(args.mp, bno); - ASSERT(args.agno < args.mp->m_sb.sb_agcount); + if (args.agno >= args.mp->m_sb.sb_agcount) + return EFSCORRUPTED; + args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); + if (args.agbno >= args.mp->m_sb.sb_agblocks) + return EFSCORRUPTED; + args.pag = xfs_perag_get(args.mp, args.agno); - if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) + ASSERT(args.pag); + + error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); + if (error) goto error0; -#ifdef DEBUG - ASSERT(args.agbp != NULL); - ASSERT((args.agbno + len) <= - be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); -#endif + + /* validate the extent size is legal now we have the agf locked */ + if (args.agbno + len > + be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { + error = EFSCORRUPTED; + goto error0; + } + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); + if (!error) + xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0); error0: xfs_perag_put(args.pag); return error; } - -/* - * AG Busy list management - * The busy list contains block ranges that have been freed but whose - * transactions have not yet hit disk. 
If any block listed in a busy - * list is reused, the transaction that freed it must be forced to disk - * before continuing to use the block. - * - * xfs_alloc_busy_insert - add to the per-ag busy list - * xfs_alloc_busy_clear - remove an item from the per-ag busy list - * xfs_alloc_busy_search - search for a busy extent - */ - -/* - * Insert a new extent into the busy tree. - * - * The busy extent tree is indexed by the start block of the busy extent. - * there can be multiple overlapping ranges in the busy extent tree but only - * ever one entry at a given start block. The reason for this is that - * multi-block extents can be freed, then smaller chunks of that extent - * allocated and freed again before the first transaction commit is on disk. - * If the exact same start block is freed a second time, we have to wait for - * that busy extent to pass out of the tree before the new extent is inserted. - * There are two main cases we have to handle here. - * - * The first case is a transaction that triggers a "free - allocate - free" - * cycle. This can occur during btree manipulations as a btree block is freed - * to the freelist, then allocated from the free list, then freed again. In - * this case, the second extxpnet free is what triggers the duplicate and as - * such the transaction IDs should match. Because the extent was allocated in - * this transaction, the transaction must be marked as synchronous. This is - * true for all cases where the free/alloc/free occurs in the one transaction, - * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. - * This serves to catch violations of the second case quite effectively. - * - * The second case is where the free/alloc/free occur in different - * transactions. In this case, the thread freeing the extent the second time - * can't mark the extent busy immediately because it is already tracked in a - * transaction that may be committing. 
When the log commit for the existing - * busy extent completes, the busy extent will be removed from the tree. If we - * allow the second busy insert to continue using that busy extent structure, - * it can be freed before this transaction is safely in the log. Hence our - * only option in this case is to force the log to remove the existing busy - * extent from the list before we insert the new one with the current - * transaction ID. - * - * The problem we are trying to avoid in the free-alloc-free in separate - * transactions is most easily described with a timeline: - * - * Thread 1 Thread 2 Thread 3 xfslogd - * xact alloc - * free X - * mark busy - * commit xact - * free xact - * xact alloc - * alloc X - * busy search - * mark xact sync - * commit xact - * free xact - * force log - * checkpoint starts - * .... - * xact alloc - * free X - * mark busy - * finds match - * *** KABOOM! *** - * .... - * log IO completes - * unbusy X - * checkpoint completes - * - * By issuing a log force in thread 3 @ "KABOOM", the thread will block until - * the checkpoint completes, and the busy extent it matched will have been - * removed from the tree when it is woken. Hence it can then continue safely. - * - * However, to ensure this matching process is robust, we need to use the - * transaction ID for identifying transaction, as delayed logging results in - * the busy extent and transaction lifecycles being different. i.e. the busy - * extent is active for a lot longer than the transaction. Hence the - * transaction structure can be freed and reallocated, then mark the same - * extent busy again in the new transaction. In this case the new transaction - * will have a different tid but can have the same address, and hence we need - * to check against the tid. - * - * Future: for delayed logging, we could avoid the log force if the extent was - * first freed in the current checkpoint sequence. 
This, however, requires the - * ability to pin the current checkpoint in memory until this transaction - * commits to ensure that both the original free and the current one combine - * logically into the one checkpoint. If the checkpoint sequences are - * different, however, we still need to wait on a log force. - */ void xfs_alloc_busy_insert( struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t bno, - xfs_extlen_t len) + xfs_extlen_t len, + unsigned int flags) { struct xfs_busy_extent *new; struct xfs_busy_extent *busyp; struct xfs_perag *pag; struct rb_node **rbp; - struct rb_node *parent; - int match; - + struct rb_node *parent = NULL; new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); if (!new) { @@ -2524,7 +2496,7 @@ xfs_alloc_busy_insert( * block, make this a synchronous transaction to insure that * the block is not reused before this transaction commits. */ - trace_xfs_alloc_busy(tp, agno, bno, len, 1); + trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len); xfs_trans_set_sync(tp); return; } @@ -2532,66 +2504,29 @@ xfs_alloc_busy_insert( new->agno = agno; new->bno = bno; new->length = len; - new->tid = xfs_log_get_trans_ident(tp); - INIT_LIST_HEAD(&new->list); + new->flags = flags; /* trace before insert to be able to see failed inserts */ - trace_xfs_alloc_busy(tp, agno, bno, len, 0); + trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); pag = xfs_perag_get(tp->t_mountp, new->agno); -restart: spin_lock(&pag->pagb_lock); rbp = &pag->pagb_tree.rb_node; - parent = NULL; - busyp = NULL; - match = 0; - while (*rbp && match >= 0) { + while (*rbp) { parent = *rbp; busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); if (new->bno < busyp->bno) { - /* may overlap, but exact start block is lower */ rbp = &(*rbp)->rb_left; - if (new->bno + new->length > busyp->bno) - match = busyp->tid == new->tid ? 
1 : -1; + ASSERT(new->bno + new->length <= busyp->bno); } else if (new->bno > busyp->bno) { - /* may overlap, but exact start block is higher */ rbp = &(*rbp)->rb_right; - if (bno < busyp->bno + busyp->length) - match = busyp->tid == new->tid ? 1 : -1; + ASSERT(bno >= busyp->bno + busyp->length); } else { - match = busyp->tid == new->tid ? 1 : -1; - break; + ASSERT(0); } } - if (match < 0) { - /* overlap marked busy in different transaction */ - spin_unlock(&pag->pagb_lock); - xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); - goto restart; - } - if (match > 0) { - /* - * overlap marked busy in same transaction. Update if exact - * start block match, otherwise combine the busy extents into - * a single range. - */ - if (busyp->bno == new->bno) { - busyp->length = max(busyp->length, new->length); - spin_unlock(&pag->pagb_lock); - ASSERT(tp->t_flags & XFS_TRANS_SYNC); - xfs_perag_put(pag); - kmem_free(new); - return; - } - rb_erase(&busyp->rb_node, &pag->pagb_tree); - new->length = max(busyp->bno + busyp->length, - new->bno + new->length) - - min(busyp->bno, new->bno); - new->bno = min(busyp->bno, new->bno); - } else - busyp = NULL; rb_link_node(&new->rb_node, parent, rbp); rb_insert_color(&new->rb_node, &pag->pagb_tree); @@ -2599,7 +2534,6 @@ restart: list_add(&new->list, &tp->t_busy); spin_unlock(&pag->pagb_lock); xfs_perag_put(pag); - kmem_free(busyp); } /* @@ -2648,31 +2582,466 @@ xfs_alloc_busy_search( } } spin_unlock(&pag->pagb_lock); - trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); xfs_perag_put(pag); return match; } +/* + * The found free extent [fbno, fend] overlaps part or all of the given busy + * extent. If the overlap covers the beginning, the end, or all of the busy + * extent, the overlapping portion can be made unbusy and used for the + * allocation. We can't split a busy extent because we can't modify a + * transaction/CIL context busy list, but we can update an entry's block + * number or length.
+ * + * Returns true if the extent can safely be reused, or false if the search + * needs to be restarted. + */ +STATIC bool +xfs_alloc_busy_update_extent( + struct xfs_mount *mp, + struct xfs_perag *pag, + struct xfs_busy_extent *busyp, + xfs_agblock_t fbno, + xfs_extlen_t flen, + bool userdata) +{ + xfs_agblock_t fend = fbno + flen; + xfs_agblock_t bbno = busyp->bno; + xfs_agblock_t bend = bbno + busyp->length; + + /* + * This extent is currently being discarded. Give the thread + * performing the discard a chance to mark the extent unbusy + * and retry. + */ + if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) { + spin_unlock(&pag->pagb_lock); + delay(1); + spin_lock(&pag->pagb_lock); + return false; + } + + /* + * If there is a busy extent overlapping a user allocation, we have + * no choice but to force the log and retry the search. + * + * Fortunately this does not happen during normal operation, but + * only if the filesystem is very low on space and has to dip into + * the AGFL for normal allocations. + */ + if (userdata) + goto out_force_log; + + if (bbno < fbno && bend > fend) { + /* + * Case 1: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +---------+ + * fbno fend + */ + + /* + * We would have to split the busy extent to be able to track + * it correctly, which we cannot do because we would have to + * modify the list of busy extents attached to the transaction + * or CIL context, which is immutable. + * + * Force out the log to clear the busy extent and retry the + * search.
+ */ + goto out_force_log; + } else if (bbno >= fbno && bend <= fend) { + /* + * Case 2: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-----------------+ + * fbno fend + * + * Case 3: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +--------------------------+ + * fbno fend + * + * Case 4: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +--------------------------+ + * fbno fend + * + * Case 5: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-----------------------------------+ + * fbno fend + * + */ + + /* + * The busy extent is fully covered by the extent we are + * allocating, and can simply be removed from the rbtree. + * However we cannot remove it from the immutable list + * tracking busy extents in the transaction or CIL context, + * so set the length to zero to mark it invalid. + * + * We also need to restart the busy extent search from the + * tree root, because erasing the node can rearrange the + * tree topology. + */ + rb_erase(&busyp->rb_node, &pag->pagb_tree); + busyp->length = 0; + return false; + } else if (fend < bend) { + /* + * Case 6: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +---------+ + * fbno fend + * + * Case 7: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +------------------+ + * fbno fend + * + */ + busyp->bno = fend; + } else if (bbno < fbno) { + /* + * Case 8: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-------------+ + * fbno fend + * + * Case 9: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +----------------------+ + * fbno fend + */ + busyp->length = fbno - busyp->bno; + } else { + ASSERT(0); + } + + trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen); + return true; + +out_force_log: + spin_unlock(&pag->pagb_lock); + xfs_log_force(mp, XFS_LOG_SYNC); + trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen); + spin_lock(&pag->pagb_lock); + return false; +} + + +/* + * For a given extent [fbno, flen], make sure we can reuse it safely. 
+ */ void -xfs_alloc_busy_clear( +xfs_alloc_busy_reuse( struct xfs_mount *mp, - struct xfs_busy_extent *busyp) + xfs_agnumber_t agno, + xfs_agblock_t fbno, + xfs_extlen_t flen, + bool userdata) { struct xfs_perag *pag; + struct rb_node *rbp; - trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, - busyp->length); + ASSERT(flen > 0); - ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, - busyp->length) == 1); + pag = xfs_perag_get(mp, agno); + spin_lock(&pag->pagb_lock); +restart: + rbp = pag->pagb_tree.rb_node; + while (rbp) { + struct xfs_busy_extent *busyp = + rb_entry(rbp, struct xfs_busy_extent, rb_node); + xfs_agblock_t bbno = busyp->bno; + xfs_agblock_t bend = bbno + busyp->length; - list_del_init(&busyp->list); + if (fbno + flen <= bbno) { + rbp = rbp->rb_left; + continue; + } else if (fbno >= bend) { + rbp = rbp->rb_right; + continue; + } - pag = xfs_perag_get(mp, busyp->agno); - spin_lock(&pag->pagb_lock); - rb_erase(&busyp->rb_node, &pag->pagb_tree); + if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen, + userdata)) + goto restart; + } spin_unlock(&pag->pagb_lock); xfs_perag_put(pag); +} + +/* + * For a given extent [fbno, flen], search the busy extent list to find a + * subset of the extent that is not busy. If *rlen is smaller than + * args->minlen no suitable extent could be found, and the higher level + * code needs to force out the log and retry the allocation. 
+ */ +STATIC void +xfs_alloc_busy_trim( + struct xfs_alloc_arg *args, + xfs_agblock_t bno, + xfs_extlen_t len, + xfs_agblock_t *rbno, + xfs_extlen_t *rlen) +{ + xfs_agblock_t fbno; + xfs_extlen_t flen; + struct rb_node *rbp; + + ASSERT(len > 0); + + spin_lock(&args->pag->pagb_lock); +restart: + fbno = bno; + flen = len; + rbp = args->pag->pagb_tree.rb_node; + while (rbp && flen >= args->minlen) { + struct xfs_busy_extent *busyp = + rb_entry(rbp, struct xfs_busy_extent, rb_node); + xfs_agblock_t fend = fbno + flen; + xfs_agblock_t bbno = busyp->bno; + xfs_agblock_t bend = bbno + busyp->length; + + if (fend <= bbno) { + rbp = rbp->rb_left; + continue; + } else if (fbno >= bend) { + rbp = rbp->rb_right; + continue; + } + + /* + * If this is a metadata allocation, try to reuse the busy + * extent instead of trimming the allocation. + */ + if (!args->userdata && + !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) { + if (!xfs_alloc_busy_update_extent(args->mp, args->pag, + busyp, fbno, flen, + false)) + goto restart; + continue; + } + + if (bbno <= fbno) { + /* start overlap */ + + /* + * Case 1: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +---------+ + * fbno fend + * + * Case 2: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-------------+ + * fbno fend + * + * Case 3: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-------------+ + * fbno fend + * + * Case 4: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-----------------+ + * fbno fend + * + * No unbusy region in extent, return failure. 
+ */ + if (fend <= bend) + goto fail; + + /* + * Case 5: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +----------------------+ + * fbno fend + * + * Case 6: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +--------------------------+ + * fbno fend + * + * Needs to be trimmed to: + * +-------+ + * fbno fend + */ + fbno = bend; + } else if (bend >= fend) { + /* end overlap */ + + /* + * Case 7: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +------------------+ + * fbno fend + * + * Case 8: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +--------------------------+ + * fbno fend + * + * Needs to be trimmed to: + * +-------+ + * fbno fend + */ + fend = bbno; + } else { + /* middle overlap */ + + /* + * Case 9: + * bbno bend + * +BBBBBBBBBBBBBBBBB+ + * +-----------------------------------+ + * fbno fend + * + * Can be trimmed to: + * +-------+ OR +-------+ + * fbno fend fbno fend + * + * Backward allocation leads to significant + * fragmentation of directories, which degrades + * directory performance, therefore we always want to + * choose the option that produces forward allocation + * patterns. + * Preferring the lower bno extent will make the next + * request use "fend" as the start of the next + * allocation; if the segment is no longer busy at + * that point, we'll get a contiguous allocation, but + * even if it is still busy, we will get a forward + * allocation. + * We try to avoid choosing the segment at "bend", + * because that can lead to the next allocation + * taking the segment at "fbno", which would be a + * backward allocation. We only use the segment at + * "fbno" if it is much larger than the current + * requested size, because in that case there's a + * good chance subsequent allocations will be + * contiguous. 
+ */ + if (bbno - fbno >= args->maxlen) { + /* left candidate fits perfectly */ + fend = bbno; + } else if (fend - bend >= args->maxlen * 4) { + /* right candidate has enough free space */ + fbno = bend; + } else if (bbno - fbno >= args->minlen) { + /* left candidate fits minimum requirement */ + fend = bbno; + } else { + goto fail; + } + } + + flen = fend - fbno; + } + spin_unlock(&args->pag->pagb_lock); + + if (fbno != bno || flen != len) { + trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, + fbno, flen); + } + *rbno = fbno; + *rlen = flen; + return; +fail: + /* + * Return a zero extent length as a failure indication. All callers + * re-check if the trimmed extent satisfies the minlen requirement. + */ + spin_unlock(&args->pag->pagb_lock); + trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0); + *rbno = fbno; + *rlen = 0; +} + +static void +xfs_alloc_busy_clear_one( + struct xfs_mount *mp, + struct xfs_perag *pag, + struct xfs_busy_extent *busyp) +{ + if (busyp->length) { + trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno, + busyp->length); + rb_erase(&busyp->rb_node, &pag->pagb_tree); + } + list_del_init(&busyp->list); kmem_free(busyp); } + +/* + * Remove all extents on the passed in list from the busy extents tree. + * If do_discard is set skip extents that need to be discarded, and mark + * these as undergoing a discard operation instead.
+ */ +void +xfs_alloc_busy_clear( + struct xfs_mount *mp, + struct list_head *list, + bool do_discard) +{ + struct xfs_busy_extent *busyp, *n; + struct xfs_perag *pag = NULL; + xfs_agnumber_t agno = NULLAGNUMBER; + + list_for_each_entry_safe(busyp, n, list, list) { + if (busyp->agno != agno) { + if (pag) { + spin_unlock(&pag->pagb_lock); + xfs_perag_put(pag); + } + pag = xfs_perag_get(mp, busyp->agno); + spin_lock(&pag->pagb_lock); + agno = busyp->agno; + } + + if (do_discard && busyp->length && + !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD)) + busyp->flags = XFS_ALLOC_BUSY_DISCARDED; + else + xfs_alloc_busy_clear_one(mp, pag, busyp); + } + + if (pag) { + spin_unlock(&pag->pagb_lock); + xfs_perag_put(pag); + } +} + +/* + * Callback for list_sort to sort busy extents by the AG they reside in. + */ +int +xfs_busy_extent_ag_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + return container_of(a, struct xfs_busy_extent, list)->agno - + container_of(b, struct xfs_busy_extent, list)->agno; +} diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index d0b3bc72005b..2f52b924be79 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -137,14 +137,28 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, #ifdef __KERNEL__ void xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, - xfs_agblock_t bno, xfs_extlen_t len); + xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags); void -xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); +xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list, + bool do_discard); int xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len); + +void +xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); + +int +xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b); + +static inline void xfs_alloc_busy_sort(struct list_head *list) +{ + 
list_sort(NULL, list, xfs_busy_extent_ag_cmp); +} + #endif /* __KERNEL__ */ /* diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 3916925e2584..2b3518826a69 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -95,6 +95,8 @@ xfs_allocbt_alloc_block( return 0; } + xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); + xfs_trans_agbtree_delta(cur->bc_tp, 1); new->s = cpu_to_be32(bno); @@ -118,18 +120,8 @@ xfs_allocbt_free_block( if (error) return error; - /* - * Since blocks move to the free list without the coordination used in - * xfs_bmap_finish, we can't allow block to be available for - * reallocation and non-transaction writing (user data) until we know - * that the transaction that moved it to the free list is permanently - * on disk. We track the blocks by declaring these blocks as "busy"; - * the busy list is maintained on a per-ag basis and each transaction - * records which entries should be removed when the iclog commits to - * disk. If a busy block is allocated, the iclog is pushed up to the - * LSN that freed the block. - */ - xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); + xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, + XFS_ALLOC_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); return 0; } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index dc3afd7739ff..e546a33214c9 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -89,36 +89,19 @@ xfs_bmap_add_attrfork_local( int *flags); /* inode logging flags */ /* - * Called by xfs_bmapi to update file extent records and the btree - * after allocating space (or doing a delayed allocation). 
- */ -STATIC int /* error */ -xfs_bmap_add_extent( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - xfs_fsblock_t *first, /* pointer to firstblock variable */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd); /* OK to allocate reserved blocks */ - -/* * Called by xfs_bmap_add_extent to handle cases converting a delayed * allocation to a real allocation. */ STATIC int /* error */ xfs_bmap_add_extent_delay_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int rsvd); /* OK to allocate reserved blocks */ + int *logflagsp); /* inode logging flags */ /* * Called by xfs_bmap_add_extent to handle cases converting a hole @@ -127,10 +110,9 @@ xfs_bmap_add_extent_delay_real( STATIC int /* error */ xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp,/* inode logging flags */ - int rsvd); /* OK to allocate reserved blocks */ + int *logflagsp); /* inode logging flags */ /* * Called by xfs_bmap_add_extent to handle cases converting a hole @@ -139,7 +121,7 @@ xfs_bmap_add_extent_hole_delay( STATIC int /* error */ 
xfs_bmap_add_extent_hole_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ @@ -152,7 +134,7 @@ xfs_bmap_add_extent_hole_real( STATIC int /* error */ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp); /* inode logging flags */ @@ -180,22 +162,6 @@ xfs_bmap_btree_to_extents( int whichfork); /* data or attr fork */ /* - * Called by xfs_bmapi to update file extent records and the btree - * after removing space (or undoing a delayed allocation). - */ -STATIC int /* error */ -xfs_bmap_del_extent( - xfs_inode_t *ip, /* incore inode pointer */ - xfs_trans_t *tp, /* current trans pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ - xfs_bmap_free_t *flist, /* list of extents to be freed */ - xfs_btree_cur_t *cur, /* if null, not a btree */ - xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp,/* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd); /* OK to allocate reserved blocks */ - -/* * Remove the entry "free" from the free item list. Prev points to the * previous entry, unless "free" is the head of the list. 
*/ @@ -474,14 +440,13 @@ xfs_bmap_add_attrfork_local( STATIC int /* error */ xfs_bmap_add_extent( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd) /* OK to use reserved data blocks */ + int whichfork) /* data or attr fork */ { xfs_btree_cur_t *cur; /* btree cursor or null */ xfs_filblks_t da_new; /* new count del alloc blocks used */ @@ -492,23 +457,27 @@ xfs_bmap_add_extent( xfs_extnum_t nextents; /* number of extents in file now */ XFS_STATS_INC(xs_add_exlist); + cur = *curp; ifp = XFS_IFORK_PTR(ip, whichfork); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - ASSERT(idx <= nextents); da_old = da_new = 0; error = 0; + + ASSERT(*idx >= 0); + ASSERT(*idx <= nextents); + /* * This is the first extent added to a new/empty file. * Special case this one, so other routines get to assume there are * already extents in the list. */ if (nextents == 0) { - xfs_iext_insert(ip, 0, 1, new, + xfs_iext_insert(ip, *idx, 1, new, whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); ASSERT(cur == NULL); - ifp->if_lastex = 0; + if (!isnullstartblock(new->br_startblock)) { XFS_IFORK_NEXT_SET(ip, whichfork, 1); logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); @@ -522,27 +491,25 @@ xfs_bmap_add_extent( if (cur) ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, - &logflags, rsvd))) - goto done; + error = xfs_bmap_add_extent_hole_delay(ip, idx, new, + &logflags); } /* * Real allocation off the end of the file. 
*/ - else if (idx == nextents) { + else if (*idx == nextents) { if (cur) ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, - &logflags, whichfork))) - goto done; + error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, + &logflags, whichfork); } else { xfs_bmbt_irec_t prev; /* old extent at offset idx */ /* * Get the record referred to by idx. */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &prev); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &prev); /* * If it's a real allocation record, and the new allocation ends * after the start of the referred to record, then we're filling @@ -557,22 +524,18 @@ xfs_bmap_add_extent( if (cur) ASSERT(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL); - if ((error = xfs_bmap_add_extent_delay_real(ip, - idx, &cur, new, &da_new, first, flist, - &logflags, rsvd))) - goto done; - } else if (new->br_state == XFS_EXT_NORM) { - ASSERT(new->br_state == XFS_EXT_NORM); - if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags))) - goto done; + error = xfs_bmap_add_extent_delay_real(ip, + idx, &cur, new, &da_new, + first, flist, &logflags); } else { - ASSERT(new->br_state == XFS_EXT_UNWRITTEN); - if ((error = xfs_bmap_add_extent_unwritten_real( - ip, idx, &cur, new, &logflags))) + ASSERT(new->br_state == XFS_EXT_NORM || + new->br_state == XFS_EXT_UNWRITTEN); + + error = xfs_bmap_add_extent_unwritten_real(ip, + idx, &cur, new, &logflags); + if (error) goto done; } - ASSERT(*curp == cur || *curp == NULL); } /* * Otherwise we're filling in a hole with an allocation. 
@@ -581,13 +544,15 @@ xfs_bmap_add_extent( if (cur) ASSERT((cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL) == 0); - if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, - new, &logflags, whichfork))) - goto done; + error = xfs_bmap_add_extent_hole_real(ip, idx, cur, + new, &logflags, whichfork); } } + if (error) + goto done; ASSERT(*curp == cur || *curp == NULL); + /* * Convert to a btree if necessary. */ @@ -615,7 +580,7 @@ xfs_bmap_add_extent( ASSERT(nblks <= da_old); if (nblks < da_old) xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - (int64_t)(da_old - nblks), rsvd); + (int64_t)(da_old - nblks), 0); } /* * Clear out the allocated field, done with it now in any case. @@ -640,14 +605,13 @@ done: STATIC int /* error */ xfs_bmap_add_extent_delay_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ xfs_fsblock_t *first, /* pointer to firstblock variable */ xfs_bmap_free_t *flist, /* list of extents to be freed */ - int *logflagsp, /* inode logging flags */ - int rsvd) /* OK to use reserved data block allocation */ + int *logflagsp) /* inode logging flags */ { xfs_btree_cur_t *cur; /* btree cursor */ int diff; /* temp value */ @@ -673,7 +637,7 @@ xfs_bmap_add_extent_delay_real( */ cur = *curp; ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &PREV); new_endoff = new->br_startoff + new->br_blockcount; ASSERT(PREV.br_startoff <= new->br_startoff); @@ -692,9 +656,9 @@ xfs_bmap_add_extent_delay_real( * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. 
*/ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); if (isnullstartblock(LEFT.br_startblock)) state |= BMAP_LEFT_DELAY; @@ -712,9 +676,9 @@ xfs_bmap_add_extent_delay_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) state |= BMAP_RIGHT_DELAY; @@ -745,14 +709,14 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left and right neighbors are both contiguous with new. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 2, state); - ip->i_df.if_lastex = idx - 1; + xfs_iext_remove(ip, *idx + 1, 2, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -784,13 +748,14 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The left neighbor is contiguous, the right is not. 
*/ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx - 1; - xfs_iext_remove(ip, idx, 1, state); + xfs_iext_remove(ip, *idx + 1, 1, state); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -814,14 +779,13 @@ xfs_bmap_add_extent_delay_real( * Filling in all of a previously delayed allocation extent. * The right neighbor is contiguous, the left is not. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; - xfs_iext_remove(ip, idx + 1, 1, state); + xfs_iext_remove(ip, *idx + 1, 1, state); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -837,6 +801,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount, PREV.br_state))) goto done; } + *dnew = 0; break; @@ -846,11 +811,10 @@ xfs_bmap_add_extent_delay_real( * Neither the left nor right neighbors are contiguous with * the new one. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -866,6 +830,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } + *dnew = 0; break; @@ -874,17 +839,16 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is contiguous. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - ip->i_df.if_lastex = idx - 1; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -904,7 +868,9 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock)); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + --*idx; *dnew = temp; break; @@ -913,12 +879,11 @@ xfs_bmap_add_extent_delay_real( * Filling in the first part of a previous delayed allocation. * The left neighbor is not contiguous. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startoff(ep, new_endoff); temp = PREV.br_blockcount - new->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(ip, idx, 1, new, state); - ip->i_df.if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -946,9 +911,10 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock) - (cur ? cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, idx + 1); + ep = xfs_iext_get_ext(ifp, *idx + 1); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); + *dnew = temp; break; @@ -958,15 +924,13 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is contiguous with the new allocation. 
*/ temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx + 1, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx + 1), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, RIGHT.br_state); - trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); - ip->i_df.if_lastex = idx + 1; + trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -983,10 +947,14 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_state))) goto done; } + temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock)); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; *dnew = temp; break; @@ -996,10 +964,9 @@ xfs_bmap_add_extent_delay_real( * The right neighbor is not contiguous. */ temp = PREV.br_blockcount - new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - xfs_iext_insert(ip, idx + 1, 1, new, state); - ip->i_df.if_lastex = idx + 1; + xfs_iext_insert(ip, *idx + 1, 1, new, state); ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1027,9 +994,11 @@ xfs_bmap_add_extent_delay_real( temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), startblockval(PREV.br_startblock) - (cur ? 
cur->bc_private.b.allocated : 0)); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; *dnew = temp; break; @@ -1056,7 +1025,7 @@ xfs_bmap_add_extent_delay_real( */ temp = new->br_startoff - PREV.br_startoff; temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; - trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, 0, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ LEFT = *new; RIGHT.br_state = PREV.br_state; @@ -1065,8 +1034,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_startoff = new_endoff; RIGHT.br_blockcount = temp2; /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ - xfs_iext_insert(ip, idx + 1, 2, &LEFT, state); - ip->i_df.if_lastex = idx + 1; + xfs_iext_insert(ip, *idx + 1, 2, &LEFT, state); ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1097,7 +1065,7 @@ xfs_bmap_add_extent_delay_real( (cur ? cur->bc_private.b.allocated : 0)); if (diff > 0 && xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - -((int64_t)diff), rsvd)) { + -((int64_t)diff), 0)) { /* * Ick gross gag me with a spoon. 
*/ @@ -1109,7 +1077,7 @@ xfs_bmap_add_extent_delay_real( if (!diff || !xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - -((int64_t)diff), rsvd)) + -((int64_t)diff), 0)) break; } if (temp2) { @@ -1118,18 +1086,20 @@ xfs_bmap_add_extent_delay_real( if (!diff || !xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - -((int64_t)diff), rsvd)) + -((int64_t)diff), 0)) break; } } } - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx + 2, state, _THIS_IP_); + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx + 2), nullstartblock((int)temp2)); - trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx + 2, state, _THIS_IP_); + + ++*idx; *dnew = temp + temp2; break; @@ -1161,7 +1131,7 @@ done: STATIC int /* error */ xfs_bmap_add_extent_unwritten_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp) /* inode logging flags */ @@ -1188,7 +1158,7 @@ xfs_bmap_add_extent_unwritten_real( error = 0; cur = *curp; ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &PREV); newext = new->br_state; oldext = (newext == XFS_EXT_UNWRITTEN) ? @@ -1211,9 +1181,9 @@ xfs_bmap_add_extent_unwritten_real( * Check and set flags if this segment has a left neighbor. * Don't set contiguous if the combined extent would be too large. 
*/ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); if (isnullstartblock(LEFT.br_startblock)) state |= BMAP_LEFT_DELAY; @@ -1231,9 +1201,9 @@ xfs_bmap_add_extent_unwritten_real( * Don't set contiguous if the combined extent would be too large. * Also check for all-three-contiguous being too large. */ - if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); if (isnullstartblock(RIGHT.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -1262,14 +1232,15 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left and right neighbors are both contiguous with new. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount + RIGHT.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 2, state); - ip->i_df.if_lastex = idx - 1; + xfs_iext_remove(ip, *idx + 1, 2, state); ip->i_d.di_nextents -= 2; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1305,13 +1276,14 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The left neighbor is contiguous, the right is not. 
*/ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), LEFT.br_blockcount + PREV.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx - 1; - xfs_iext_remove(ip, idx, 1, state); + xfs_iext_remove(ip, *idx + 1, 1, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1341,13 +1313,12 @@ xfs_bmap_add_extent_unwritten_real( * Setting all of a previous oldext extent to newext. * The right neighbor is contiguous, the left is not. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount + RIGHT.br_blockcount); xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; - xfs_iext_remove(ip, idx + 1, 1, state); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_remove(ip, *idx + 1, 1, state); ip->i_d.di_nextents--; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1378,11 +1349,10 @@ xfs_bmap_add_extent_unwritten_real( * Neither the left nor right neighbors are contiguous with * the new one. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_state(ep, newext); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -1404,21 +1374,22 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is contiguous. 
*/ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), LEFT.br_blockcount + new->br_blockcount); xfs_bmbt_set_startoff(ep, PREV.br_startoff + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + --*idx; - ip->i_df.if_lastex = idx - 1; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -1449,17 +1420,16 @@ xfs_bmap_add_extent_unwritten_real( * Setting the first part of a previous oldext extent to newext. * The left neighbor is not contiguous. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); xfs_bmbt_set_startoff(ep, new_endoff); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); xfs_bmbt_set_startblock(ep, new->br_startblock + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_insert(ip, idx, 1, new, state); - ip->i_df.if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1488,17 +1458,19 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is contiguous with the new allocation. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); - trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, newext); - trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ip->i_df.if_lastex = idx + 1; if (cur == NULL) rval = XFS_ILOG_DEXT; else { @@ -1528,13 +1500,14 @@ xfs_bmap_add_extent_unwritten_real( * Setting the last part of a previous oldext extent to newext. * The right neighbor is not contiguous. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, PREV.br_blockcount - new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + ++*idx; + xfs_iext_insert(ip, *idx, 1, new, state); - xfs_iext_insert(ip, idx + 1, 1, new, state); - ip->i_df.if_lastex = idx + 1; ip->i_d.di_nextents++; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1568,10 +1541,10 @@ xfs_bmap_add_extent_unwritten_real( * newext. Contiguity is impossible here. * One extent becomes three extents. 
*/ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, new->br_startoff - PREV.br_startoff); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); r[0] = *new; r[1].br_startoff = new_endoff; @@ -1579,8 +1552,10 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startoff + PREV.br_blockcount - new_endoff; r[1].br_startblock = new->br_startblock + new->br_blockcount; r[1].br_state = oldext; - xfs_iext_insert(ip, idx + 1, 2, &r[0], state); - ip->i_df.if_lastex = idx + 1; + + ++*idx; + xfs_iext_insert(ip, *idx, 2, &r[0], state); + ip->i_d.di_nextents += 2; if (cur == NULL) rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; @@ -1650,12 +1625,10 @@ done: STATIC int /* error */ xfs_bmap_add_extent_hole_delay( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ - int *logflagsp, /* inode logging flags */ - int rsvd) /* OK to allocate reserved blocks */ + int *logflagsp) /* inode logging flags */ { - xfs_bmbt_rec_host_t *ep; /* extent record for idx */ xfs_ifork_t *ifp; /* inode fork pointer */ xfs_bmbt_irec_t left; /* left neighbor extent entry */ xfs_filblks_t newlen=0; /* new indirect size */ @@ -1665,16 +1638,15 @@ xfs_bmap_add_extent_hole_delay( xfs_filblks_t temp=0; /* temp for indirect calculations */ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - ep = xfs_iext_get_ext(ifp, idx); state = 0; ASSERT(isnullstartblock(new->br_startblock)); /* * Check and set flags if this segment has a left neighbor */ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; @@ -1684,9 +1656,9 @@ 
xfs_bmap_add_extent_hole_delay( * Check and set flags if the current (right) segment exists. * If it doesn't exist, we're converting the hole at end-of-file. */ - if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(ep, &right); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; @@ -1719,21 +1691,21 @@ xfs_bmap_add_extent_hole_delay( * on the left and on the right. * Merge all three into a single extent record. */ + --*idx; temp = left.br_blockcount + new->br_blockcount + right.br_blockcount; - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock) + startblockval(right.br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - xfs_iext_remove(ip, idx, 1, state); - ip->i_df.if_lastex = idx - 1; + xfs_iext_remove(ip, *idx + 1, 1, state); break; case BMAP_LEFT_CONTIG: @@ -1742,17 +1714,17 @@ xfs_bmap_add_extent_hole_delay( * on the left. * Merge the new allocation with the left neighbor. 
*/ + --*idx; temp = left.br_blockcount + new->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); + + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); oldlen = startblockval(left.br_startblock) + startblockval(new->br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), + xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), nullstartblock((int)newlen)); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); - - ip->i_df.if_lastex = idx - 1; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); break; case BMAP_RIGHT_CONTIG: @@ -1761,16 +1733,15 @@ xfs_bmap_add_extent_hole_delay( * on the right. * Merge the new allocation with the right neighbor. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); temp = new->br_blockcount + right.br_blockcount; oldlen = startblockval(new->br_startblock) + startblockval(right.br_startblock); newlen = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_allf(ep, new->br_startoff, + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, nullstartblock((int)newlen), temp, right.br_state); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - - ip->i_df.if_lastex = idx; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); break; case 0: @@ -1780,14 +1751,13 @@ xfs_bmap_add_extent_hole_delay( * Insert a new entry. */ oldlen = newlen = 0; - xfs_iext_insert(ip, idx, 1, new, state); - ip->i_df.if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); break; } if (oldlen != newlen) { ASSERT(oldlen > newlen); xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - (int64_t)(oldlen - newlen), rsvd); + (int64_t)(oldlen - newlen), 0); /* * Nothing to do for disk quota accounting here. 
*/ @@ -1803,13 +1773,12 @@ xfs_bmap_add_extent_hole_delay( STATIC int /* error */ xfs_bmap_add_extent_hole_real( xfs_inode_t *ip, /* incore inode pointer */ - xfs_extnum_t idx, /* extent number to update/insert */ + xfs_extnum_t *idx, /* extent number to update/insert */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *new, /* new data to add to file extents */ int *logflagsp, /* inode logging flags */ int whichfork) /* data or attr fork */ { - xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */ int error; /* error return value */ int i; /* temp state */ xfs_ifork_t *ifp; /* inode fork pointer */ @@ -1819,8 +1788,7 @@ xfs_bmap_add_extent_hole_real( int state; /* state bits, accessed thru macros */ ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); - ep = xfs_iext_get_ext(ifp, idx); + ASSERT(*idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); state = 0; if (whichfork == XFS_ATTR_FORK) @@ -1829,9 +1797,9 @@ xfs_bmap_add_extent_hole_real( /* * Check and set flags if this segment has a left neighbor. */ - if (idx > 0) { + if (*idx > 0) { state |= BMAP_LEFT_VALID; - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); if (isnullstartblock(left.br_startblock)) state |= BMAP_LEFT_DELAY; } @@ -1840,9 +1808,9 @@ xfs_bmap_add_extent_hole_real( * Check and set flags if this segment has a current value. * Not true if we're inserting into the "hole" at eof. */ - if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { + if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { state |= BMAP_RIGHT_VALID; - xfs_bmbt_get_all(ep, &right); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); if (isnullstartblock(right.br_startblock)) state |= BMAP_RIGHT_DELAY; } @@ -1879,14 +1847,15 @@ xfs_bmap_add_extent_hole_real( * left and on the right. * Merge all three into a single extent record. 
*/ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), left.br_blockcount + new->br_blockcount + right.br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + xfs_iext_remove(ip, *idx + 1, 1, state); - xfs_iext_remove(ip, idx, 1, state); - ifp->if_lastex = idx - 1; XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); if (cur == NULL) { @@ -1921,12 +1890,12 @@ xfs_bmap_add_extent_hole_real( * on the left. * Merge the new allocation with the left neighbor. */ - trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); - xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), + --*idx; + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), left.br_blockcount + new->br_blockcount); - trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ifp->if_lastex = idx - 1; if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { @@ -1952,13 +1921,13 @@ xfs_bmap_add_extent_hole_real( * on the right. * Merge the new allocation with the right neighbor. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); - xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), + new->br_startoff, new->br_startblock, new->br_blockcount + right.br_blockcount, right.br_state); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); - ifp->if_lastex = idx; if (cur == NULL) { rval = xfs_ilog_fext(whichfork); } else { @@ -1984,8 +1953,7 @@ xfs_bmap_add_extent_hole_real( * real allocation. * Insert a new entry. 
*/ - xfs_iext_insert(ip, idx, 1, new, state); - ifp->if_lastex = idx; + xfs_iext_insert(ip, *idx, 1, new, state); XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); if (cur == NULL) { @@ -2365,6 +2333,13 @@ xfs_bmap_rtalloc( */ if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; + + /* + * Lock out other modifications to the RT bitmap inode. + */ + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); + /* * If it's an allocation to an empty file at offset 0, * pick an extent that will space things out in the rt area. @@ -2826,13 +2801,12 @@ STATIC int /* error */ xfs_bmap_del_extent( xfs_inode_t *ip, /* incore inode pointer */ xfs_trans_t *tp, /* current transaction pointer */ - xfs_extnum_t idx, /* extent number to update/delete */ + xfs_extnum_t *idx, /* extent number to update/delete */ xfs_bmap_free_t *flist, /* list of extents to be freed */ xfs_btree_cur_t *cur, /* if null, not a btree */ xfs_bmbt_irec_t *del, /* data to remove from extents */ int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - int rsvd) /* OK to allocate reserved blocks */ + int whichfork) /* data or attr fork */ { xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ @@ -2863,10 +2837,10 @@ xfs_bmap_del_extent( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); - ASSERT((idx >= 0) && (idx < ifp->if_bytes / + ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(del->br_blockcount > 0); - ep = xfs_iext_get_ext(ifp, idx); + ep = xfs_iext_get_ext(ifp, *idx); xfs_bmbt_get_all(ep, &got); ASSERT(got.br_startoff <= del->br_startoff); del_endoff = del->br_startoff + del->br_blockcount; @@ -2940,11 +2914,12 @@ xfs_bmap_del_extent( /* * Matches the whole extent. Delete the entry. 
*/ - xfs_iext_remove(ip, idx, 1, + xfs_iext_remove(ip, *idx, 1, whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); - ifp->if_lastex = idx; + --*idx; if (delay) break; + XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); flags |= XFS_ILOG_CORE; @@ -2961,21 +2936,20 @@ xfs_bmap_del_extent( /* * Deleting the first part of the extent. */ - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_startoff(ep, del_endoff); temp = got.br_blockcount - del->br_blockcount; xfs_bmbt_set_blockcount(ep, temp); - ifp->if_lastex = idx; if (delay) { temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); da_new = temp; break; } xfs_bmbt_set_startblock(ep, del_endblock); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; @@ -2991,18 +2965,17 @@ xfs_bmap_del_extent( * Deleting the last part of the extent. */ temp = got.br_blockcount - del->br_blockcount; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); - ifp->if_lastex = idx; if (delay) { temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), da_old); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); da_new = temp; break; } - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); if (!cur) { flags |= xfs_ilog_fext(whichfork); break; @@ -3019,7 +2992,7 @@ xfs_bmap_del_extent( * Deleting the middle of the extent. 
*/ temp = del->br_startoff - got.br_startoff; - trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); xfs_bmbt_set_blockcount(ep, temp); new.br_startoff = del_endoff; temp2 = got_endoff - del_endoff; @@ -3106,9 +3079,9 @@ xfs_bmap_del_extent( } } } - trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); - xfs_iext_insert(ip, idx + 1, 1, &new, state); - ifp->if_lastex = idx + 1; + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + xfs_iext_insert(ip, *idx + 1, 1, &new, state); + ++*idx; break; } /* @@ -3135,7 +3108,7 @@ xfs_bmap_del_extent( ASSERT(da_old >= da_new); if (da_old > da_new) { xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)(da_old - da_new), rsvd); + (int64_t)(da_old - da_new), 0); } done: *logflagsp = flags; @@ -3519,7 +3492,7 @@ xfs_bmap_search_extents( if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { - xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, + xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " "blkcnt: %llx extent-state: %x lastx: %x\n", @@ -4193,12 +4166,11 @@ xfs_bmap_read_extents( num_recs = xfs_btree_get_numrecs(block); if (unlikely(i + num_recs > room)) { ASSERT(i + num_recs <= room); - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, + xfs_warn(ip->i_mount, "corrupt dinode %Lu, (btree extents).", (unsigned long long) ip->i_ino); - XFS_ERROR_REPORT("xfs_bmap_read_extents(1)", - XFS_ERRLEVEL_LOW, - ip->i_mount); + XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)", + XFS_ERRLEVEL_LOW, ip->i_mount, block); goto error0; } XFS_WANT_CORRUPTED_GOTO( @@ -4556,29 +4528,24 @@ xfs_bmapi( if (rt) { error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - -((int64_t)extsz), (flags & - XFS_BMAPI_RSVBLOCKS)); + -((int64_t)extsz), 0); } else { error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - -((int64_t)alen), (flags & - 
XFS_BMAPI_RSVBLOCKS)); + -((int64_t)alen), 0); } if (!error) { error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - -((int64_t)indlen), (flags & - XFS_BMAPI_RSVBLOCKS)); + -((int64_t)indlen), 0); if (error && rt) xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - (int64_t)extsz, (flags & - XFS_BMAPI_RSVBLOCKS)); + (int64_t)extsz, 0); else if (error) xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)alen, (flags & - XFS_BMAPI_RSVBLOCKS)); + (int64_t)alen, 0); } if (error) { @@ -4695,13 +4662,12 @@ xfs_bmapi( if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) got.br_state = XFS_EXT_UNWRITTEN; } - error = xfs_bmap_add_extent(ip, lastx, &cur, &got, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &got, firstblock, flist, &tmp_logflags, - whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); + whichfork); logflags |= tmp_logflags; if (error) goto error0; - lastx = ifp->if_lastex; ep = xfs_iext_get_ext(ifp, lastx); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); xfs_bmbt_get_all(ep, &got); @@ -4797,13 +4763,12 @@ xfs_bmapi( mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx, &cur, mval, + error = xfs_bmap_add_extent(ip, &lastx, &cur, mval, firstblock, flist, &tmp_logflags, - whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); + whichfork); logflags |= tmp_logflags; if (error) goto error0; - lastx = ifp->if_lastex; ep = xfs_iext_get_ext(ifp, lastx); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); xfs_bmbt_get_all(ep, &got); @@ -4862,14 +4827,14 @@ xfs_bmapi( /* * Else go on to the next record. */ - ep = xfs_iext_get_ext(ifp, ++lastx); prev = got; - if (lastx >= nextents) - eof = 1; - else + if (++lastx < nextents) { + ep = xfs_iext_get_ext(ifp, lastx); xfs_bmbt_get_all(ep, &got); + } else { + eof = 1; + } } - ifp->if_lastex = lastx; *nmap = n; /* * Transform from btree to extents, give it cur. 
@@ -4978,7 +4943,6 @@ xfs_bmapi_single( ASSERT(!isnullstartblock(got.br_startblock)); ASSERT(bno < got.br_startoff + got.br_blockcount); *fsb = got.br_startblock + (bno - got.br_startoff); - ifp->if_lastex = lastx; return 0; } @@ -5020,7 +4984,6 @@ xfs_bunmapi( int tmp_logflags; /* partial logging flags */ int wasdel; /* was a delayed alloc extent */ int whichfork; /* data or attribute fork */ - int rsvd; /* OK to allocate reserved blocks */ xfs_fsblock_t sum; trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); @@ -5038,7 +5001,7 @@ xfs_bunmapi( mp = ip->i_mount; if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; + ASSERT(len > 0); ASSERT(nexts >= 0); ASSERT(ifp->if_ext_max == @@ -5154,9 +5117,9 @@ xfs_bunmapi( del.br_blockcount = mod; } del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx, &cur, &del, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + XFS_DATA_FORK); if (error) goto error0; goto nodelete; @@ -5182,9 +5145,12 @@ xfs_bunmapi( */ ASSERT(bno >= del.br_blockcount); bno -= del.br_blockcount; - if (bno < got.br_startoff) { - if (--lastx >= 0) - xfs_bmbt_get_all(--ep, &got); + if (got.br_startoff > bno) { + if (--lastx >= 0) { + ep = xfs_iext_get_ext(ifp, + lastx); + xfs_bmbt_get_all(ep, &got); + } } continue; } else if (del.br_state == XFS_EXT_UNWRITTEN) { @@ -5208,18 +5174,19 @@ xfs_bunmapi( prev.br_startoff = start; } prev.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx - 1, &cur, + lastx--; + error = xfs_bmap_add_extent(ip, &lastx, &cur, &prev, firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + XFS_DATA_FORK); if (error) goto error0; goto nodelete; } else { ASSERT(del.br_state == XFS_EXT_NORM); del.br_state = XFS_EXT_UNWRITTEN; - error = xfs_bmap_add_extent(ip, lastx, &cur, + error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, firstblock, flist, &logflags, - XFS_DATA_FORK, 0); + XFS_DATA_FORK); if (error) goto 
error0; goto nodelete; @@ -5234,13 +5201,13 @@ xfs_bunmapi( rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); do_div(rtexts, mp->m_sb.sb_rextsize); xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - (int64_t)rtexts, rsvd); + (int64_t)rtexts, 0); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)del.br_blockcount, rsvd); + (int64_t)del.br_blockcount, 0); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); @@ -5271,31 +5238,29 @@ xfs_bunmapi( error = XFS_ERROR(ENOSPC); goto error0; } - error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, - &tmp_logflags, whichfork, rsvd); + error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, + &tmp_logflags, whichfork); logflags |= tmp_logflags; if (error) goto error0; bno = del.br_startoff - 1; nodelete: - lastx = ifp->if_lastex; /* * If not done go on to the next (previous) record. - * Reset ep in case the extents array was re-alloced. 
*/ - ep = xfs_iext_get_ext(ifp, lastx); if (bno != (xfs_fileoff_t)-1 && bno >= start) { - if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) || - xfs_bmbt_get_startoff(ep) > bno) { - if (--lastx >= 0) - ep = xfs_iext_get_ext(ifp, lastx); - } - if (lastx >= 0) + if (lastx >= 0) { + ep = xfs_iext_get_ext(ifp, lastx); + if (xfs_bmbt_get_startoff(ep) > bno) { + if (--lastx >= 0) + ep = xfs_iext_get_ext(ifp, + lastx); + } xfs_bmbt_get_all(ep, &got); + } extno++; } } - ifp->if_lastex = lastx; *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; ASSERT(ifp->if_ext_max == XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); @@ -5772,7 +5737,7 @@ xfs_check_block( else thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); if (*thispa == *pp) { - cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", + xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", __func__, j, i, (unsigned long long)be64_to_cpu(*thispa)); panic("%s: ptrs are equal in node\n", @@ -5937,11 +5902,11 @@ xfs_bmap_check_leaf_extents( return; error0: - cmn_err(CE_WARN, "%s: at error0", __func__); + xfs_warn(mp, "%s: at error0", __func__); if (bp_release) xfs_trans_brelse(NULL, bp); error_norelse: - cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", + xfs_warn(mp, "%s: BAD after btree leaves for %d extents", __func__, i); panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); return; @@ -6144,7 +6109,7 @@ xfs_bmap_punch_delalloc_range( if (error) { /* something screwed, just bail */ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_fs_cmn_err(CE_ALERT, ip->i_mount, + xfs_alert(ip->i_mount, "Failed delalloc mapping lookup ino %lld fsb %lld.", ip->i_ino, start_fsb); } diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 3651191daea1..c62234bde053 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -69,7 +69,6 @@ typedef struct xfs_bmap_free #define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ #define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ #define 
XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ -#define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */ #define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ /* combine contig. space */ @@ -87,7 +86,6 @@ typedef struct xfs_bmap_free { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \ { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ - { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 6f8c21ce0d6d..7b7e005e3dcc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -130,10 +130,12 @@ xfs_buf_item_log_check( orig = bip->bli_orig; buffer = XFS_BUF_PTR(bp); for (x = 0; x < XFS_BUF_COUNT(bp); x++) { - if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) - cmn_err(CE_PANIC, - "xfs_buf_item_log_check bip %x buffer %x orig %x index %d", - bip, bp, orig, x); + if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) { + xfs_emerg(bp->b_mount, + "%s: bip %x buffer %x orig %x index %d", + __func__, bip, bp, orig, x); + ASSERT(0); + } } } #else @@ -983,15 +985,14 @@ xfs_buf_iodone_callbacks( if (XFS_BUF_TARGET(bp) != lasttarg || time_after(jiffies, (lasttime + 5*HZ))) { lasttime = jiffies; - cmn_err(CE_ALERT, "Device %s, XFS metadata write error" - " block 0x%llx in %s", + xfs_alert(mp, "Device %s: metadata write error block 0x%llx", XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), - (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); + (__uint64_t)XFS_BUF_ADDR(bp)); } lasttarg = XFS_BUF_TARGET(bp); /* - * If the write was asynchronous then noone will be looking for the + * If the write was asynchronous then no one will be looking for the * error. Clear the error state and write the buffer out again. 
* * During sync or umount we'll write all pending buffers again diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 1c00bedb3175..6102ac6d1dff 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1995,13 +1995,12 @@ xfs_da_do_buf( error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); if (unlikely(error == EFSCORRUPTED)) { if (xfs_error_level >= XFS_ERRLEVEL_LOW) { - cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n", - (long long)bno); - cmn_err(CE_ALERT, "dir: inode %lld\n", + xfs_alert(mp, "%s: bno %lld dir: inode %lld", + __func__, (long long)bno, (long long)dp->i_ino); for (i = 0; i < nmap; i++) { - cmn_err(CE_ALERT, - "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n", + xfs_alert(mp, +"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", i, (long long)mapp[i].br_startoff, (long long)mapp[i].br_startblock, diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e60490bc00a6..9a84a85c03b1 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -202,7 +202,7 @@ xfs_swap_extents( xfs_inode_t *tip, /* tmp inode */ xfs_swapext_t *sxp) { - xfs_mount_t *mp; + xfs_mount_t *mp = ip->i_mount; xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; xfs_ifork_t *tempifp, *ifp, *tifp; @@ -212,16 +212,12 @@ xfs_swap_extents( int taforkblks = 0; __uint64_t tmp; - mp = ip->i_mount; - tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); if (!tempifp) { error = XFS_ERROR(ENOMEM); goto out; } - sbp = &sxp->sx_stat; - /* * we have to do two separate lock calls here to keep lockdep * happy. 
If we try to get all the locks in one call, lock will @@ -270,9 +266,9 @@ xfs_swap_extents( /* check inode formats now that data is flushed */ error = xfs_swap_extents_check_format(ip, tip); if (error) { - xfs_fs_cmn_err(CE_NOTE, mp, + xfs_notice(mp, "%s: inode 0x%llx format is incompatible for exchanging.", - __FILE__, ip->i_ino); + __func__, ip->i_ino); goto out_unlock; } diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index a1321bc7f192..dba7a71cedf3 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -159,7 +159,7 @@ xfs_dir_ino_validate( XFS_AGINO_TO_INO(mp, agno, agino) == ino; if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, XFS_RANDOM_DIR_INO_VALIDATE))) { - xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx", + xfs_warn(mp, "Invalid inode number 0x%Lx", (unsigned long long) ino); XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index f9a0864b696a..a0aab7d3294f 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -899,10 +899,9 @@ xfs_dir2_leafn_rebalance( if(blk2->index < 0) { state->inleaf = 1; blk2->index = 0; - cmn_err(CE_ALERT, - "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: " - "blk1->index %d\n", - blk1->index); + xfs_alert(args->dp->i_mount, + "%s: picked the wrong leaf? 
reverting original leaf: blk1->index %d\n", + __func__, blk1->index); } } @@ -1641,26 +1640,22 @@ xfs_dir2_node_addname_int( } if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { - cmn_err(CE_ALERT, - "xfs_dir2_node_addname_int: dir ino " - "%llu needed freesp block %lld for\n" - " data block %lld, got %lld\n" - " ifbno %llu lastfbno %d\n", - (unsigned long long)dp->i_ino, + xfs_alert(mp, + "%s: dir ino " "%llu needed freesp block %lld for\n" + " data block %lld, got %lld ifbno %llu lastfbno %d", + __func__, (unsigned long long)dp->i_ino, (long long)xfs_dir2_db_to_fdb(mp, dbno), (long long)dbno, (long long)fbno, (unsigned long long)ifbno, lastfbno); if (fblk) { - cmn_err(CE_ALERT, - " fblk 0x%p blkno %llu " - "index %d magic 0x%x\n", + xfs_alert(mp, + " fblk 0x%p blkno %llu index %d magic 0x%x", fblk, (unsigned long long)fblk->blkno, fblk->index, fblk->magic); } else { - cmn_err(CE_ALERT, - " ... fblk is NULL\n"); + xfs_alert(mp, " ... fblk is NULL"); } XFS_ERROR_REPORT("xfs_dir2_node_addname_int", XFS_ERRLEVEL_LOW, mp); diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 4c7db74a05f7..39f06336b99d 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -48,7 +48,7 @@ xfs_error_trap(int e) break; if (e != xfs_etrap[i]) continue; - cmn_err(CE_NOTE, "xfs_error_trap: error %d", e); + xfs_notice(NULL, "%s: error %d", __func__, e); BUG(); break; } @@ -74,7 +74,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression, for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) { - cmn_err(CE_WARN, + xfs_warn(NULL, "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", expression, file, line, xfs_etest_fsname[i]); return 1; @@ -95,14 +95,14 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp) for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) { - cmn_err(CE_WARN, "XFS error tag #%d on", error_tag); + xfs_warn(mp, "error tag #%d on", error_tag); 
return 0; } } for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { if (xfs_etest[i] == 0) { - cmn_err(CE_WARN, "Turned on XFS error tag #%d", + xfs_warn(mp, "Turned on XFS error tag #%d", error_tag); xfs_etest[i] = error_tag; xfs_etest_fsid[i] = fsid; @@ -114,7 +114,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp) } } - cmn_err(CE_WARN, "error tag overflow, too many turned on"); + xfs_warn(mp, "error tag overflow, too many turned on"); return 1; } @@ -133,7 +133,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) && xfs_etest[i] != 0) { cleared = 1; - cmn_err(CE_WARN, "Clearing XFS error tag #%d", + xfs_warn(mp, "Clearing XFS error tag #%d", xfs_etest[i]); xfs_etest[i] = 0; xfs_etest_fsid[i] = 0LL; @@ -144,9 +144,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) } if (loud || cleared) - cmn_err(CE_WARN, - "Cleared all XFS error tags for filesystem \"%s\"", - mp->m_fsname); + xfs_warn(mp, "Cleared all XFS error tags for filesystem"); return 0; } @@ -162,9 +160,8 @@ xfs_error_report( inst_t *ra) { if (level <= xfs_error_level) { - xfs_cmn_err(XFS_PTAG_ERROR_REPORT, - CE_ALERT, mp, - "XFS internal error %s at line %d of file %s. Caller 0x%p\n", + xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, + "Internal error %s at line %d of file %s. Caller 0x%p\n", tag, linenum, filename, ra); xfs_stack_trace(); @@ -184,4 +181,5 @@ xfs_corruption_error( if (level <= xfs_error_level) xfs_hex_dump(p, 16); xfs_error_report(tag, level, mp, filename, linenum, ra); + xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); } diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 10dce5475f02..079a367f44ee 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -145,10 +145,8 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud); #endif /* DEBUG */ /* - * XFS panic tags -- allow a call to xfs_cmn_err() be turned into - * a panic by setting xfs_panic_mask in a - * sysctl. 
update xfs_max[XFS_PARAM] if - * more are added. + * XFS panic tags -- allow a call to xfs_alert_tag() be turned into + * a panic by setting xfs_panic_mask in a sysctl. */ #define XFS_NO_PTAG 0 #define XFS_PTAG_IFLUSH 0x00000001 @@ -160,17 +158,4 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud); #define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 #define XFS_PTAG_FSBLOCK_ZERO 0x00000080 -struct xfs_mount; - -extern void xfs_hex_dump(void *p, int length); - -#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \ - xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args) - -#define xfs_fs_mount_cmn_err(f, fmt, args...) \ - do { \ - if (!(f & XFS_MFSI_QUIET)) \ - cmn_err(CE_WARN, "XFS: " fmt, ## args); \ - } while (0) - #endif /* __XFS_ERROR_H__ */ diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index cec89dd5d7d2..9153d2c77caf 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -53,6 +53,9 @@ xfs_fs_geometry( xfs_fsop_geom_t *geo, int new_version) { + + memset(geo, 0, sizeof(*geo)); + geo->blocksize = mp->m_sb.sb_blocksize; geo->rtextsize = mp->m_sb.sb_rextsize; geo->agblocks = mp->m_sb.sb_agblocks; @@ -382,8 +385,8 @@ xfs_growfs_data_private( XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp); if (error) { - xfs_fs_cmn_err(CE_WARN, mp, - "error %d reading secondary superblock for ag %d", + xfs_warn(mp, + "error %d reading secondary superblock for ag %d", error, agno); break; } @@ -396,7 +399,7 @@ xfs_growfs_data_private( if (!(error = xfs_bwrite(mp, bp))) { continue; } else { - xfs_fs_cmn_err(CE_WARN, mp, + xfs_warn(mp, "write error %d updating secondary superblock for ag %d", error, agno); break; /* no point in continuing */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 0626a32c3447..84ebeec16642 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1055,28 +1055,23 @@ xfs_difree( */ agno = XFS_INO_TO_AGNO(mp, inode); if (agno >= mp->m_sb.sb_agcount) { - cmn_err(CE_WARN, - 
"xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", - agno, mp->m_sb.sb_agcount, mp->m_fsname); + xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", + __func__, agno, mp->m_sb.sb_agcount); ASSERT(0); return XFS_ERROR(EINVAL); } agino = XFS_INO_TO_AGINO(mp, inode); if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { - cmn_err(CE_WARN, - "xfs_difree: inode != XFS_AGINO_TO_INO() " - "(%llu != %llu) on %s. Returning EINVAL.", - (unsigned long long)inode, - (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino), - mp->m_fsname); + xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", + __func__, (unsigned long long)inode, + (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); ASSERT(0); return XFS_ERROR(EINVAL); } agbno = XFS_AGINO_TO_AGBNO(mp, agino); if (agbno >= mp->m_sb.sb_agblocks) { - cmn_err(CE_WARN, - "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", - agbno, mp->m_sb.sb_agblocks, mp->m_fsname); + xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", + __func__, agbno, mp->m_sb.sb_agblocks); ASSERT(0); return XFS_ERROR(EINVAL); } @@ -1085,9 +1080,8 @@ xfs_difree( */ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); if (error) { - cmn_err(CE_WARN, - "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", - error, mp->m_fsname); + xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", + __func__, error); return error; } agi = XFS_BUF_TO_AGI(agbp); @@ -1106,17 +1100,15 @@ xfs_difree( * Look for the entry describing this inode. */ if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { - cmn_err(CE_WARN, - "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. 
Returning error.", - error, mp->m_fsname); + xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.", + __func__, error); goto error0; } XFS_WANT_CORRUPTED_GOTO(i == 1, error0); error = xfs_inobt_get_rec(cur, &rec, &i); if (error) { - cmn_err(CE_WARN, - "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", - error, mp->m_fsname); + xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", + __func__, error); goto error0; } XFS_WANT_CORRUPTED_GOTO(i == 1, error0); @@ -1157,8 +1149,8 @@ xfs_difree( xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); if ((error = xfs_btree_delete(cur, &i))) { - cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", - error, mp->m_fsname); + xfs_warn(mp, "%s: xfs_btree_delete returned error %d.", + __func__, error); goto error0; } @@ -1170,9 +1162,8 @@ xfs_difree( error = xfs_inobt_update(cur, &rec); if (error) { - cmn_err(CE_WARN, - "xfs_difree: xfs_inobt_update returned an error %d on %s.", - error, mp->m_fsname); + xfs_warn(mp, "%s: xfs_inobt_update returned error %d.", + __func__, error); goto error0; } @@ -1218,10 +1209,9 @@ xfs_imap_lookup( error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "xfs_ialloc_read_agi() returned " - "error %d, agno %d", - error, agno); + xfs_alert(mp, + "%s: xfs_ialloc_read_agi() returned error %d, agno %d", + __func__, error, agno); return error; } @@ -1299,24 +1289,21 @@ xfs_imap( if (flags & XFS_IGET_UNTRUSTED) return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_imap: agno (%d) >= " - "mp->m_sb.sb_agcount (%d)", - agno, mp->m_sb.sb_agcount); + xfs_alert(mp, + "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", + __func__, agno, mp->m_sb.sb_agcount); } if (agbno >= mp->m_sb.sb_agblocks) { - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_imap: agbno (0x%llx) >= " - "mp->m_sb.sb_agblocks (0x%lx)", - (unsigned long long) agbno, - (unsigned long) 
mp->m_sb.sb_agblocks); + xfs_alert(mp, + "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", + __func__, (unsigned long long)agbno, + (unsigned long)mp->m_sb.sb_agblocks); } if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_imap: ino (0x%llx) != " - "XFS_AGINO_TO_INO(mp, agno, agino) " - "(0x%llx)", - ino, XFS_AGINO_TO_INO(mp, agno, agino)); + xfs_alert(mp, + "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", + __func__, ino, + XFS_AGINO_TO_INO(mp, agno, agino)); } xfs_stack_trace(); #endif /* DEBUG */ @@ -1388,10 +1375,9 @@ out_map: */ if ((imap->im_blkno + imap->im_len) > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " - " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", - (unsigned long long) imap->im_blkno, + xfs_alert(mp, + "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)", + __func__, (unsigned long long) imap->im_blkno, (unsigned long long) imap->im_len, XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); return XFS_ERROR(EINVAL); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index be7cf625421f..a098a20ca63e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -110,8 +110,8 @@ xfs_inobp_check( dip = (xfs_dinode_t *)xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); if (!dip->di_next_unlinked) { - xfs_fs_cmn_err(CE_ALERT, mp, - "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", + xfs_alert(mp, + "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.", bp); ASSERT(dip->di_next_unlinked); } @@ -142,10 +142,9 @@ xfs_imap_to_bp( (int)imap->im_len, buf_flags, &bp); if (error) { if (error != EAGAIN) { - cmn_err(CE_WARN, - "xfs_imap_to_bp: xfs_trans_read_buf()returned " - "an error %d on %s. 
Returning error.", - error, mp->m_fsname); + xfs_warn(mp, + "%s: xfs_trans_read_buf() returned error %d.", + __func__, error); } else { ASSERT(buf_flags & XBF_TRYLOCK); } @@ -180,12 +179,11 @@ xfs_imap_to_bp( XFS_CORRUPTION_ERROR("xfs_imap_to_bp", XFS_ERRLEVEL_HIGH, mp, dip); #ifdef DEBUG - cmn_err(CE_PANIC, - "Device %s - bad inode magic/vsn " - "daddr %lld #%d (magic=%x)", - XFS_BUFTARG_NAME(mp->m_ddev_targp), + xfs_emerg(mp, + "bad inode magic/vsn daddr %lld #%d (magic=%x)", (unsigned long long)imap->im_blkno, i, be16_to_cpu(dip->di_magic)); + ASSERT(0); #endif xfs_trans_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); @@ -317,7 +315,7 @@ xfs_iformat( if (unlikely(be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) > be64_to_cpu(dip->di_nblocks))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, + xfs_warn(ip->i_mount, "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", (unsigned long long)ip->i_ino, (int)(be32_to_cpu(dip->di_nextents) + @@ -330,8 +328,7 @@ xfs_iformat( } if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt dinode %Lu, forkoff = 0x%x.", + xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", (unsigned long long)ip->i_ino, dip->di_forkoff); XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, @@ -341,7 +338,7 @@ xfs_iformat( if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && !ip->i_mount->m_rtdev_targp)) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, + xfs_warn(ip->i_mount, "corrupt dinode %Lu, has realtime flag set.", ip->i_ino); XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", @@ -373,9 +370,8 @@ xfs_iformat( * no local regular files yet */ if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu " - "(local format for regular file).", + xfs_warn(ip->i_mount, + "corrupt inode %Lu (local format for regular file).", (unsigned long long) ip->i_ino); 
XFS_CORRUPTION_ERROR("xfs_iformat(4)", XFS_ERRLEVEL_LOW, @@ -385,9 +381,8 @@ xfs_iformat( di_size = be64_to_cpu(dip->di_size); if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu " - "(bad size %Ld for local inode).", + xfs_warn(ip->i_mount, + "corrupt inode %Lu (bad size %Ld for local inode).", (unsigned long long) ip->i_ino, (long long) di_size); XFS_CORRUPTION_ERROR("xfs_iformat(5)", @@ -431,9 +426,8 @@ xfs_iformat( size = be16_to_cpu(atp->hdr.totsize); if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu " - "(bad attr fork size %Ld).", + xfs_warn(ip->i_mount, + "corrupt inode %Lu (bad attr fork size %Ld).", (unsigned long long) ip->i_ino, (long long) size); XFS_CORRUPTION_ERROR("xfs_iformat(8)", @@ -488,9 +482,8 @@ xfs_iformat_local( * kmem_alloc() or memcpy() below. */ if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu " - "(bad size %d for local fork, size = %d).", + xfs_warn(ip->i_mount, + "corrupt inode %Lu (bad size %d for local fork, size = %d).", (unsigned long long) ip->i_ino, size, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, @@ -547,8 +540,7 @@ xfs_iformat_extents( * kmem_alloc() or memcpy() below. 
*/ if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu ((a)extents = %d).", + xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", (unsigned long long) ip->i_ino, nex); XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); @@ -623,11 +615,10 @@ xfs_iformat_btree( || XFS_BMDR_SPACE_CALC(nrecs) > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { - xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, - "corrupt inode %Lu (btree).", + xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", (unsigned long long) ip->i_ino); - XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, - ip->i_mount); + XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, + ip->i_mount, dip); return XFS_ERROR(EFSCORRUPTED); } @@ -813,11 +804,9 @@ xfs_iread( */ if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { #ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " - "dip->di_magic (0x%x) != " - "XFS_DINODE_MAGIC (0x%x)", - be16_to_cpu(dip->di_magic), - XFS_DINODE_MAGIC); + xfs_alert(mp, + "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", + __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC); #endif /* DEBUG */ error = XFS_ERROR(EINVAL); goto out_brelse; @@ -835,9 +824,8 @@ xfs_iread( error = xfs_iformat(ip, dip); if (error) { #ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " - "xfs_iformat() returned error %d", - error); + xfs_alert(mp, "%s: xfs_iformat() returned error %d", + __func__, error); #endif /* DEBUG */ goto out_brelse; } @@ -932,7 +920,6 @@ xfs_iread_extents( /* * We know that the size is valid (it's checked in iformat_btree) */ - ifp->if_lastex = NULLEXTNUM; ifp->if_bytes = ifp->if_real_bytes = 0; ifp->if_flags |= XFS_IFEXTENTS; xfs_iext_add(ifp, 0, nextents); @@ -1016,8 +1003,8 @@ xfs_ialloc( * This is because we're setting fields here we need * to prevent others from looking 
at until we're done. */ - error = xfs_trans_iget(tp->t_mountp, tp, ino, - XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); + error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, + XFS_ILOCK_EXCL, &ip); if (error) return error; ASSERT(ip != NULL); @@ -1166,6 +1153,7 @@ xfs_ialloc( /* * Log the new values stuffed into the inode. */ + xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, flags); /* now that we have an i_mode we can setup inode ops and unlock */ @@ -1365,7 +1353,7 @@ xfs_itruncate_start( return 0; } last_byte = xfs_file_last_byte(ip); - trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte); + trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte); if (last_byte > toss_start) { if (flags & XFS_ITRUNC_DEFINITE) { xfs_tosspages(ip, toss_start, @@ -1481,7 +1469,7 @@ xfs_itruncate_finish( * file but the log buffers containing the free and reallocation * don't, then we'd end up with garbage in the blocks being freed. * As long as we make the new_size permanent before actually - * freeing any blocks it doesn't matter if they get writtten to. + * freeing any blocks it doesn't matter if they get written to. * * The callers must signal into us whether or not the size * setting here must be synchronous. There are a few cases @@ -1820,9 +1808,8 @@ xfs_iunlink_remove( */ error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { - cmn_err(CE_WARN, - "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", - error, mp->m_fsname); + xfs_warn(mp, "%s: xfs_itobp() returned error %d.", + __func__, error); return error; } next_agino = be32_to_cpu(dip->di_next_unlinked); @@ -1867,9 +1854,9 @@ xfs_iunlink_remove( error = xfs_inotobp(mp, tp, next_ino, &last_dip, &last_ibp, &last_offset, 0); if (error) { - cmn_err(CE_WARN, - "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. 
Returning error.", - error, mp->m_fsname); + xfs_warn(mp, + "%s: xfs_inotobp() returned error %d.", + __func__, error); return error; } next_agino = be32_to_cpu(last_dip->di_next_unlinked); @@ -1882,9 +1869,8 @@ xfs_iunlink_remove( */ error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); if (error) { - cmn_err(CE_WARN, - "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", - error, mp->m_fsname); + xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.", + __func__, error); return error; } next_agino = be32_to_cpu(dip->di_next_unlinked); @@ -2571,12 +2557,9 @@ xfs_iflush_fork( case XFS_DINODE_FMT_EXTENTS: ASSERT((ifp->if_flags & XFS_IFEXTENTS) || !(iip->ili_format.ilf_fields & extflag[whichfork])); - ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || - (ifp->if_bytes == 0)); - ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || - (ifp->if_bytes > 0)); if ((iip->ili_format.ilf_fields & extflag[whichfork]) && (ifp->if_bytes > 0)) { + ASSERT(xfs_iext_get_ext(ifp, 0)); ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, whichfork); @@ -2802,7 +2785,7 @@ xfs_iflush( /* * We can't flush the inode until it is unpinned, so wait for it if we - * are allowed to block. We know noone new can pin it, because we are + * are allowed to block. We know no one new can pin it, because we are * holding the inode lock shared and you need to hold it exclusively to * pin the inode. * @@ -2848,7 +2831,7 @@ xfs_iflush( * Get the buffer containing the on-disk inode. */ error = xfs_itobp(mp, NULL, ip, &dip, &bp, - (flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK); + (flags & SYNC_TRYLOCK) ? 
XBF_TRYLOCK : XBF_LOCK); if (error || !bp) { xfs_ifunlock(ip); return error; @@ -2939,16 +2922,16 @@ xfs_iflush_int( if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { - xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, - "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", - ip->i_ino, be16_to_cpu(dip->di_magic), dip); + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", + __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); goto corrupt_out; } if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { - xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, - "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", - ip->i_ino, ip, ip->i_d.di_magic); + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x", + __func__, ip->i_ino, ip, ip->i_d.di_magic); goto corrupt_out; } if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { @@ -2956,9 +2939,9 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { - xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, - "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", - ip->i_ino, ip); + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: Bad regular inode %Lu, ptr 0x%p", + __func__, ip->i_ino, ip); goto corrupt_out; } } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { @@ -2967,28 +2950,28 @@ xfs_iflush_int( (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { - xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, - "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", - ip->i_ino, ip); + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: Bad directory inode %Lu, ptr 0x%p", + __func__, ip->i_ino, ip); goto corrupt_out; } } if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > ip->i_d.di_nblocks, mp, 
XFS_ERRTAG_IFLUSH_5, XFS_RANDOM_IFLUSH_5)) { - xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, - "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", - ip->i_ino, + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: detected corrupt incore inode %Lu, " + "total extents = %d, nblocks = %Ld, ptr 0x%p", + __func__, ip->i_ino, ip->i_d.di_nextents + ip->i_d.di_anextents, - ip->i_d.di_nblocks, - ip); + ip->i_d.di_nblocks, ip); goto corrupt_out; } if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { - xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, - "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", - ip->i_ino, ip->i_d.di_forkoff, ip); + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p", + __func__, ip->i_ino, ip->i_d.di_forkoff, ip); goto corrupt_out; } /* @@ -3125,6 +3108,8 @@ xfs_iext_get_ext( xfs_extnum_t idx) /* index of target extent */ { ASSERT(idx >= 0); + ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { return ifp->if_u1.if_ext_irec->er_extbuf; } else if (ifp->if_flags & XFS_IFEXTIREC) { @@ -3204,7 +3189,6 @@ xfs_iext_add( } ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; ifp->if_real_bytes = 0; - ifp->if_lastex = nextents + ext_diff; } /* * Otherwise use a linear (direct) extent list. 
@@ -3899,8 +3883,10 @@ xfs_iext_idx_to_irec( xfs_extnum_t page_idx = *idxp; /* extent index in target list */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); - ASSERT(page_idx >= 0 && page_idx <= - ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); + ASSERT(page_idx >= 0); + ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); + ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; erp_idx = 0; low = 0; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 5c95fa8ec11d..3ae6d58e5473 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -67,7 +67,6 @@ typedef struct xfs_ifork { short if_broot_bytes; /* bytes allocated for root */ unsigned char if_flags; /* per-fork flags */ unsigned char if_ext_max; /* max # of extent records */ - xfs_extnum_t if_lastex; /* last if_extents used */ union { xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ @@ -111,7 +110,7 @@ struct xfs_imap { * Generally, we do not want to hold the i_rlock while holding the * i_ilock. Hierarchy is i_iolock followed by i_rlock. * - * xfs_iptr_t contains all the inode fields upto and including the + * xfs_iptr_t contains all the inode fields up to and including the * i_mnext and i_mprev fields, it is used as a marker in the inode * chain off the mount structure by xfs_sync calls. */ @@ -336,7 +335,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) /* * Project quota id helpers (previously projid was 16bit only - * and using two 16bit values to hold new 32bit projid was choosen + * and using two 16bit values to hold new 32bit projid was chosen * to retain compatibility with "old" filesystems). */ static inline prid_t @@ -409,28 +408,35 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) /* * Flags for lockdep annotations. 
* - * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes - * (ie directory operations that require locking a directory inode and - * an entry inode). The first inode gets locked with this flag so it - * gets a lockdep subclass of 1 and the second lock will have a lockdep - * subclass of 0. + * XFS_LOCK_PARENT - for directory operations that require locking a + * parent directory inode and a child entry inode. The parent gets locked + * with this flag so it gets a lockdep subclass of 1 and the child entry + * lock will have a lockdep subclass of 0. + * + * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary + * inodes do not participate in the normal lock order, and thus have their + * own subclasses. * * XFS_LOCK_INUMORDER - for locking several inodes at the some time * with xfs_lock_inodes(). This flag is used as the starting subclass * and each subsequent lock acquired will increment the subclass by one. - * So the first lock acquired will have a lockdep subclass of 2, the - * second lock will have a lockdep subclass of 3, and so on. It is + * So the first lock acquired will have a lockdep subclass of 4, the + * second lock will have a lockdep subclass of 5, and so on. It is * the responsibility of the class builder to shift this to the correct * portion of the lock_mode lockdep mask. 
*/ #define XFS_LOCK_PARENT 1 -#define XFS_LOCK_INUMORDER 2 +#define XFS_LOCK_RTBITMAP 2 +#define XFS_LOCK_RTSUM 3 +#define XFS_LOCK_INUMORDER 4 #define XFS_IOLOCK_SHIFT 16 #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) #define XFS_ILOCK_SHIFT 24 #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) +#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) #define XFS_IOLOCK_DEP_MASK 0x00ff0000 #define XFS_ILOCK_DEP_MASK 0xff000000 diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index fd4f398bd6f1..09983a3344a5 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -198,6 +198,41 @@ xfs_inode_item_size( } /* + * xfs_inode_item_format_extents - convert in-core extents to on-disk form + * + * For either the data or attr fork in extent format, we need to endian convert + * the in-core extent as we place them into the on-disk inode. In this case, we + * need to do this conversion before we write the extents into the log. Because + * we don't have the disk inode to write into here, we allocate a buffer and + * format the extents into it via xfs_iextents_copy(). We free the buffer in + * the unlock routine after the copy for the log has been made. + * + * In the case of the data fork, the in-core and on-disk fork sizes can be + * different due to delayed allocation extents. We only log on-disk extents + * here, so always use the physical fork size to determine the size of the + * buffer we need to allocate. 
+ */ +STATIC void +xfs_inode_item_format_extents( + struct xfs_inode *ip, + struct xfs_log_iovec *vecp, + int whichfork, + int type) +{ + xfs_bmbt_rec_t *ext_buffer; + + ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); + if (whichfork == XFS_DATA_FORK) + ip->i_itemp->ili_extents_buf = ext_buffer; + else + ip->i_itemp->ili_aextents_buf = ext_buffer; + + vecp->i_addr = ext_buffer; + vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); + vecp->i_type = type; +} + +/* * This is called to fill in the vector of log iovecs for the * given inode log item. It fills the first item with an inode * log format structure, the second with the on-disk inode structure, @@ -213,7 +248,6 @@ xfs_inode_item_format( struct xfs_inode *ip = iip->ili_inode; uint nvecs; size_t data_bytes; - xfs_bmbt_rec_t *ext_buffer; xfs_mount_t *mp; vecp->i_addr = &iip->ili_format; @@ -320,22 +354,8 @@ xfs_inode_item_format( } else #endif { - /* - * There are delayed allocation extents - * in the inode, or we need to convert - * the extents to on disk format. - * Use xfs_iextents_copy() - * to copy only the real extents into - * a separate buffer. We'll free the - * buffer in the unlock routine. 
- */ - ext_buffer = kmem_alloc(ip->i_df.if_bytes, - KM_SLEEP); - iip->ili_extents_buf = ext_buffer; - vecp->i_addr = ext_buffer; - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, - XFS_DATA_FORK); - vecp->i_type = XLOG_REG_TYPE_IEXT; + xfs_inode_item_format_extents(ip, vecp, + XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); } ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; @@ -445,19 +465,12 @@ xfs_inode_item_format( */ vecp->i_addr = ip->i_afp->if_u1.if_extents; vecp->i_len = ip->i_afp->if_bytes; + vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; #else ASSERT(iip->ili_aextents_buf == NULL); - /* - * Need to endian flip before logging - */ - ext_buffer = kmem_alloc(ip->i_afp->if_bytes, - KM_SLEEP); - iip->ili_aextents_buf = ext_buffer; - vecp->i_addr = ext_buffer; - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, - XFS_ATTR_FORK); + xfs_inode_item_format_extents(ip, vecp, + XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); #endif - vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; iip->ili_format.ilf_asize = vecp->i_len; vecp++; nvecs++; @@ -760,11 +773,11 @@ xfs_inode_item_push( * Push the inode to it's backing buffer. This will not remove the * inode from the AIL - a further push will be required to trigger a * buffer push. However, this allows all the dirty inodes to be pushed - * to the buffer before it is pushed to disk. THe buffer IO completion - * will pull th einode from the AIL, mark it clean and unlock the flush + * to the buffer before it is pushed to disk. The buffer IO completion + * will pull the inode from the AIL, mark it clean and unlock the flush * lock. 
*/ - (void) xfs_iflush(ip, 0); + (void) xfs_iflush(ip, SYNC_TRYLOCK); xfs_iunlock(ip, XFS_ILOCK_SHARED); } @@ -957,7 +970,6 @@ xfs_iflush_abort( { xfs_inode_log_item_t *iip = ip->i_itemp; - iip = ip->i_itemp; if (iip) { struct xfs_ail *ailp = iip->ili_item.li_ailp; if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 8a0f044750c3..091d82b94c4d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -101,11 +101,11 @@ xfs_iomap_eof_align_last_fsb( } STATIC int -xfs_cmn_err_fsblock_zero( +xfs_alert_fsblock_zero( xfs_inode_t *ip, xfs_bmbt_irec_t *imap) { - xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, + xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " "blkcnt: %llx extent-state: %x\n", @@ -246,7 +246,7 @@ xfs_iomap_write_direct( } if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { - error = xfs_cmn_err_fsblock_zero(ip, imap); + error = xfs_alert_fsblock_zero(ip, imap); goto error_out; } @@ -464,7 +464,7 @@ retry: } if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) - return xfs_cmn_err_fsblock_zero(ip, &imap[0]); + return xfs_alert_fsblock_zero(ip, &imap[0]); *ret_imap = imap[0]; return 0; @@ -614,7 +614,7 @@ xfs_iomap_write_allocate( * covers at least part of the callers request */ if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) - return xfs_cmn_err_fsblock_zero(ip, imap); + return xfs_alert_fsblock_zero(ip, imap); if ((offset_fsb >= imap->br_startoff) && (offset_fsb < (imap->br_startoff + @@ -724,7 +724,7 @@ xfs_iomap_write_unwritten( return XFS_ERROR(error); if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) - return xfs_cmn_err_fsblock_zero(ip, &imap); + return xfs_alert_fsblock_zero(ip, &imap); if ((numblks_fsb = imap.br_blockcount) == 0) { /* diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index dc1882adaf54..751e94fe1f77 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ 
-204,7 +204,6 @@ xfs_bulkstat( xfs_agi_t *agi; /* agi header data */ xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ - xfs_daddr_t bno; /* inode cluster start daddr */ int chunkidx; /* current index into inode chunk */ int clustidx; /* current index into inode cluster */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ @@ -463,7 +462,6 @@ xfs_bulkstat( mp->m_sb.sb_inopblog); } ino = XFS_AGINO_TO_INO(mp, agno, agino); - bno = XFS_AGB_TO_DADDR(mp, agno, agbno); /* * Skip if this inode is free. */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ae6fef1ff563..211930246f20 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -374,11 +374,10 @@ xfs_log_mount( int error; if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) - cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); + xfs_notice(mp, "Mounting Filesystem"); else { - cmn_err(CE_NOTE, - "Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", - mp->m_fsname); + xfs_notice(mp, +"Mounting filesystem in no-recovery mode. 
Filesystem will be inconsistent."); ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } @@ -393,7 +392,7 @@ xfs_log_mount( */ error = xfs_trans_ail_init(mp); if (error) { - cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); + xfs_warn(mp, "AIL initialisation failed: error %d", error); goto out_free_log; } mp->m_log->l_ailp = mp->m_ail; @@ -413,7 +412,8 @@ xfs_log_mount( if (readonly) mp->m_flags |= XFS_MOUNT_RDONLY; if (error) { - cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); + xfs_warn(mp, "log mount/recovery failed: error %d", + error); goto out_destroy_ail; } } @@ -542,10 +542,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) */ } - if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_log_unmount: unmount record failed"); - } + if (error) + xfs_alert(mp, "%s: unmount record failed", __func__); spin_lock(&log->l_icloglock); @@ -763,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp) break; case XLOG_STATE_COVER_NEED: case XLOG_STATE_COVER_NEED2: - if (!xfs_trans_ail_tail(log->l_ailp) && + if (!xfs_ail_min_lsn(log->l_ailp) && xlog_iclogs_empty(log)) { if (log->l_covered_state == XLOG_STATE_COVER_NEED) log->l_covered_state = XLOG_STATE_COVER_DONE; @@ -803,7 +801,7 @@ xlog_assign_tail_lsn( xfs_lsn_t tail_lsn; struct log *log = mp->m_log; - tail_lsn = xfs_trans_ail_tail(mp->m_ail); + tail_lsn = xfs_ail_min_lsn(mp->m_ail); if (!tail_lsn) tail_lsn = atomic64_read(&log->l_last_sync_lsn); @@ -852,7 +850,7 @@ xlog_space_left( * In this case we just want to return the size of the * log as the amount of space left. 
*/ - xfs_fs_cmn_err(CE_ALERT, log->l_mp, + xfs_alert(log->l_mp, "xlog_space_left: head behind tail\n" " tail_cycle = %d, tail_bytes = %d\n" " GH cycle = %d, GH bytes = %d", @@ -1001,7 +999,7 @@ xlog_alloc_log(xfs_mount_t *mp, log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); if (!log) { - xlog_warn("XFS: Log allocation failed: No memory!"); + xfs_warn(mp, "Log allocation failed: No memory!"); goto out; } @@ -1029,24 +1027,24 @@ xlog_alloc_log(xfs_mount_t *mp, if (xfs_sb_version_hassector(&mp->m_sb)) { log2_size = mp->m_sb.sb_logsectlog; if (log2_size < BBSHIFT) { - xlog_warn("XFS: Log sector size too small " - "(0x%x < 0x%x)", log2_size, BBSHIFT); + xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)", + log2_size, BBSHIFT); goto out_free_log; } log2_size -= BBSHIFT; if (log2_size > mp->m_sectbb_log) { - xlog_warn("XFS: Log sector size too large " - "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log); + xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)", + log2_size, mp->m_sectbb_log); goto out_free_log; } /* for larger sector sizes, must have v2 or external log */ if (log2_size && log->l_logBBstart > 0 && !xfs_sb_version_haslogv2(&mp->m_sb)) { - - xlog_warn("XFS: log sector size (0x%x) invalid " - "for configuration.", log2_size); + xfs_warn(mp, + "log sector size (0x%x) invalid for configuration.", + log2_size); goto out_free_log; } } @@ -1241,7 +1239,7 @@ xlog_grant_push_ail( * the filesystem is shutting down. */ if (!XLOG_FORCED_SHUTDOWN(log)) - xfs_trans_ail_push(log->l_ailp, threshold_lsn); + xfs_ail_push(log->l_ailp, threshold_lsn); } /* @@ -1451,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log) xlog_cil_destroy(log); + /* + * always need to ensure that the extra buffer does not point to memory + * owned by another log buffer before we free it. 
+ */ + xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size); + xfs_buf_free(log->l_xbuf); + iclog = log->l_iclog; for (i=0; i<log->l_iclog_bufs; i++) { xfs_buf_free(iclog->ic_bp); @@ -1460,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log) } spinlock_destroy(&log->l_icloglock); - xfs_buf_free(log->l_xbuf); log->l_mp->m_log = NULL; kmem_free(log); } /* xlog_dealloc_log */ @@ -1563,38 +1567,36 @@ xlog_print_tic_res( "SWAPEXT" }; - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_log_write: reservation summary:\n" - " trans type = %s (%u)\n" - " unit res = %d bytes\n" - " current res = %d bytes\n" - " total reg = %u bytes (o/flow = %u bytes)\n" - " ophdrs = %u (ophdr space = %u bytes)\n" - " ophdr + reg = %u bytes\n" - " num regions = %u\n", - ((ticket->t_trans_type <= 0 || - ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? - "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), - ticket->t_trans_type, - ticket->t_unit_res, - ticket->t_curr_res, - ticket->t_res_arr_sum, ticket->t_res_o_flow, - ticket->t_res_num_ophdrs, ophdr_spc, - ticket->t_res_arr_sum + - ticket->t_res_o_flow + ophdr_spc, - ticket->t_res_num); + xfs_warn(mp, + "xfs_log_write: reservation summary:\n" + " trans type = %s (%u)\n" + " unit res = %d bytes\n" + " current res = %d bytes\n" + " total reg = %u bytes (o/flow = %u bytes)\n" + " ophdrs = %u (ophdr space = %u bytes)\n" + " ophdr + reg = %u bytes\n" + " num regions = %u\n", + ((ticket->t_trans_type <= 0 || + ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? 
+ "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), + ticket->t_trans_type, + ticket->t_unit_res, + ticket->t_curr_res, + ticket->t_res_arr_sum, ticket->t_res_o_flow, + ticket->t_res_num_ophdrs, ophdr_spc, + ticket->t_res_arr_sum + + ticket->t_res_o_flow + ophdr_spc, + ticket->t_res_num); for (i = 0; i < ticket->t_res_num; i++) { - uint r_type = ticket->t_res_arr[i].r_type; - cmn_err(CE_WARN, - "region[%u]: %s - %u bytes\n", - i, + uint r_type = ticket->t_res_arr[i].r_type; + xfs_warn(mp, "region[%u]: %s - %u bytes\n", i, ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? "bad-rtype" : res_type_str[r_type-1]), ticket->t_res_arr[i].r_len); } - xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, + xfs_alert_tag(mp, XFS_PTAG_LOGRES, "xfs_log_write: reservation ran out. Need to up reservation"); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } @@ -1682,7 +1684,7 @@ xlog_write_setup_ophdr( case XFS_LOG: break; default: - xfs_fs_cmn_err(CE_WARN, log->l_mp, + xfs_warn(log->l_mp, "Bad XFS transaction clientid 0x%x in ticket 0x%p", ophdr->oh_clientid, ticket); return NULL; @@ -2264,7 +2266,7 @@ xlog_state_do_callback( if (repeats > 5000) { flushcnt += repeats; repeats = 0; - xfs_fs_cmn_err(CE_WARN, log->l_mp, + xfs_warn(log->l_mp, "%s: possible infinite loop (%d iterations)", __func__, flushcnt); } @@ -3052,10 +3054,8 @@ xfs_log_force( int error; error = _xfs_log_force(mp, flags, NULL); - if (error) { - xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " - "error %d returned.", error); - } + if (error) + xfs_warn(mp, "%s: error %d returned.", __func__, error); } /* @@ -3204,10 +3204,8 @@ xfs_log_force_lsn( int error; error = _xfs_log_force_lsn(mp, lsn, flags, NULL); - if (error) { - xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " - "error %d returned.", error); - } + if (error) + xfs_warn(mp, "%s: error %d returned.", __func__, error); } /* @@ -3256,13 +3254,6 @@ xfs_log_ticket_get( return ticket; } -xlog_tid_t -xfs_log_get_trans_ident( - struct xfs_trans *tp) -{ - return 
tp->t_ticket->t_tid; -} - /* * Allocate and initialise a new log ticket. */ @@ -3412,9 +3403,20 @@ xlog_verify_dest_ptr( } if (!good_ptr) - xlog_panic("xlog_verify_dest_ptr: invalid ptr"); + xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); } +/* + * Check to make sure the grant write head didn't just over lap the tail. If + * the cycles are the same, we can't be overlapping. Otherwise, make sure that + * the cycles differ by exactly one and check the byte count. + * + * This check is run unlocked, so can give false positives. Rather than assert + * on failures, use a warn-once flag and a panic tag to allow the admin to + * determine if they want to panic the machine when such an error occurs. For + * debug kernels this will have the same effect as using an assert but, unlinke + * an assert, it can be turned off at runtime. + */ STATIC void xlog_verify_grant_tail( struct log *log) @@ -3422,17 +3424,22 @@ xlog_verify_grant_tail( int tail_cycle, tail_blocks; int cycle, space; - /* - * Check to make sure the grant write head didn't just over lap the - * tail. If the cycles are the same, we can't be overlapping. - * Otherwise, make sure that the cycles differ by exactly one and - * check the byte count. 
- */ xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); if (tail_cycle != cycle) { - ASSERT(cycle - 1 == tail_cycle); - ASSERT(space <= BBTOB(tail_blocks)); + if (cycle - 1 != tail_cycle && + !(log->l_flags & XLOG_TAIL_WARN)) { + xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, + "%s: cycle - 1 != tail_cycle", __func__); + log->l_flags |= XLOG_TAIL_WARN; + } + + if (space > BBTOB(tail_blocks) && + !(log->l_flags & XLOG_TAIL_WARN)) { + xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES, + "%s: space > BBTOB(tail_blocks)", __func__); + log->l_flags |= XLOG_TAIL_WARN; + } } } @@ -3448,16 +3455,16 @@ xlog_verify_tail_lsn(xlog_t *log, blocks = log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn)); if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize)) - xlog_panic("xlog_verify_tail_lsn: ran out of log space"); + xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); } else { ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle); if (BLOCK_LSN(tail_lsn) == log->l_prev_block) - xlog_panic("xlog_verify_tail_lsn: tail wrapped"); + xfs_emerg(log->l_mp, "%s: tail wrapped", __func__); blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block; if (blocks < BTOBB(iclog->ic_offset) + 1) - xlog_panic("xlog_verify_tail_lsn: ran out of log space"); + xfs_emerg(log->l_mp, "%s: ran out of log space", __func__); } } /* xlog_verify_tail_lsn */ @@ -3497,22 +3504,23 @@ xlog_verify_iclog(xlog_t *log, icptr = log->l_iclog; for (i=0; i < log->l_iclog_bufs; i++) { if (icptr == NULL) - xlog_panic("xlog_verify_iclog: invalid ptr"); + xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); icptr = icptr->ic_next; } if (icptr != log->l_iclog) - xlog_panic("xlog_verify_iclog: corrupt iclog ring"); + xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__); spin_unlock(&log->l_icloglock); /* check log magic numbers */ if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) - xlog_panic("xlog_verify_iclog: invalid 
magic num"); + xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); ptr = (xfs_caddr_t) &iclog->ic_header; for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; ptr += BBSIZE) { if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) - xlog_panic("xlog_verify_iclog: unexpected magic num"); + xfs_emerg(log->l_mp, "%s: unexpected magic num", + __func__); } /* check fields */ @@ -3542,9 +3550,10 @@ xlog_verify_iclog(xlog_t *log, } } if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) - cmn_err(CE_WARN, "xlog_verify_iclog: " - "invalid clientid %d op 0x%p offset 0x%lx", - clientid, ophead, (unsigned long)field_offset); + xfs_warn(log->l_mp, + "%s: invalid clientid %d op 0x%p offset 0x%lx", + __func__, clientid, ophead, + (unsigned long)field_offset); /* check length */ field_offset = (__psint_t) diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 3bd3291ef8d2..78c9039994af 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *); struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); void xfs_log_ticket_put(struct xlog_ticket *ticket); -xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); - void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_log_vec *log_vector, xfs_lsn_t *commit_lsn, int flags); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 9ca59be08977..c7755d5a5fbe 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -29,6 +29,7 @@ #include "xfs_mount.h" #include "xfs_error.h" #include "xfs_alloc.h" +#include "xfs_discard.h" /* * Perform initial CIL structure initialisation. 
If the CIL is not @@ -361,19 +362,28 @@ xlog_cil_committed( int abort) { struct xfs_cil_ctx *ctx = args; - struct xfs_busy_extent *busyp, *n; + struct xfs_mount *mp = ctx->cil->xc_log->l_mp; xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, ctx->start_lsn, abort); - list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) - xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); + xfs_alloc_busy_sort(&ctx->busy_extents); + xfs_alloc_busy_clear(mp, &ctx->busy_extents, + (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); spin_lock(&ctx->cil->xc_cil_lock); list_del(&ctx->committing); spin_unlock(&ctx->cil->xc_cil_lock); xlog_cil_free_logvec(ctx->lv_chain); + + if (!list_empty(&ctx->busy_extents)) { + ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); + + xfs_discard_extents(mp, &ctx->busy_extents); + xfs_alloc_busy_clear(mp, &ctx->busy_extents, false); + } + kmem_free(ctx); } diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index d5f8be8f4bf6..2d3b6a498d63 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -87,10 +87,6 @@ static inline uint xlog_get_client_id(__be32 i) return be32_to_cpu(i) >> 24; } -#define xlog_panic(args...) cmn_err(CE_PANIC, ## args) -#define xlog_exit(args...) cmn_err(CE_PANIC, ## args) -#define xlog_warn(args...) cmn_err(CE_WARN, ## args) - /* * In core log state */ @@ -148,6 +144,9 @@ static inline uint xlog_get_client_id(__be32 i) #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being shutdown */ +#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ + +typedef __uint32_t xlog_tid_t; #ifdef __KERNEL__ /* @@ -574,7 +573,7 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector, * When we crack an atomic LSN, we sample it first so that the value will not * change while we are cracking it into the component values. This means we * will always get consistent component values to work from. 
This should always - * be used to smaple and crack LSNs taht are stored and updated in atomic + * be used to sample and crack LSNs that are stored and updated in atomic * variables. */ static inline void diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index aa0ebb776903..04142caedb2b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -92,7 +92,7 @@ xlog_get_bp( int nbblks) { if (!xlog_buf_bbcount_valid(log, nbblks)) { - xlog_warn("XFS: Invalid block length (0x%x) given for buffer", + xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); return NULL; @@ -101,7 +101,7 @@ xlog_get_bp( /* * We do log I/O in units of log sectors (a power-of-2 * multiple of the basic block size), so we round up the - * requested size to acommodate the basic blocks required + * requested size to accommodate the basic blocks required * for complete log sectors. * * In addition, the buffer may be used for a non-sector- @@ -112,7 +112,7 @@ xlog_get_bp( * an issue. Nor will this be a problem if the log I/O is * done in basic blocks (sector size 1). But otherwise we * extend the buffer by one extra log sector to ensure - * there's space to accomodate this possiblility. + * there's space to accommodate this possibility. */ if (nbblks > 1 && log->l_sectBBsize > 1) nbblks += log->l_sectBBsize; @@ -160,7 +160,7 @@ xlog_bread_noalign( int error; if (!xlog_buf_bbcount_valid(log, nbblks)) { - xlog_warn("XFS: Invalid block length (0x%x) given for buffer", + xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); return EFSCORRUPTED; @@ -205,6 +205,35 @@ xlog_bread( } /* + * Read at an offset into the buffer. Returns with the buffer in it's original + * state regardless of the result of the read. 
+ */ +STATIC int +xlog_bread_offset( + xlog_t *log, + xfs_daddr_t blk_no, /* block to read from */ + int nbblks, /* blocks to read */ + xfs_buf_t *bp, + xfs_caddr_t offset) +{ + xfs_caddr_t orig_offset = XFS_BUF_PTR(bp); + int orig_len = bp->b_buffer_length; + int error, error2; + + error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks)); + if (error) + return error; + + error = xlog_bread_noalign(log, blk_no, nbblks, bp); + + /* must reset buffer pointer even on error */ + error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len); + if (error) + return error; + return error2; +} + +/* * Write out the buffer at the given block for the given number of blocks. * The buffer is kept locked across the write and is returned locked. * This can only be used for synchronous log writes. @@ -219,7 +248,7 @@ xlog_bwrite( int error; if (!xlog_buf_bbcount_valid(log, nbblks)) { - xlog_warn("XFS: Invalid block length (0x%x) given for buffer", + xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); return EFSCORRUPTED; @@ -254,9 +283,9 @@ xlog_header_check_dump( xfs_mount_t *mp, xlog_rec_header_t *head) { - cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n", + xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n", __func__, &mp->m_sb.sb_uuid, XLOG_FMT); - cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n", + xfs_debug(mp, " log : uuid = %pU, fmt = %d\n", &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); } #else @@ -279,15 +308,15 @@ xlog_header_check_recover( * a dirty log created in IRIX. 
*/ if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { - xlog_warn( - "XFS: dirty log written in incompatible format - can't recover"); + xfs_warn(mp, + "dirty log written in incompatible format - can't recover"); xlog_header_check_dump(mp, head); XFS_ERROR_REPORT("xlog_header_check_recover(1)", XFS_ERRLEVEL_HIGH, mp); return XFS_ERROR(EFSCORRUPTED); } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { - xlog_warn( - "XFS: dirty log entry has mismatched uuid - can't recover"); + xfs_warn(mp, + "dirty log entry has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); XFS_ERROR_REPORT("xlog_header_check_recover(2)", XFS_ERRLEVEL_HIGH, mp); @@ -312,9 +341,9 @@ xlog_header_check_mount( * h_fs_uuid is nil, we assume this log was last mounted * by IRIX and continue. */ - xlog_warn("XFS: nil uuid in log - IRIX style log"); + xfs_warn(mp, "nil uuid in log - IRIX style log"); } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { - xlog_warn("XFS: log has mismatched uuid - can't recover"); + xfs_warn(mp, "log has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); XFS_ERROR_REPORT("xlog_header_check_mount", XFS_ERRLEVEL_HIGH, mp); @@ -490,8 +519,8 @@ xlog_find_verify_log_record( for (i = (*last_blk) - 1; i >= 0; i--) { if (i < start_blk) { /* valid log record not found */ - xlog_warn( - "XFS: Log inconsistent (didn't find previous header)"); + xfs_warn(log->l_mp, + "Log inconsistent (didn't find previous header)"); ASSERT(0); error = XFS_ERROR(EIO); goto out; @@ -591,12 +620,12 @@ xlog_find_head( * mkfs etc write a dummy unmount record to a fresh * log so we can store the uuid in there */ - xlog_warn("XFS: totally zeroed log"); + xfs_warn(log->l_mp, "totally zeroed log"); } return 0; } else if (error) { - xlog_warn("XFS: empty log check failed"); + xfs_warn(log->l_mp, "empty log check failed"); return error; } @@ -819,7 +848,7 @@ validate_head: xlog_put_bp(bp); if (error) - xlog_warn("XFS: failed to 
find log head"); + xfs_warn(log->l_mp, "failed to find log head"); return error; } @@ -912,7 +941,7 @@ xlog_find_tail( } } if (!found) { - xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); + xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); ASSERT(0); return XFS_ERROR(EIO); } @@ -1028,7 +1057,7 @@ done: xlog_put_bp(bp); if (error) - xlog_warn("XFS: failed to locate log tail"); + xfs_warn(log->l_mp, "failed to locate log tail"); return error; } @@ -1092,7 +1121,8 @@ xlog_find_zeroed( * the first block must be 1. If it's not, maybe we're * not looking at a log... Bail out. */ - xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); + xfs_warn(log->l_mp, + "Log inconsistent or not a log (last==0, first!=1)"); return XFS_ERROR(EINVAL); } @@ -1228,20 +1258,12 @@ xlog_write_log_records( */ ealign = round_down(end_block, sectbb); if (j == 0 && (start_block + endcount > ealign)) { - offset = XFS_BUF_PTR(bp); - balign = BBTOB(ealign - start_block); - error = XFS_BUF_SET_PTR(bp, offset + balign, - BBTOB(sectbb)); + offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block); + error = xlog_bread_offset(log, ealign, sectbb, + bp, offset); if (error) break; - error = xlog_bread_noalign(log, ealign, sectbb, bp); - if (error) - break; - - error = XFS_BUF_SET_PTR(bp, offset, bufblks); - if (error) - break; } offset = xlog_align(log, start_block, endcount, bp); @@ -1506,8 +1528,8 @@ xlog_recover_add_to_trans( if (list_empty(&trans->r_itemq)) { /* we need to catch log corruptions here */ if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { - xlog_warn("XFS: xlog_recover_add_to_trans: " - "bad header magic number"); + xfs_warn(log->l_mp, "%s: bad header magic number", + __func__); ASSERT(0); return XFS_ERROR(EIO); } @@ -1534,8 +1556,8 @@ xlog_recover_add_to_trans( if (item->ri_total == 0) { /* first region to be added */ if (in_f->ilf_size == 0 || in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { - xlog_warn( - "XFS: bad number of regions (%d) in inode log 
format", + xfs_warn(log->l_mp, + "bad number of regions (%d) in inode log format", in_f->ilf_size); ASSERT(0); return XFS_ERROR(EIO); @@ -1592,8 +1614,9 @@ xlog_recover_reorder_trans( list_move_tail(&item->ri_list, &trans->r_itemq); break; default: - xlog_warn( - "XFS: xlog_recover_reorder_trans: unrecognized type of log operation"); + xfs_warn(log->l_mp, + "%s: unrecognized type of log operation", + __func__); ASSERT(0); return XFS_ERROR(EIO); } @@ -1803,8 +1826,9 @@ xlog_recover_do_inode_buffer( logged_nextp = item->ri_buf[item_index].i_addr + next_unlinked_offset - reg_buf_offset; if (unlikely(*logged_nextp == 0)) { - xfs_fs_cmn_err(CE_ALERT, mp, - "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", + xfs_alert(mp, + "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). " + "Trying to replay bad (0) inode di_next_unlinked field.", item, bp); XFS_ERROR_REPORT("xlog_recover_do_inode_buf", XFS_ERRLEVEL_LOW, mp); @@ -1863,17 +1887,17 @@ xlog_recover_do_reg_buffer( if (buf_f->blf_flags & (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { if (item->ri_buf[i].i_addr == NULL) { - cmn_err(CE_ALERT, + xfs_alert(mp, "XFS: NULL dquot in %s.", __func__); goto next; } if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { - cmn_err(CE_ALERT, + xfs_alert(mp, "XFS: dquot too small (%d) in %s.", item->ri_buf[i].i_len, __func__); goto next; } - error = xfs_qm_dqcheck(item->ri_buf[i].i_addr, + error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); if (error) @@ -1898,6 +1922,7 @@ xlog_recover_do_reg_buffer( */ int xfs_qm_dqcheck( + struct xfs_mount *mp, xfs_disk_dquot_t *ddq, xfs_dqid_t id, uint type, /* used only when IO_dorepair is true */ @@ -1924,14 +1949,14 @@ xfs_qm_dqcheck( */ if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { if (flags & XFS_QMOPT_DOWARN) - cmn_err(CE_ALERT, + xfs_alert(mp, "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", str, id, 
be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); errs++; } if (ddq->d_version != XFS_DQUOT_VERSION) { if (flags & XFS_QMOPT_DOWARN) - cmn_err(CE_ALERT, + xfs_alert(mp, "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", str, id, ddq->d_version, XFS_DQUOT_VERSION); errs++; @@ -1941,7 +1966,7 @@ xfs_qm_dqcheck( ddq->d_flags != XFS_DQ_PROJ && ddq->d_flags != XFS_DQ_GROUP) { if (flags & XFS_QMOPT_DOWARN) - cmn_err(CE_ALERT, + xfs_alert(mp, "%s : XFS dquot ID 0x%x, unknown flags 0x%x", str, id, ddq->d_flags); errs++; @@ -1949,7 +1974,7 @@ xfs_qm_dqcheck( if (id != -1 && id != be32_to_cpu(ddq->d_id)) { if (flags & XFS_QMOPT_DOWARN) - cmn_err(CE_ALERT, + xfs_alert(mp, "%s : ondisk-dquot 0x%p, ID mismatch: " "0x%x expected, found id 0x%x", str, ddq, id, be32_to_cpu(ddq->d_id)); @@ -1962,9 +1987,8 @@ xfs_qm_dqcheck( be64_to_cpu(ddq->d_blk_softlimit)) { if (!ddq->d_btimer) { if (flags & XFS_QMOPT_DOWARN) - cmn_err(CE_ALERT, - "%s : Dquot ID 0x%x (0x%p) " - "BLK TIMER NOT STARTED", + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", str, (int)be32_to_cpu(ddq->d_id), ddq); errs++; } @@ -1974,9 +1998,8 @@ xfs_qm_dqcheck( be64_to_cpu(ddq->d_ino_softlimit)) { if (!ddq->d_itimer) { if (flags & XFS_QMOPT_DOWARN) - cmn_err(CE_ALERT, - "%s : Dquot ID 0x%x (0x%p) " - "INODE TIMER NOT STARTED", + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", str, (int)be32_to_cpu(ddq->d_id), ddq); errs++; } @@ -1986,9 +2009,8 @@ xfs_qm_dqcheck( be64_to_cpu(ddq->d_rtb_softlimit)) { if (!ddq->d_rtbtimer) { if (flags & XFS_QMOPT_DOWARN) - cmn_err(CE_ALERT, - "%s : Dquot ID 0x%x (0x%p) " - "RTBLK TIMER NOT STARTED", + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", str, (int)be32_to_cpu(ddq->d_id), ddq); errs++; } @@ -1999,7 +2021,7 @@ xfs_qm_dqcheck( return errs; if (flags & XFS_QMOPT_DOWARN) - cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id); + xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); /* * Typically, a repair is only requested by 
quotacheck. @@ -2218,9 +2240,9 @@ xlog_recover_inode_pass2( */ if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { xfs_buf_relse(bp); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", - dip, bp, in_f->ilf_ino); + xfs_alert(mp, + "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", + __func__, dip, bp, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", XFS_ERRLEVEL_LOW, mp); error = EFSCORRUPTED; @@ -2229,9 +2251,9 @@ xlog_recover_inode_pass2( dicp = item->ri_buf[1].i_addr; if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { xfs_buf_relse(bp); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", - item, in_f->ilf_ino); + xfs_alert(mp, + "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", + __func__, item, in_f->ilf_ino); XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", XFS_ERRLEVEL_LOW, mp); error = EFSCORRUPTED; @@ -2263,9 +2285,10 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", - item, dip, bp, in_f->ilf_ino); + xfs_alert(mp, + "%s: Bad regular inode log record, rec ptr 0x%p, " + "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", + __func__, item, dip, bp, in_f->ilf_ino); error = EFSCORRUPTED; goto error; } @@ -2276,9 +2299,10 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", - item, dip, bp, in_f->ilf_ino); + xfs_alert(mp, + "%s: Bad dir inode log record, rec ptr 0x%p, " + "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", + __func__, item, dip, bp, in_f->ilf_ino); error = EFSCORRUPTED; goto error; } @@ 
-2287,9 +2311,10 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", - item, dip, bp, in_f->ilf_ino, + xfs_alert(mp, + "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " + "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", + __func__, item, dip, bp, in_f->ilf_ino, dicp->di_nextents + dicp->di_anextents, dicp->di_nblocks); error = EFSCORRUPTED; @@ -2299,8 +2324,9 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", + xfs_alert(mp, + "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " + "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); error = EFSCORRUPTED; goto error; @@ -2309,9 +2335,9 @@ xlog_recover_inode_pass2( XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", - item->ri_buf[1].i_len, item); + xfs_alert(mp, + "%s: Bad inode log record length %d, rec ptr 0x%p", + __func__, item->ri_buf[1].i_len, item); error = EFSCORRUPTED; goto error; } @@ -2398,7 +2424,7 @@ xlog_recover_inode_pass2( break; default: - xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag"); + xfs_warn(log->l_mp, "%s: Invalid flag", __func__); ASSERT(0); xfs_buf_relse(bp); error = EIO; @@ -2467,13 +2493,11 @@ xlog_recover_dquot_pass2( recddq = item->ri_buf[1].i_addr; if (recddq == NULL) { - cmn_err(CE_ALERT, - "XFS: NULL dquot in %s.", __func__); + xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); return XFS_ERROR(EIO); } if 
(item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { - cmn_err(CE_ALERT, - "XFS: dquot too small (%d) in %s.", + xfs_alert(log->l_mp, "dquot too small (%d) in %s.", item->ri_buf[1].i_len, __func__); return XFS_ERROR(EIO); } @@ -2498,12 +2522,10 @@ xlog_recover_dquot_pass2( */ dq_f = item->ri_buf[0].i_addr; ASSERT(dq_f); - if ((error = xfs_qm_dqcheck(recddq, - dq_f->qlf_id, - 0, XFS_QMOPT_DOWARN, - "xlog_recover_dquot_pass2 (log copy)"))) { + error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, + "xlog_recover_dquot_pass2 (log copy)"); + if (error) return XFS_ERROR(EIO); - } ASSERT(dq_f->qlf_len == 1); error = xfs_read_buf(mp, mp->m_ddev_targp, @@ -2523,8 +2545,9 @@ xlog_recover_dquot_pass2( * was among a chunk of dquots created earlier, and we did some * minimal initialization then. */ - if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, - "xlog_recover_dquot_pass2")) { + error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, + "xlog_recover_dquot_pass2"); + if (error) { xfs_buf_relse(bp); return XFS_ERROR(EIO); } @@ -2676,9 +2699,8 @@ xlog_recover_commit_pass1( /* nothing to do in pass 1 */ return 0; default: - xlog_warn( - "XFS: invalid item type (%d) xlog_recover_commit_pass1", - ITEM_TYPE(item)); + xfs_warn(log->l_mp, "%s: invalid item type (%d)", + __func__, ITEM_TYPE(item)); ASSERT(0); return XFS_ERROR(EIO); } @@ -2707,9 +2729,8 @@ xlog_recover_commit_pass2( /* nothing to do in pass2 */ return 0; default: - xlog_warn( - "XFS: invalid item type (%d) xlog_recover_commit_pass2", - ITEM_TYPE(item)); + xfs_warn(log->l_mp, "%s: invalid item type (%d)", + __func__, ITEM_TYPE(item)); ASSERT(0); return XFS_ERROR(EIO); } @@ -2751,10 +2772,11 @@ xlog_recover_commit_trans( STATIC int xlog_recover_unmount_trans( + struct log *log, xlog_recover_t *trans) { /* Do nothing now */ - xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); + xfs_warn(log->l_mp, "%s: Unmount LR", __func__); return 0; } @@ -2797,8 +2819,8 @@ 
xlog_recover_process_data( dp += sizeof(xlog_op_header_t); if (ohead->oh_clientid != XFS_TRANSACTION && ohead->oh_clientid != XFS_LOG) { - xlog_warn( - "XFS: xlog_recover_process_data: bad clientid"); + xfs_warn(log->l_mp, "%s: bad clientid 0x%x", + __func__, ohead->oh_clientid); ASSERT(0); return (XFS_ERROR(EIO)); } @@ -2811,8 +2833,8 @@ xlog_recover_process_data( be64_to_cpu(rhead->h_lsn)); } else { if (dp + be32_to_cpu(ohead->oh_len) > lp) { - xlog_warn( - "XFS: xlog_recover_process_data: bad length"); + xfs_warn(log->l_mp, "%s: bad length 0x%x", + __func__, be32_to_cpu(ohead->oh_len)); WARN_ON(1); return (XFS_ERROR(EIO)); } @@ -2825,7 +2847,7 @@ xlog_recover_process_data( trans, pass); break; case XLOG_UNMOUNT_TRANS: - error = xlog_recover_unmount_trans(trans); + error = xlog_recover_unmount_trans(log, trans); break; case XLOG_WAS_CONT_TRANS: error = xlog_recover_add_to_cont_trans(log, @@ -2833,8 +2855,8 @@ xlog_recover_process_data( be32_to_cpu(ohead->oh_len)); break; case XLOG_START_TRANS: - xlog_warn( - "XFS: xlog_recover_process_data: bad transaction"); + xfs_warn(log->l_mp, "%s: bad transaction", + __func__); ASSERT(0); error = XFS_ERROR(EIO); break; @@ -2844,8 +2866,8 @@ xlog_recover_process_data( dp, be32_to_cpu(ohead->oh_len)); break; default: - xlog_warn( - "XFS: xlog_recover_process_data: bad flag"); + xfs_warn(log->l_mp, "%s: bad flag 0x%x", + __func__, flags); ASSERT(0); error = XFS_ERROR(EIO); break; @@ -3030,8 +3052,7 @@ xlog_recover_clear_agi_bucket( out_abort: xfs_trans_cancel(tp, XFS_TRANS_ABORT); out_error: - xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " - "failed to clear agi %d. Continuing.", agno); + xfs_warn(mp, "%s: failed to clear agi %d. 
Continuing.", __func__, agno); return; } @@ -3282,7 +3303,7 @@ xlog_valid_rec_header( if (unlikely( (!rhead->h_version || (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { - xlog_warn("XFS: %s: unrecognised log version (%d).", + xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", __func__, be32_to_cpu(rhead->h_version)); return XFS_ERROR(EIO); } @@ -3448,19 +3469,9 @@ xlog_do_recovery_pass( * - order is important. */ wrapped_hblks = hblks - split_hblks; - error = XFS_BUF_SET_PTR(hbp, - offset + BBTOB(split_hblks), - BBTOB(hblks - split_hblks)); - if (error) - goto bread_err2; - - error = xlog_bread_noalign(log, 0, - wrapped_hblks, hbp); - if (error) - goto bread_err2; - - error = XFS_BUF_SET_PTR(hbp, offset, - BBTOB(hblks)); + error = xlog_bread_offset(log, 0, + wrapped_hblks, hbp, + offset + BBTOB(split_hblks)); if (error) goto bread_err2; } @@ -3511,19 +3522,9 @@ xlog_do_recovery_pass( * _first_, then the log start (LR header end) * - order is important. */ - error = XFS_BUF_SET_PTR(dbp, - offset + BBTOB(split_bblks), - BBTOB(bblks - split_bblks)); - if (error) - goto bread_err2; - - error = xlog_bread_noalign(log, wrapped_hblks, - bblks - split_bblks, - dbp); - if (error) - goto bread_err2; - - error = XFS_BUF_SET_PTR(dbp, offset, h_size); + error = xlog_bread_offset(log, 0, + bblks - split_bblks, hbp, + offset + BBTOB(split_bblks)); if (error) goto bread_err2; } @@ -3740,10 +3741,9 @@ xlog_recover( return error; } - cmn_err(CE_NOTE, - "Starting XFS recovery on filesystem: %s (logdev: %s)", - log->l_mp->m_fsname, log->l_mp->m_logname ? - log->l_mp->m_logname : "internal"); + xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", + log->l_mp->m_logname ? 
log->l_mp->m_logname + : "internal"); error = xlog_do_recover(log, head_blk, tail_blk); log->l_flags |= XLOG_RECOVERY_NEEDED; @@ -3776,9 +3776,7 @@ xlog_recover_finish( int error; error = xlog_recover_process_efis(log); if (error) { - cmn_err(CE_ALERT, - "Failed to recover EFIs on filesystem: %s", - log->l_mp->m_fsname); + xfs_alert(log->l_mp, "Failed to recover EFIs"); return error; } /* @@ -3793,15 +3791,12 @@ xlog_recover_finish( xlog_recover_check_summary(log); - cmn_err(CE_NOTE, - "Ending XFS recovery on filesystem: %s (logdev: %s)", - log->l_mp->m_fsname, log->l_mp->m_logname ? - log->l_mp->m_logname : "internal"); + xfs_notice(log->l_mp, "Ending recovery (logdev: %s)", + log->l_mp->m_logname ? log->l_mp->m_logname + : "internal"); log->l_flags &= ~XLOG_RECOVERY_NEEDED; } else { - cmn_err(CE_DEBUG, - "Ending clean XFS mount for filesystem: %s\n", - log->l_mp->m_fsname); + xfs_info(log->l_mp, "Ending clean mount"); } return 0; } @@ -3834,10 +3829,8 @@ xlog_recover_check_summary( for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); if (error) { - xfs_fs_cmn_err(CE_ALERT, mp, - "xlog_recover_check_summary(agf)" - "agf read failed agno %d error %d", - agno, error); + xfs_alert(mp, "%s agf read failed agno %d error %d", + __func__, agno, error); } else { agfp = XFS_BUF_TO_AGF(agfbp); freeblks += be32_to_cpu(agfp->agf_freeblks) + @@ -3846,7 +3839,10 @@ xlog_recover_check_summary( } error = xfs_read_agi(mp, NULL, agno, &agibp); - if (!error) { + if (error) { + xfs_alert(mp, "%s agi read failed agno %d error %d", + __func__, agno, error); + } else { struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); itotal += be32_to_cpu(agi->agi_count); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index d447aef84bc3..b49b82363d20 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -133,9 +133,7 @@ xfs_uuid_mount( return 0; if (uuid_is_nil(uuid)) { - cmn_err(CE_WARN, - "XFS: Filesystem %s has nil UUID - can't mount", - 
mp->m_fsname); + xfs_warn(mp, "Filesystem has nil UUID - can't mount"); return XFS_ERROR(EINVAL); } @@ -163,8 +161,7 @@ xfs_uuid_mount( out_duplicate: mutex_unlock(&xfs_uuid_table_mutex); - cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount", - mp->m_fsname); + xfs_warn(mp, "Filesystem has duplicate UUID - can't mount"); return XFS_ERROR(EINVAL); } @@ -311,6 +308,8 @@ xfs_mount_validate_sb( xfs_sb_t *sbp, int flags) { + int loud = !(flags & XFS_MFSI_QUIET); + /* * If the log device and data device have the * same device number, the log is internal. @@ -319,28 +318,32 @@ xfs_mount_validate_sb( * a volume filesystem in a non-volume manner. */ if (sbp->sb_magicnum != XFS_SB_MAGIC) { - xfs_fs_mount_cmn_err(flags, "bad magic number"); + if (loud) + xfs_warn(mp, "bad magic number"); return XFS_ERROR(EWRONGFS); } if (!xfs_sb_good_version(sbp)) { - xfs_fs_mount_cmn_err(flags, "bad version"); + if (loud) + xfs_warn(mp, "bad version"); return XFS_ERROR(EWRONGFS); } if (unlikely( sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { - xfs_fs_mount_cmn_err(flags, - "filesystem is marked as having an external log; " - "specify logdev on the\nmount command line."); + if (loud) + xfs_warn(mp, + "filesystem is marked as having an external log; " + "specify logdev on the mount command line."); return XFS_ERROR(EINVAL); } if (unlikely( sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { - xfs_fs_mount_cmn_err(flags, - "filesystem is marked as having an internal log; " - "do not specify logdev on\nthe mount command line."); + if (loud) + xfs_warn(mp, + "filesystem is marked as having an internal log; " + "do not specify logdev on the mount command line."); return XFS_ERROR(EINVAL); } @@ -369,7 +372,8 @@ xfs_mount_validate_sb( (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { - 
xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); + if (loud) + xfs_warn(mp, "SB sanity check 1 failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -382,7 +386,8 @@ xfs_mount_validate_sb( (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { - xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); + if (loud) + xfs_warn(mp, "SB sanity check 2 failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -390,12 +395,12 @@ xfs_mount_validate_sb( * Until this is fixed only page-sized or smaller data blocks work. */ if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { - xfs_fs_mount_cmn_err(flags, - "file system with blocksize %d bytes", - sbp->sb_blocksize); - xfs_fs_mount_cmn_err(flags, - "only pagesize (%ld) or less will currently work.", - PAGE_SIZE); + if (loud) { + xfs_warn(mp, + "File system with blocksize %d bytes. " + "Only pagesize (%ld) or less will currently work.", + sbp->sb_blocksize, PAGE_SIZE); + } return XFS_ERROR(ENOSYS); } @@ -409,21 +414,23 @@ xfs_mount_validate_sb( case 2048: break; default: - xfs_fs_mount_cmn_err(flags, - "inode size of %d bytes not supported", - sbp->sb_inodesize); + if (loud) + xfs_warn(mp, "inode size of %d bytes not supported", + sbp->sb_inodesize); return XFS_ERROR(ENOSYS); } if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { - xfs_fs_mount_cmn_err(flags, - "file system too large to be mounted on this system."); + if (loud) + xfs_warn(mp, + "file system too large to be mounted on this system."); return XFS_ERROR(EFBIG); } if (unlikely(sbp->sb_inprogress)) { - xfs_fs_mount_cmn_err(flags, "file system busy"); + if (loud) + xfs_warn(mp, "file system busy"); return XFS_ERROR(EFSCORRUPTED); } @@ -431,8 +438,9 @@ xfs_mount_validate_sb( * Version 1 directory format has never worked on Linux. 
*/ if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { - xfs_fs_mount_cmn_err(flags, - "file system using version 1 directory format"); + if (loud) + xfs_warn(mp, + "file system using version 1 directory format"); return XFS_ERROR(ENOSYS); } @@ -673,6 +681,7 @@ xfs_readsb(xfs_mount_t *mp, int flags) unsigned int sector_size; xfs_buf_t *bp; int error; + int loud = !(flags & XFS_MFSI_QUIET); ASSERT(mp->m_sb_bp == NULL); ASSERT(mp->m_ddev_targp != NULL); @@ -688,7 +697,8 @@ reread: bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, XFS_SB_DADDR, sector_size, 0); if (!bp) { - xfs_fs_mount_cmn_err(flags, "SB buffer read failed"); + if (loud) + xfs_warn(mp, "SB buffer read failed"); return EIO; } @@ -699,7 +709,8 @@ reread: xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); if (error) { - xfs_fs_mount_cmn_err(flags, "SB validate failed"); + if (loud) + xfs_warn(mp, "SB validate failed"); goto release_buf; } @@ -707,9 +718,9 @@ reread: * We must be able to do sector-sized and sector-aligned IO. 
*/ if (sector_size > mp->m_sb.sb_sectsize) { - xfs_fs_mount_cmn_err(flags, - "device supports only %u byte sectors (not %u)", - sector_size, mp->m_sb.sb_sectsize); + if (loud) + xfs_warn(mp, "device supports %u byte sectors (not %u)", + sector_size, mp->m_sb.sb_sectsize); error = ENOSYS; goto release_buf; } @@ -853,8 +864,7 @@ xfs_update_alignment(xfs_mount_t *mp) if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || (BBTOB(mp->m_swidth) & mp->m_blockmask)) { if (mp->m_flags & XFS_MOUNT_RETERR) { - cmn_err(CE_WARN, - "XFS: alignment check 1 failed"); + xfs_warn(mp, "alignment check 1 failed"); return XFS_ERROR(EINVAL); } mp->m_dalign = mp->m_swidth = 0; @@ -867,8 +877,9 @@ xfs_update_alignment(xfs_mount_t *mp) if (mp->m_flags & XFS_MOUNT_RETERR) { return XFS_ERROR(EINVAL); } - xfs_fs_cmn_err(CE_WARN, mp, -"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", + xfs_warn(mp, + "stripe alignment turned off: sunit(%d)/swidth(%d) " + "incompatible with agsize(%d)", mp->m_dalign, mp->m_swidth, sbp->sb_agblocks); @@ -878,9 +889,9 @@ xfs_update_alignment(xfs_mount_t *mp) mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); } else { if (mp->m_flags & XFS_MOUNT_RETERR) { - xfs_fs_cmn_err(CE_WARN, mp, -"stripe alignment turned off: sunit(%d) less than bsize(%d)", - mp->m_dalign, + xfs_warn(mp, + "stripe alignment turned off: sunit(%d) less than bsize(%d)", + mp->m_dalign, mp->m_blockmask +1); return XFS_ERROR(EINVAL); } @@ -1026,14 +1037,14 @@ xfs_check_sizes(xfs_mount_t *mp) d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { - cmn_err(CE_WARN, "XFS: filesystem size mismatch detected"); + xfs_warn(mp, "filesystem size mismatch detected"); return XFS_ERROR(EFBIG); } bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, d - XFS_FSS_TO_BB(mp, 1), BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); if (!bp) { - cmn_err(CE_WARN, "XFS: last sector read failed"); + xfs_warn(mp, "last sector read failed"); return EIO; } 
xfs_buf_relse(bp); @@ -1041,14 +1052,14 @@ xfs_check_sizes(xfs_mount_t *mp) if (mp->m_logdev_targp != mp->m_ddev_targp) { d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { - cmn_err(CE_WARN, "XFS: log size mismatch detected"); + xfs_warn(mp, "log size mismatch detected"); return XFS_ERROR(EFBIG); } bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_B(mp, 1), 0); if (!bp) { - cmn_err(CE_WARN, "XFS: log device read failed"); + xfs_warn(mp, "log device read failed"); return EIO; } xfs_buf_relse(bp); @@ -1086,7 +1097,7 @@ xfs_mount_reset_sbqflags( return 0; #ifdef QUOTADEBUG - xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); + xfs_notice(mp, "Writing superblock quota changes"); #endif tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); @@ -1094,8 +1105,7 @@ xfs_mount_reset_sbqflags( XFS_DEFAULT_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); - xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_mount_reset_sbqflags: Superblock update failed!"); + xfs_alert(mp, "%s: Superblock update failed!", __func__); return error; } @@ -1161,8 +1171,7 @@ xfs_mountfs( * transaction subsystem is online. 
*/ if (xfs_sb_has_mismatched_features2(sbp)) { - cmn_err(CE_WARN, - "XFS: correcting sb_features alignment problem"); + xfs_warn(mp, "correcting sb_features alignment problem"); sbp->sb_features2 |= sbp->sb_bad_features2; sbp->sb_bad_features2 = sbp->sb_features2; mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; @@ -1241,7 +1250,7 @@ xfs_mountfs( */ error = xfs_rtmount_init(mp); if (error) { - cmn_err(CE_WARN, "XFS: RT mount failed"); + xfs_warn(mp, "RT mount failed"); goto out_remove_uuid; } @@ -1272,12 +1281,12 @@ xfs_mountfs( INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); if (error) { - cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); + xfs_warn(mp, "Failed per-ag init: %d", error); goto out_remove_uuid; } if (!sbp->sb_logblocks) { - cmn_err(CE_WARN, "XFS: no log defined"); + xfs_warn(mp, "no log defined"); XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); goto out_free_perag; @@ -1290,7 +1299,7 @@ xfs_mountfs( XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); if (error) { - cmn_err(CE_WARN, "XFS: log mount failed"); + xfs_warn(mp, "log mount failed"); goto out_free_perag; } @@ -1327,16 +1336,14 @@ xfs_mountfs( */ error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); if (error) { - cmn_err(CE_WARN, "XFS: failed to read root inode"); + xfs_warn(mp, "failed to read root inode"); goto out_log_dealloc; } ASSERT(rip != NULL); if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { - cmn_err(CE_WARN, "XFS: corrupted root inode"); - cmn_err(CE_WARN, "Device %s - root %llu is not a directory", - XFS_BUFTARG_NAME(mp->m_ddev_targp), + xfs_warn(mp, "corrupted root inode %llu: not a directory", (unsigned long long)rip->i_ino); xfs_iunlock(rip, XFS_ILOCK_EXCL); XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, @@ -1356,7 +1363,7 @@ xfs_mountfs( /* * Free up the root inode. 
*/ - cmn_err(CE_WARN, "XFS: failed to read RT inodes"); + xfs_warn(mp, "failed to read RT inodes"); goto out_rele_rip; } @@ -1368,7 +1375,7 @@ xfs_mountfs( if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { error = xfs_mount_log_sb(mp, mp->m_update_flags); if (error) { - cmn_err(CE_WARN, "XFS: failed to write sb changes"); + xfs_warn(mp, "failed to write sb changes"); goto out_rtunmount; } } @@ -1389,10 +1396,7 @@ xfs_mountfs( * quotachecked license. */ if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { - cmn_err(CE_NOTE, - "XFS: resetting qflags for filesystem %s", - mp->m_fsname); - + xfs_notice(mp, "resetting quota flags"); error = xfs_mount_reset_sbqflags(mp); if (error) return error; @@ -1406,7 +1410,7 @@ xfs_mountfs( */ error = xfs_log_mount_finish(mp); if (error) { - cmn_err(CE_WARN, "XFS: log mount finish failed"); + xfs_warn(mp, "log mount finish failed"); goto out_rtunmount; } @@ -1435,8 +1439,8 @@ xfs_mountfs( resblks = xfs_default_resblks(mp); error = xfs_reserve_blocks(mp, &resblks, NULL); if (error) - cmn_err(CE_WARN, "XFS: Unable to allocate reserve " - "blocks. Continuing without a reserve pool."); + xfs_warn(mp, + "Unable to allocate reserve blocks. Continuing without reserve pool."); } return 0; @@ -1525,12 +1529,12 @@ xfs_unmountfs( resblks = 0; error = xfs_reserve_blocks(mp, &resblks, NULL); if (error) - cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. " + xfs_warn(mp, "Unable to free reserved block pool. " "Freespace may not be correct on next mount."); error = xfs_log_sbcount(mp, 1); if (error) - cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " + xfs_warn(mp, "Unable to update superblock counters. 
" "Freespace may not be correct on next mount."); xfs_unmountfs_writesb(mp); xfs_unmountfs_wait(mp); /* wait for async bufs */ @@ -1896,7 +1900,7 @@ xfs_mod_incore_sb_batch( uint nmsb, int rsvd) { - xfs_mod_sb_t *msbp = &msb[0]; + xfs_mod_sb_t *msbp; int error = 0; /* @@ -1906,7 +1910,7 @@ xfs_mod_incore_sb_batch( * changes will be atomic. */ spin_lock(&mp->m_sb_lock); - for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { + for (msbp = msb; msbp < (msb + nmsb); msbp++) { ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || msbp->msb_field > XFS_SBS_FDBLOCKS); @@ -2013,10 +2017,8 @@ xfs_dev_is_read_only( if (xfs_readonly_buftarg(mp->m_ddev_targp) || xfs_readonly_buftarg(mp->m_logdev_targp) || (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { - cmn_err(CE_NOTE, - "XFS: %s required on read-only device.", message); - cmn_err(CE_NOTE, - "XFS: write access unavailable, cannot proceed."); + xfs_notice(mp, "%s required on read-only device.", message); + xfs_notice(mp, "write access unavailable, cannot proceed."); return EROFS; } return 0; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a62e8971539d..3d68bb267c5f 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -203,12 +203,9 @@ typedef struct xfs_mount { struct mutex m_icsb_mutex; /* balancer sync lock */ #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ - struct task_struct *m_sync_task; /* generalised sync thread */ - xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ - struct list_head m_sync_list; /* sync thread work item list */ - spinlock_t m_sync_lock; /* work item list lock */ - int m_sync_seq; /* sync thread generation no. 
*/ - wait_queue_head_t m_wait_single_sync_task; + struct delayed_work m_sync_work; /* background sync work */ + struct delayed_work m_reclaim_work; /* background inode reclaim */ + struct work_struct m_flush_work; /* background inode flush */ __int64_t m_update_flags; /* sb flags we need to update on the next remount,rw */ struct shrinker m_inode_shrink; /* inode reclaim shrinker */ @@ -227,6 +224,7 @@ typedef struct xfs_mount { #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem operations, typically for disk errors in metadata */ +#define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to user */ #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index edfa178bafb6..4aff56395732 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -309,7 +309,7 @@ xfs_mru_cache_init(void) if (!xfs_mru_elem_zone) goto out; - xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); + xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); if (!xfs_mru_reap_wq) goto out_destroy_mru_elem_zone; diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 9bb6eda4cd21..a595f29567fe 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -382,7 +382,8 @@ static inline int xfs_qm_sync(struct xfs_mount *mp, int flags) xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ f | XFS_QMOPT_RES_REGBLKS) -extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); +extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, + xfs_dqid_t, uint, uint, char *); extern int xfs_mount_reset_sbqflags(struct xfs_mount *); #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 12a191385310..8f76fdff4f46 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -76,7 +76,7 @@ xfs_growfs_rt_alloc( xfs_mount_t *mp, /* file system 
mount point */ xfs_extlen_t oblocks, /* old count of blocks */ xfs_extlen_t nblocks, /* new count of blocks */ - xfs_ino_t ino) /* inode number (bitmap/summary) */ + xfs_inode_t *ip) /* inode (bitmap/summary) */ { xfs_fileoff_t bno; /* block number in file */ xfs_buf_t *bp; /* temporary buffer for zeroing */ @@ -86,7 +86,6 @@ xfs_growfs_rt_alloc( xfs_fsblock_t firstblock; /* first block allocated in xaction */ xfs_bmap_free_t flist; /* list of freed blocks */ xfs_fsblock_t fsbno; /* filesystem block for bno */ - xfs_inode_t *ip; /* pointer to incore inode */ xfs_bmbt_irec_t map; /* block map output */ int nmap; /* number of block maps */ int resblks; /* space reservation */ @@ -112,9 +111,9 @@ xfs_growfs_rt_alloc( /* * Lock the inode. */ - if ((error = xfs_trans_iget(mp, tp, ino, 0, - XFS_ILOCK_EXCL, &ip))) - goto error_cancel; + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); + xfs_bmap_init(&flist, &firstblock); /* * Allocate blocks to the bitmap file. @@ -155,9 +154,8 @@ xfs_growfs_rt_alloc( /* * Lock the bitmap inode. */ - if ((error = xfs_trans_iget(mp, tp, ino, 0, - XFS_ILOCK_EXCL, &ip))) - goto error_cancel; + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL); /* * Get a buffer for the block. */ @@ -1854,7 +1852,6 @@ xfs_growfs_rt( xfs_rtblock_t bmbno; /* bitmap block number */ xfs_buf_t *bp; /* temporary buffer */ int error; /* error return value */ - xfs_inode_t *ip; /* bitmap inode, used as lock */ xfs_mount_t *nmp; /* new (fake) mount structure */ xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ @@ -1918,11 +1915,11 @@ xfs_growfs_rt( /* * Allocate space to the bitmap and summary files, as necessary. 
*/ - if ((error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, - mp->m_sb.sb_rbmino))) + error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip); + if (error) return error; - if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, - mp->m_sb.sb_rsumino))) + error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip); + if (error) return error; /* * Allocate a new (fake) mount/sb. @@ -1972,10 +1969,8 @@ xfs_growfs_rt( /* * Lock out other callers by grabbing the bitmap inode lock. */ - if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, - XFS_ILOCK_EXCL, &ip))) - goto error_cancel; - ASSERT(ip == mp->m_rbmip); + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL); /* * Update the bitmap inode's size. */ @@ -1986,10 +1981,8 @@ xfs_growfs_rt( /* * Get the summary inode into the transaction. */ - if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, - XFS_ILOCK_EXCL, &ip))) - goto error_cancel; - ASSERT(ip == mp->m_rsumip); + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, mp->m_rsumip, XFS_ILOCK_EXCL); /* * Update the summary inode's size. */ @@ -2075,15 +2068,15 @@ xfs_rtallocate_extent( xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ { + xfs_mount_t *mp = tp->t_mountp; int error; /* error value */ - xfs_inode_t *ip; /* inode for bitmap file */ - xfs_mount_t *mp; /* file system mount structure */ xfs_rtblock_t r; /* result allocated block */ xfs_fsblock_t sb; /* summary file block number */ xfs_buf_t *sumbp; /* summary file block buffer */ + ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); ASSERT(minlen > 0 && minlen <= maxlen); - mp = tp->t_mountp; + /* * If prod is set then figure out what to do to minlen and maxlen. */ @@ -2099,12 +2092,7 @@ xfs_rtallocate_extent( return 0; } } - /* - * Lock out other callers by grabbing the bitmap inode lock. 
- */ - if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, - XFS_ILOCK_EXCL, &ip))) - return error; + sumbp = NULL; /* * Allocate by size, or near another block, or exactly at some block. @@ -2123,11 +2111,12 @@ xfs_rtallocate_extent( len, &sumbp, &sb, prod, &r); break; default: + error = EIO; ASSERT(0); } - if (error) { + if (error) return error; - } + /* * If it worked, update the superblock. */ @@ -2155,7 +2144,6 @@ xfs_rtfree_extent( xfs_extlen_t len) /* length of extent freed */ { int error; /* error value */ - xfs_inode_t *ip; /* bitmap file inode */ xfs_mount_t *mp; /* file system mount structure */ xfs_fsblock_t sb; /* summary file block number */ xfs_buf_t *sumbp; /* summary file block buffer */ @@ -2164,9 +2152,9 @@ xfs_rtfree_extent( /* * Synchronize by locking the bitmap inode. */ - if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, - XFS_ILOCK_EXCL, &ip))) - return error; + xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL); + #if defined(__KERNEL__) && defined(DEBUG) /* * Check to see that this whole range is currently allocated. 
@@ -2199,10 +2187,10 @@ xfs_rtfree_extent( */ if (tp->t_frextents_delta + mp->m_sb.sb_frextents == mp->m_sb.sb_rextents) { - if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) - ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; - *(__uint64_t *)&ip->i_d.di_atime = 0; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) + mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; + *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; + xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); } return 0; } @@ -2222,8 +2210,8 @@ xfs_rtmount_init( if (sbp->sb_rblocks == 0) return 0; if (mp->m_rtdev_targp == NULL) { - cmn_err(CE_WARN, - "XFS: This filesystem has a realtime volume, use rtdev=device option"); + xfs_warn(mp, + "Filesystem has a realtime volume, use rtdev=device option"); return XFS_ERROR(ENODEV); } mp->m_rsumlevels = sbp->sb_rextslog + 1; @@ -2237,7 +2225,7 @@ xfs_rtmount_init( */ d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { - cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", + xfs_warn(mp, "realtime mount -- %llu != %llu", (unsigned long long) XFS_BB_TO_FSB(mp, d), (unsigned long long) mp->m_sb.sb_rblocks); return XFS_ERROR(EFBIG); @@ -2246,7 +2234,7 @@ xfs_rtmount_init( d - XFS_FSB_TO_BB(mp, 1), XFS_FSB_TO_B(mp, 1), 0); if (!bp) { - cmn_err(CE_WARN, "XFS: realtime device size check failed"); + xfs_warn(mp, "realtime device size check failed"); return EIO; } xfs_buf_relse(bp); @@ -2306,20 +2294,16 @@ xfs_rtpick_extent( xfs_rtblock_t *pick) /* result rt extent */ { xfs_rtblock_t b; /* result block */ - int error; /* error return value */ - xfs_inode_t *ip; /* bitmap incore inode */ int log2; /* log of sequence number */ __uint64_t resid; /* residual after log removed */ __uint64_t seq; /* sequence number of file creation */ __uint64_t *seqp; /* pointer to seqno in inode */ - if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, - XFS_ILOCK_EXCL, &ip))) - return error; 
- ASSERT(ip == mp->m_rbmip); - seqp = (__uint64_t *)&ip->i_d.di_atime; - if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) { - ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; + ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + + seqp = (__uint64_t *)&mp->m_rbmip->i_d.di_atime; + if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) { + mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; *seqp = 0; } seq = *seqp; @@ -2335,7 +2319,7 @@ xfs_rtpick_extent( b = mp->m_sb.sb_rextents - len; } *seqp = seq + 1; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); *pick = b; return 0; } diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index ff614c29b441..09e1f4f35e97 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -154,7 +154,7 @@ xfs_rtmount_init( if (mp->m_sb.sb_rblocks == 0) return 0; - cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT"); + xfs_warn(mp, "Not built with CONFIG_XFS_RT"); return ENOSYS; } # define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 56861d5daaef..d6d6fdfe9422 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -49,9 +49,9 @@ xfs_do_force_shutdown( logerror = flags & SHUTDOWN_LOG_IO_ERROR; if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { - cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from " - "line %d of file %s. Return address = 0x%p", - mp->m_fsname, flags, lnnum, fname, __return_address); + xfs_notice(mp, + "%s(0x%x) called from line %d of file %s. Return address = 0x%p", + __func__, flags, lnnum, fname, __return_address); } /* * No need to duplicate efforts. @@ -69,30 +69,25 @@ xfs_do_force_shutdown( return; if (flags & SHUTDOWN_CORRUPT_INCORE) { - xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, - "Corruption of in-memory data detected. 
Shutting down filesystem: %s", - mp->m_fsname); - if (XFS_ERRLEVEL_HIGH <= xfs_error_level) { + xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, + "Corruption of in-memory data detected. Shutting down filesystem"); + if (XFS_ERRLEVEL_HIGH <= xfs_error_level) xfs_stack_trace(); - } } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { if (logerror) { - xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, - "Log I/O Error Detected. Shutting down filesystem: %s", - mp->m_fsname); + xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, + "Log I/O Error Detected. Shutting down filesystem"); } else if (flags & SHUTDOWN_DEVICE_REQ) { - xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, - "All device paths lost. Shutting down filesystem: %s", - mp->m_fsname); + xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, + "All device paths lost. Shutting down filesystem"); } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { - xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, - "I/O Error Detected. Shutting down filesystem: %s", - mp->m_fsname); + xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, + "I/O Error Detected. Shutting down filesystem"); } } if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { - cmn_err(CE_ALERT, "Please umount the filesystem, " - "and rectify the problem(s)"); + xfs_alert(mp, + "Please umount the filesystem and rectify the problem(s)"); } } @@ -106,10 +101,9 @@ xfs_ioerror_alert( xfs_buf_t *bp, xfs_daddr_t blkno) { - cmn_err(CE_ALERT, - "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" - " (\"%s\") error %d buf count %zd", - (!mp || !mp->m_fsname) ? 
"(fs name not set)" : mp->m_fsname, + xfs_alert(mp, + "I/O error occurred: meta-data dev %s block 0x%llx" + " (\"%s\") error %d buf count %zd", XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), (__uint64_t)blkno, func, XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); @@ -173,17 +167,9 @@ xfs_extlen_t xfs_get_extsz_hint( struct xfs_inode *ip) { - xfs_extlen_t extsz; - - if (unlikely(XFS_IS_REALTIME_INODE(ip))) { - extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) - ? ip->i_d.di_extsize - : ip->i_mount->m_sb.sb_rextsize; - ASSERT(extsz); - } else { - extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) - ? ip->i_d.di_extsize : 0; - } - - return extsz; + if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize) + return ip->i_d.di_extsize; + if (XFS_IS_REALTIME_INODE(ip)) + return ip->i_mount->m_sb.sb_rextsize; + return 0; } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 76922793f64f..7c7bc2b786bd 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -608,10 +608,8 @@ STATIC void xfs_trans_free( struct xfs_trans *tp) { - struct xfs_busy_extent *busyp, *n; - - list_for_each_entry_safe(busyp, n, &tp->t_busy, list) - xfs_alloc_busy_clear(tp->t_mountp, busyp); + xfs_alloc_busy_sort(&tp->t_busy); + xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false); atomic_dec(&tp->t_mountp->m_active_trans); xfs_trans_free_dqinfo(tp); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c2042b736b81..06a9759b6352 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -469,8 +469,6 @@ void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); -int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, - xfs_ino_t , uint, uint, struct xfs_inode **); void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); 
void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index c5bbbc45db91..5fc2380092c8 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -28,74 +28,138 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" -STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); -STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); -STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); -STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); +struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ #ifdef DEBUG -STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); -#else +/* + * Check that the list is sorted as it should be. + */ +STATIC void +xfs_ail_check( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + xfs_log_item_t *prev_lip; + + if (list_empty(&ailp->xa_ail)) + return; + + /* + * Check the next and previous entries are valid. + */ + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); + prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); + if (&prev_lip->li_ail != &ailp->xa_ail) + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); + + prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); + if (&prev_lip->li_ail != &ailp->xa_ail) + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); + + +#ifdef XFS_TRANS_DEBUG + /* + * Walk the list checking lsn ordering, and that every entry has the + * XFS_LI_IN_AIL flag set. This is really expensive, so only do it + * when specifically debugging the transaction subsystem. 
+ */ + prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); + list_for_each_entry(lip, &ailp->xa_ail, li_ail) { + if (&prev_lip->li_ail != &ailp->xa_ail) + ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); + ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); + prev_lip = lip; + } +#endif /* XFS_TRANS_DEBUG */ +} +#else /* !DEBUG */ #define xfs_ail_check(a,l) #endif /* DEBUG */ +/* + * Return a pointer to the first item in the AIL. If the AIL is empty, then + * return NULL. + */ +static xfs_log_item_t * +xfs_ail_min( + struct xfs_ail *ailp) +{ + if (list_empty(&ailp->xa_ail)) + return NULL; + + return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); +} + + /* + * Return a pointer to the last item in the AIL. If the AIL is empty, then + * return NULL. + */ +static xfs_log_item_t * +xfs_ail_max( + struct xfs_ail *ailp) +{ + if (list_empty(&ailp->xa_ail)) + return NULL; + + return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail); +} + +/* + * Return a pointer to the item which follows the given item in the AIL. If + * the given item is the last item in the list, then return NULL. + */ +static xfs_log_item_t * +xfs_ail_next( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + if (lip->li_ail.next == &ailp->xa_ail) + return NULL; + + return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); +} /* - * This is called by the log manager code to determine the LSN - * of the tail of the log. This is exactly the LSN of the first - * item in the AIL. If the AIL is empty, then this function - * returns 0. + * This is called by the log manager code to determine the LSN of the tail of + * the log. This is exactly the LSN of the first item in the AIL. If the AIL + * is empty, then this function returns 0. * - * We need the AIL lock in order to get a coherent read of the - * lsn of the last item in the AIL. + * We need the AIL lock in order to get a coherent read of the lsn of the last + * item in the AIL. 
*/ xfs_lsn_t -xfs_trans_ail_tail( +xfs_ail_min_lsn( struct xfs_ail *ailp) { - xfs_lsn_t lsn; + xfs_lsn_t lsn = 0; xfs_log_item_t *lip; spin_lock(&ailp->xa_lock); lip = xfs_ail_min(ailp); - if (lip == NULL) { - lsn = (xfs_lsn_t)0; - } else { + if (lip) lsn = lip->li_lsn; - } spin_unlock(&ailp->xa_lock); return lsn; } /* - * xfs_trans_push_ail - * - * This routine is called to move the tail of the AIL forward. It does this by - * trying to flush items in the AIL whose lsns are below the given - * threshold_lsn. - * - * the push is run asynchronously in a separate thread, so we return the tail - * of the log right now instead of the tail after the push. This means we will - * either continue right away, or we will sleep waiting on the async thread to - * do its work. - * - * We do this unlocked - we only need to know whether there is anything in the - * AIL at the time we are called. We don't need to access the contents of - * any of the objects, so the lock is not needed. + * Return the maximum lsn held in the AIL, or zero if the AIL is empty. */ -void -xfs_trans_ail_push( - struct xfs_ail *ailp, - xfs_lsn_t threshold_lsn) +static xfs_lsn_t +xfs_ail_max_lsn( + struct xfs_ail *ailp) { - xfs_log_item_t *lip; + xfs_lsn_t lsn = 0; + xfs_log_item_t *lip; - lip = xfs_ail_min(ailp); - if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { - if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) - xfsaild_wakeup(ailp, threshold_lsn); - } + spin_lock(&ailp->xa_lock); + lip = xfs_ail_max(ailp); + if (lip) + lsn = lip->li_lsn; + spin_unlock(&ailp->xa_lock); + + return lsn; } /* @@ -236,35 +300,78 @@ out: } /* - * xfsaild_push does the work of pushing on the AIL. Returning a timeout of - * zero indicates that the caller should sleep until woken. + * splice the log item list into the AIL at the given LSN. 
*/ -long -xfsaild_push( - struct xfs_ail *ailp, - xfs_lsn_t *last_lsn) +static void +xfs_ail_splice( + struct xfs_ail *ailp, + struct list_head *list, + xfs_lsn_t lsn) { - long tout = 0; - xfs_lsn_t last_pushed_lsn = *last_lsn; - xfs_lsn_t target = ailp->xa_target; - xfs_lsn_t lsn; - xfs_log_item_t *lip; - int flush_log, count, stuck; - xfs_mount_t *mp = ailp->xa_mount; + xfs_log_item_t *next_lip; + + /* If the list is empty, just insert the item. */ + if (list_empty(&ailp->xa_ail)) { + list_splice(list, &ailp->xa_ail); + return; + } + + list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) + break; + } + + ASSERT(&next_lip->li_ail == &ailp->xa_ail || + XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0); + + list_splice_init(list, &next_lip->li_ail); +} + +/* + * Delete the given item from the AIL. Return a pointer to the item. + */ +static void +xfs_ail_delete( + struct xfs_ail *ailp, + xfs_log_item_t *lip) +{ + xfs_ail_check(ailp, lip); + list_del(&lip->li_ail); + xfs_trans_ail_cursor_clear(ailp, lip); +} + +/* + * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself + * to run at a later time if there is more work to do to complete the push. + */ +STATIC void +xfs_ail_worker( + struct work_struct *work) +{ + struct xfs_ail *ailp = container_of(to_delayed_work(work), + struct xfs_ail, xa_work); + xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor *cur = &ailp->xa_cursors; - int push_xfsbufd = 0; + xfs_log_item_t *lip; + xfs_lsn_t lsn; + xfs_lsn_t target; + long tout = 10; + int flush_log = 0; + int stuck = 0; + int count = 0; + int push_xfsbufd = 0; spin_lock(&ailp->xa_lock); + target = ailp->xa_target; xfs_trans_ail_cursor_init(ailp, cur); - lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); + lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. 
*/ xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&ailp->xa_lock); - *last_lsn = 0; - return tout; + goto out_done; } XFS_STATS_INC(xs_push_ail); @@ -281,8 +388,7 @@ xfsaild_push( * lots of contention on the AIL lists. */ lsn = lip->li_lsn; - flush_log = stuck = count = 0; - while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { + while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { int lock_result; /* * If we can lock the item without sleeping, unlock the AIL @@ -301,13 +407,13 @@ xfsaild_push( case XFS_ITEM_SUCCESS: XFS_STATS_INC(xs_push_ail_success); IOP_PUSH(lip); - last_pushed_lsn = lsn; + ailp->xa_last_pushed_lsn = lsn; break; case XFS_ITEM_PUSHBUF: XFS_STATS_INC(xs_push_ail_pushbuf); IOP_PUSHBUF(lip); - last_pushed_lsn = lsn; + ailp->xa_last_pushed_lsn = lsn; push_xfsbufd = 1; break; @@ -319,7 +425,7 @@ xfsaild_push( case XFS_ITEM_LOCKED: XFS_STATS_INC(xs_push_ail_locked); - last_pushed_lsn = lsn; + ailp->xa_last_pushed_lsn = lsn; stuck++; break; @@ -374,9 +480,27 @@ xfsaild_push( wake_up_process(mp->m_ddev_targp->bt_task); } + /* assume we have more work to do in a short while */ +out_done: if (!count) { /* We're past our target or empty, so idle */ - last_pushed_lsn = 0; + ailp->xa_last_pushed_lsn = 0; + + /* + * We clear the XFS_AIL_PUSHING_BIT first before checking + * whether the target has changed. If the target has changed, + * this pushes the requeue race directly onto the result of the + * atomic test/set bit, so we are guaranteed that either the + * the pusher that changed the target or ourselves will requeue + * the work (but not both). + */ + clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); + smp_rmb(); + if (XFS_LSN_CMP(ailp->xa_target, target) == 0 || + test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) + return; + + tout = 50; } else if (XFS_LSN_CMP(lsn, target) >= 0) { /* * We reached the target so wait a bit longer for I/O to @@ -384,7 +508,7 @@ xfsaild_push( * start the next scan from the start of the AIL. 
*/ tout = 50; - last_pushed_lsn = 0; + ailp->xa_last_pushed_lsn = 0; } else if ((stuck * 100) / count > 90) { /* * Either there is a lot of contention on the AIL or we @@ -396,14 +520,61 @@ xfsaild_push( * continuing from where we were. */ tout = 20; - } else { - /* more to do, but wait a short while before continuing */ - tout = 10; } - *last_lsn = last_pushed_lsn; - return tout; + + /* There is more to do, requeue us. */ + queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, + msecs_to_jiffies(tout)); +} + +/* + * This routine is called to move the tail of the AIL forward. It does this by + * trying to flush items in the AIL whose lsns are below the given + * threshold_lsn. + * + * The push is run asynchronously in a workqueue, which means the caller needs + * to handle waiting on the async flush for space to become available. + * We don't want to interrupt any push that is in progress, hence we only queue + * work if we set the pushing bit approriately. + * + * We do this unlocked - we only need to know whether there is anything in the + * AIL at the time we are called. We don't need to access the contents of + * any of the objects, so the lock is not needed. + */ +void +xfs_ail_push( + struct xfs_ail *ailp, + xfs_lsn_t threshold_lsn) +{ + xfs_log_item_t *lip; + + lip = xfs_ail_min(ailp); + if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) || + XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0) + return; + + /* + * Ensure that the new target is noticed in push code before it clears + * the XFS_AIL_PUSHING_BIT. 
+ */ + smp_wmb(); + xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn); + if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) + queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); } +/* + * Push out all items in the AIL immediately + */ +void +xfs_ail_push_all( + struct xfs_ail *ailp) +{ + xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp); + + if (threshold_lsn) + xfs_ail_push(ailp, threshold_lsn); +} /* * This is to be called when an item is unlocked that may have @@ -563,7 +734,7 @@ xfs_trans_ail_delete_bulk( spin_unlock(&ailp->xa_lock); if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, + xfs_alert_tag(mp, XFS_PTAG_AILDELETE, "%s: attempting to delete a log item that is not in the AIL", __func__); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); @@ -615,7 +786,6 @@ xfs_trans_ail_init( xfs_mount_t *mp) { struct xfs_ail *ailp; - int error; ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); if (!ailp) @@ -624,15 +794,9 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); spin_lock_init(&ailp->xa_lock); - error = xfsaild_start(ailp); - if (error) - goto out_free_ailp; + INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); mp->m_ail = ailp; return 0; - -out_free_ailp: - kmem_free(ailp); - return error; } void @@ -641,124 +805,6 @@ xfs_trans_ail_destroy( { struct xfs_ail *ailp = mp->m_ail; - xfsaild_stop(ailp); + cancel_delayed_work_sync(&ailp->xa_work); kmem_free(ailp); } - -/* - * splice the log item list into the AIL at the given LSN. - */ -STATIC void -xfs_ail_splice( - struct xfs_ail *ailp, - struct list_head *list, - xfs_lsn_t lsn) -{ - xfs_log_item_t *next_lip; - - /* - * If the list is empty, just insert the item. 
- */ - if (list_empty(&ailp->xa_ail)) { - list_splice(list, &ailp->xa_ail); - return; - } - - list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) - break; - } - - ASSERT((&next_lip->li_ail == &ailp->xa_ail) || - (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)); - - list_splice_init(list, &next_lip->li_ail); - return; -} - -/* - * Delete the given item from the AIL. Return a pointer to the item. - */ -STATIC void -xfs_ail_delete( - struct xfs_ail *ailp, - xfs_log_item_t *lip) -{ - xfs_ail_check(ailp, lip); - list_del(&lip->li_ail); - xfs_trans_ail_cursor_clear(ailp, lip); -} - -/* - * Return a pointer to the first item in the AIL. - * If the AIL is empty, then return NULL. - */ -STATIC xfs_log_item_t * -xfs_ail_min( - struct xfs_ail *ailp) -{ - if (list_empty(&ailp->xa_ail)) - return NULL; - - return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); -} - -/* - * Return a pointer to the item which follows - * the given item in the AIL. If the given item - * is the last item in the list, then return NULL. - */ -STATIC xfs_log_item_t * -xfs_ail_next( - struct xfs_ail *ailp, - xfs_log_item_t *lip) -{ - if (lip->li_ail.next == &ailp->xa_ail) - return NULL; - - return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); -} - -#ifdef DEBUG -/* - * Check that the list is sorted as it should be. - */ -STATIC void -xfs_ail_check( - struct xfs_ail *ailp, - xfs_log_item_t *lip) -{ - xfs_log_item_t *prev_lip; - - if (list_empty(&ailp->xa_ail)) - return; - - /* - * Check the next and previous entries are valid. 
- */ - ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); - prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); - - prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); - - -#ifdef XFS_TRANS_DEBUG - /* - * Walk the list checking lsn ordering, and that every entry has the - * XFS_LI_IN_AIL flag set. This is really expensive, so only do it - * when specifically debugging the transaction subsystem. - */ - prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); - list_for_each_entry(lip, &ailp->xa_ail, li_ail) { - if (&prev_lip->li_ail != &ailp->xa_ail) - ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); - ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); - prev_lip = lip; - } -#endif /* XFS_TRANS_DEBUG */ -} -#endif /* DEBUG */ diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index c47918c302a5..03b3b7f85a3b 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -305,7 +305,7 @@ xfs_trans_read_buf( if (xfs_error_target == target) { if (((xfs_req_num++) % xfs_error_mod) == 0) { xfs_buf_relse(bp); - cmn_err(CE_DEBUG, "Returning error!\n"); + xfs_debug(mp, "Returning error!"); return XFS_ERROR(EIO); } } @@ -383,7 +383,8 @@ xfs_trans_read_buf( bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK); if (bp == NULL) { *bpp = NULL; - return 0; + return (flags & XBF_TRYLOCK) ? 
+ 0 : XFS_ERROR(ENOMEM); } if (XFS_BUF_GETERROR(bp) != 0) { XFS_BUF_SUPER_STALE(bp); @@ -403,7 +404,7 @@ xfs_trans_read_buf( xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); xfs_buf_relse(bp); - cmn_err(CE_DEBUG, "Returning trans error!\n"); + xfs_debug(mp, "Returning trans error!"); return XFS_ERROR(EIO); } } @@ -427,7 +428,7 @@ shutdown_abort: */ #if defined(DEBUG) if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) - cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); + xfs_notice(mp, "about to pop assert, bp == 0x%p", bp); #endif ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != (XBF_STALE|XBF_DELWRI)); diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index ccb34532768b..048b0c689d3e 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -44,28 +44,6 @@ xfs_trans_inode_broot_debug( #endif /* - * Get an inode and join it to the transaction. - */ -int -xfs_trans_iget( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - uint flags, - uint lock_flags, - xfs_inode_t **ipp) -{ - int error; - - error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp); - if (!error && tp) { - xfs_trans_ijoin(tp, *ipp); - (*ipp)->i_itemp->ili_lock_flags = lock_flags; - } - return error; -} - -/* * Add a locked inode to the transaction. * * The inode must be locked, and it cannot be associated with any transaction. @@ -103,7 +81,7 @@ xfs_trans_ijoin( * * * Grabs a reference to the inode which will be dropped when the transaction - * is commited. The inode will also be unlocked at that point. The inode + * is committed. The inode will also be unlocked at that point. The inode * must be locked, and it cannot be associated with any transaction. 
*/ void diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 35162c238fa3..6b164e9e9a1f 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -65,16 +65,22 @@ struct xfs_ail_cursor { struct xfs_ail { struct xfs_mount *xa_mount; struct list_head xa_ail; - uint xa_gen; - struct task_struct *xa_task; xfs_lsn_t xa_target; struct xfs_ail_cursor xa_cursors; spinlock_t xa_lock; + struct delayed_work xa_work; + xfs_lsn_t xa_last_pushed_lsn; + unsigned long xa_flags; }; +#define XFS_AIL_PUSHING_BIT 0 + /* * From xfs_trans_ail.c */ + +extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ + void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, struct xfs_log_item **log_items, int nr_items, xfs_lsn_t lsn) __releases(ailp->xa_lock); @@ -98,12 +104,13 @@ xfs_trans_ail_delete( xfs_trans_ail_delete_bulk(ailp, &lip, 1); } -void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); +void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); +void xfs_ail_push_all(struct xfs_ail *); +xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); + void xfs_trans_unlocked_item(struct xfs_ail *, xfs_log_item_t *); -xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); - struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn); @@ -112,11 +119,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); -long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); -void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); -int xfsaild_start(struct xfs_ail *); -void xfsaild_stop(struct xfs_ail *); - #if BITS_PER_LONG != 64 static inline void xfs_trans_ail_copy_lsn( diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 26d1867d8156..65584b55607d 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ 
typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ -typedef __uint32_t xlog_tid_t; /* transaction ID type */ - /* * These types are 64 bits on disk but are either 32 or 64 bits in memory. * Disk based types: diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index d8e6f8cd6f0c..b7a5fe7c52c8 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -953,7 +953,7 @@ xfs_release( * If we previously truncated this file and removed old data * in the process, we want to initiate "early" writeout on * the last close. This is an attempt to combat the notorious - * NULL files problem which is particularly noticable from a + * NULL files problem which is particularly noticeable from a * truncate down, buffered (re-)write (delalloc), followed by * a crash. What we are effectively doing here is * significantly reducing the time window where we'd otherwise @@ -982,7 +982,7 @@ xfs_release( * * Further, check if the inode is being opened, written and * closed frequently and we have delayed allocation blocks - * oustanding (e.g. streaming writes from the NFS server), + * outstanding (e.g. streaming writes from the NFS server), * truncating the blocks past EOF will cause fragmentation to * occur. * @@ -1189,9 +1189,8 @@ xfs_inactive( * inode might be lost for a long time or forever. 
*/ if (!XFS_FORCED_SHUTDOWN(mp)) { - cmn_err(CE_NOTE, - "xfs_inactive: xfs_ifree() returned an error = %d on %s", - error, mp->m_fsname); + xfs_notice(mp, "%s: xfs_ifree returned error %d", + __func__, error); xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); } xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); @@ -1208,12 +1207,12 @@ xfs_inactive( */ error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) - xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " - "xfs_bmap_finish() returned error %d", error); + xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", + __func__, error); error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) - xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " - "xfs_trans_commit() returned error %d", error); + xfs_notice(mp, "%s: xfs_trans_commit returned error %d", + __func__, error); } /* @@ -1310,7 +1309,7 @@ xfs_create( error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) - goto std_return; + return error; if (is_dir) { rdev = 0; @@ -1390,12 +1389,6 @@ xfs_create( } /* - * At this point, we've gotten a newly allocated inode. - * It is locked (and joined to the transaction). - */ - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - /* * Now we join the directory inode to the transaction. We do not do it * earlier because xfs_dir_ialloc might commit the previous transaction * (and release all the locks). An error from here on will result in @@ -1440,22 +1433,13 @@ xfs_create( */ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); - /* - * xfs_trans_commit normally decrements the vnode ref count - * when it unlocks the inode. Since we want to return the - * vnode to the caller, we bump the vnode ref count now. 
- */ - IHOLD(ip); - error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) - goto out_abort_rele; + goto out_bmap_cancel; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) { - IRELE(ip); - goto out_dqrele; - } + if (error) + goto out_release_inode; xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); @@ -1469,27 +1453,21 @@ xfs_create( cancel_flags |= XFS_TRANS_ABORT; out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); - out_dqrele: + out_release_inode: + /* + * Wait until after the current transaction is aborted to + * release the inode. This prevents recursive transactions + * and deadlocks from xfs_inactive. + */ + if (ip) + IRELE(ip); + xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); - std_return: return error; - - out_abort_rele: - /* - * Wait until after the current transaction is aborted to - * release the inode. This prevents recursive transactions - * and deadlocks from xfs_inactive. - */ - xfs_bmap_cancel(&free_list); - cancel_flags |= XFS_TRANS_ABORT; - xfs_trans_cancel(tp, cancel_flags); - IRELE(ip); - unlock_dp_on_error = B_FALSE; - goto out_dqrele; } #ifdef DEBUG @@ -2114,9 +2092,8 @@ xfs_symlink( XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &first_block, resblks, mval, &nmaps, &free_list); - if (error) { - goto error1; - } + if (error) + goto error2; if (resblks) resblks -= fs_blocks; @@ -2148,7 +2125,7 @@ xfs_symlink( error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, &first_block, &free_list, resblks); if (error) - goto error1; + goto error2; xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); @@ -2161,13 +2138,6 @@ xfs_symlink( xfs_trans_set_sync(tp); } - /* - * xfs_trans_commit normally decrements the vnode ref count - * when it unlocks the inode. Since we want to return the - * vnode to the caller, we bump the vnode ref count now. 
- */ - IHOLD(ip); - error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error2; @@ -2861,7 +2831,8 @@ xfs_change_file_space( ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); + if (attr_flags & XFS_ATTR_SYNC) + xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index f6702927eee4..3bcd23353d6c 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ #define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ +#define XFS_ATTR_SYNC 0x10 /* synchronous operation required */ int xfs_readlink(struct xfs_inode *ip, char *link); int xfs_release(struct xfs_inode *ip); |
