diff options
| author | Alex Shi <alex.shi@linaro.org> | 2016-02-29 10:18:54 +0800 |
|---|---|---|
| committer | Alex Shi <alex.shi@linaro.org> | 2016-02-29 10:18:54 +0800 |
| commit | 582ee3a96f6a108f589bdc4ce8cc8176c8d763e5 (patch) | |
| tree | ffb7c064fe0b7202ebc770cd3c69f5b802893215 /fs | |
| parent | 02bbd06e489a9f56910973535152d3ec47f3fdcc (diff) | |
| parent | 12a08707dec7ff067688710aee0d4698f6da98a6 (diff) | |
Merge branch 'linux-linaro-lsk-v4.4' into linux-linaro-lsk-v4.4-android
Diffstat (limited to 'fs')
41 files changed, 396 insertions, 143 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index d453d62ab0c6..e2f659dc5745 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1417,7 +1417,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, read_extent_buffer(eb, dest + bytes_left, name_off, name_len); if (eb != eb_in) { - btrfs_tree_read_unlock_blocking(eb); + if (!path->skip_locking) + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); } ret = btrfs_find_item(fs_root, path, parent, 0, @@ -1437,9 +1438,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, eb = path->nodes[0]; /* make sure we can use eb after releasing the path */ if (eb != eb_in) { - atomic_inc(&eb->refs); - btrfs_tree_read_lock(eb); - btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (!path->skip_locking) + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + path->nodes[0] = NULL; + path->locks[0] = 0; } btrfs_release_path(path); iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index e0941fbb913c..02b934d0ee65 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1694,7 +1694,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, * */ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list) + struct list_head *ins_list, bool *emitted) { struct btrfs_dir_item *di; struct btrfs_delayed_item *curr, *next; @@ -1738,6 +1738,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, if (over) return 1; + *emitted = true; } return 0; } diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index f70119f25421..0167853c84ae 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list, int btrfs_should_delete_dir_index(struct list_head *del_list, u64 index); int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, - struct list_head *ins_list); + struct list_head *ins_list, bool *emitted); /* for init */ int __init btrfs_delayed_inode_init(void); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 974be09e7556..0ddca6734494 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1762,7 +1762,6 @@ static int cleaner_kthread(void *arg) int again; struct btrfs_trans_handle *trans; - set_freezable(); do { again = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a70c5790f8f5..54b5f0de623b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5741,6 +5741,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) char *name_ptr; int name_len; int is_curr = 0; /* ctx->pos points to the current index? */ + bool emitted; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -5769,6 +5770,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (ret < 0) goto err; + emitted = false; while (1) { leaf = path->nodes[0]; slot = path->slots[0]; @@ -5848,6 +5850,7 @@ skip: if (over) goto nopos; + emitted = true; di_len = btrfs_dir_name_len(leaf, di) + btrfs_dir_data_len(leaf, di) + sizeof(*di); di_cur += di_len; @@ -5860,11 +5863,20 @@ next: if (key_type == BTRFS_DIR_INDEX_KEY) { if (is_curr) ctx->pos++; - ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); + ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted); if (ret) goto nopos; } + /* + * If we haven't emitted any dir entry, we must not touch ctx->pos as + * it was was set to the termination value in previous call. We assume + * that "." and ".." were emitted if we reach this point and set the + * termination value as well for an empty directory. + */ + if (ctx->pos > 2 && !emitted) + goto nopos; + /* Reached end of directory/root. Bump pos past the last item. */ ctx->pos++; @@ -7985,6 +7997,7 @@ static void btrfs_endio_direct_read(struct bio *bio) kfree(dip); + dio_bio->bi_error = bio->bi_error; dio_end_io(dio_bio, bio->bi_error); if (io_bio->end_io) @@ -8030,6 +8043,7 @@ out_test: kfree(dip); + dio_bio->bi_error = bio->bi_error; dio_end_io(dio_bio, bio->bi_error); bio_put(bio); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index da94138eb85e..08fd3f0f34fd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2782,24 +2782,29 @@ out: static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) { struct page *page; - struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; page = grab_cache_page(inode->i_mapping, index); if (!page) - return NULL; + return ERR_PTR(-ENOMEM); if (!PageUptodate(page)) { - if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, - 0)) - return NULL; + int ret; + + ret = btrfs_readpage(NULL, page); + if (ret) + return ERR_PTR(ret); lock_page(page); if (!PageUptodate(page)) { unlock_page(page); page_cache_release(page); - return NULL; + return ERR_PTR(-EIO); + } + if (page->mapping != inode->i_mapping) { + unlock_page(page); + page_cache_release(page); + return ERR_PTR(-EAGAIN); } } - unlock_page(page); return page; } @@ -2811,17 +2816,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages, pgoff_t index = off >> PAGE_CACHE_SHIFT; for (i = 0; i < num_pages; i++) { +again: pages[i] = extent_same_get_page(inode, index + i); - if (!pages[i]) - return -ENOMEM; + if (IS_ERR(pages[i])) { + int err = PTR_ERR(pages[i]); + + if (err == -EAGAIN) + goto again; + pages[i] = NULL; + return err; + } } return 0; } -static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) +static int lock_extent_range(struct inode *inode, u64 off, u64 len, + bool retry_range_locking) { - /* do any pending delalloc/csum calc on src, one way or - another, and lock file content */ + /* + * Do any pending delalloc/csum calculations on inode, one way or + * another, and lock file content. + * The locking order is: + * + * 1) pages + * 2) range in the inode's io tree + */ while (1) { struct btrfs_ordered_extent *ordered; lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); @@ -2839,8 +2858,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); if (ordered) btrfs_put_ordered_extent(ordered); + if (!retry_range_locking) + return -EAGAIN; btrfs_wait_ordered_range(inode, off, len); } + return 0; } static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) @@ -2865,15 +2887,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); } -static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len, + bool retry_range_locking) { + int ret; + if (inode1 < inode2) { swap(inode1, inode2); swap(loff1, loff2); } - lock_extent_range(inode1, loff1, len); - lock_extent_range(inode2, loff2, len); + ret = lock_extent_range(inode1, loff1, len, retry_range_locking); + if (ret) + return ret; + ret = lock_extent_range(inode2, loff2, len, retry_range_locking); + if (ret) + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, + loff1 + len - 1); + return ret; } struct cmp_pages { @@ -2889,11 +2920,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) for (i = 0; i < cmp->num_pages; i++) { pg = cmp->src_pages[i]; - if (pg) + if (pg) { + unlock_page(pg); page_cache_release(pg); + } pg = cmp->dst_pages[i]; - if (pg) + if (pg) { + unlock_page(pg); page_cache_release(pg); + } } kfree(cmp->src_pages); kfree(cmp->dst_pages); @@ -2954,6 +2989,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, src_page = cmp->src_pages[i]; dst_page = cmp->dst_pages[i]; + ASSERT(PageLocked(src_page)); + ASSERT(PageLocked(dst_page)); addr = kmap_atomic(src_page); dst_addr = kmap_atomic(dst_page); @@ -3066,14 +3103,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } +again: ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); if (ret) goto out_unlock; if (same_inode) - lock_extent_range(src, same_lock_start, same_lock_len); + ret = lock_extent_range(src, same_lock_start, same_lock_len, + false); else - btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len, + false); + /* + * If one of the inodes has dirty pages in the respective range or + * ordered extents, we need to flush dellaloc and wait for all ordered + * extents in the range. We must unlock the pages and the ranges in the + * io trees to avoid deadlocks when flushing delalloc (requires locking + * pages) and when waiting for ordered extents to complete (they require + * range locking). + */ + if (ret == -EAGAIN) { + /* + * Ranges in the io trees already unlocked. Now unlock all + * pages before waiting for all IO to complete. + */ + btrfs_cmp_data_free(&cmp); + if (same_inode) { + btrfs_wait_ordered_range(src, same_lock_start, + same_lock_len); + } else { + btrfs_wait_ordered_range(src, loff, len); + btrfs_wait_ordered_range(dst, dst_loff, len); + } + goto again; + } + ASSERT(ret == 0); + if (WARN_ON(ret)) { + /* ranges in the io trees already unlocked */ + btrfs_cmp_data_free(&cmp); + return ret; + } /* pass original length for comparison so we stay within i_size */ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); @@ -3895,9 +3964,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 lock_start = min_t(u64, off, destoff); u64 lock_len = max_t(u64, off, destoff) + len - lock_start; - lock_extent_range(src, lock_start, lock_len); + ret = lock_extent_range(src, lock_start, lock_len, true); } else { - btrfs_double_extent_lock(src, off, inode, destoff, len); + ret = btrfs_double_extent_lock(src, off, inode, destoff, len, + true); + } + ASSERT(ret == 0); + if (WARN_ON(ret)) { + /* ranges in the io trees already unlocked */ + goto out_unlock; } ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a23399e8e3ab..9e084477d320 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1257,6 +1257,15 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction, int ret; int slot; struct extent_buffer *l; + u64 min_search_start; + + /* + * We don't want to overwrite the superblock on the drive nor any area + * used by the boot loader (grub for example), so we make sure to start + * at an offset of at least 1MB. + */ + min_search_start = max(root->fs_info->alloc_start, 1024ull * 1024); + search_start = max(search_start, min_search_start); path = btrfs_alloc_path(); if (!path) @@ -1397,18 +1406,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *len) { - struct btrfs_root *root = device->dev_root; - u64 search_start; - /* FIXME use last free of some kind */ - - /* - * we don't want to overwrite the superblock on the drive, - * so we make sure to start at an offset of at least 1MB - */ - search_start = max(root->fs_info->alloc_start, 1024ull * 1024); return find_free_dev_extent_start(trans->transaction, device, - num_bytes, search_start, start, len); + num_bytes, 0, start, len); } static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, @@ -6512,6 +6512,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) goto out_short_read; num_stripes = btrfs_chunk_num_stripes(sb, chunk); + if (!num_stripes) { + printk(KERN_ERR + "BTRFS: invalid number of stripes %u in sys_array at offset %u\n", + num_stripes, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 7febcf2475c5..50b268483302 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -50,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - pr_err("CIFS VFS: %pV", &vaf); + pr_err_ratelimited("CIFS VFS: %pV", &vaf); va_end(args); } diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index f40fbaca1b2a..66cf0f9fff89 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -51,14 +51,13 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...); /* information message: e.g., configuration, major event */ #define cifs_dbg(type, fmt, ...) \ do { \ - if (type == FYI) { \ - if (cifsFYI & CIFS_INFO) { \ - pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \ - } \ + if (type == FYI && cifsFYI & CIFS_INFO) { \ + pr_debug_ratelimited("%s: " \ + fmt, __FILE__, ##__VA_ARGS__); \ } else if (type == VFS) { \ cifs_vfs_err(fmt, ##__VA_ARGS__); \ } else if (type == NOISY && type != 0) { \ - pr_debug(fmt, ##__VA_ARGS__); \ + pr_debug_ratelimited(fmt, ##__VA_ARGS__); \ } \ } while (0) diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index afa09fce8151..e682b36a210f 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -714,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); if (!ses->auth_key.response) { - rc = ENOMEM; + rc = -ENOMEM; ses->auth_key.len = 0; goto setup_ntlmv2_rsp_ret; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ecb0803bdb0e..3c194ff0d2f0 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -368,7 +368,6 @@ cifs_reconnect(struct TCP_Server_Info *server) server->session_key.response = NULL; server->session_key.len = 0; server->lstrp = jiffies; - mutex_unlock(&server->srv_mutex); /* mark submitted MIDs for retry and issue callback */ INIT_LIST_HEAD(&retry_list); @@ -381,6 +380,7 @@ cifs_reconnect(struct TCP_Server_Info *server) list_move(&mid_entry->qhead, &retry_list); } spin_unlock(&GlobalMid_Lock); + mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__); list_for_each_safe(tmp, tmp2, &retry_list) { diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 0557c45e9c33..b30a4a6d98a0 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -847,6 +847,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) * if buggy server returns . and .. late do we want to * check for that here? */ + *tmp_buf = 0; rc = cifs_filldir(current_entry, file, ctx, tmp_buf, max_len); if (rc) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 2a24c524fb9a..87abe8ed074c 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -576,14 +576,16 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, cifs_in_send_dec(server); cifs_save_when_sent(mid); - if (rc < 0) + if (rc < 0) { server->sequence_number -= 2; + cifs_delete_mid(mid); + } + mutex_unlock(&server->srv_mutex); if (rc == 0) return 0; - cifs_delete_mid(mid); add_credits_and_wake_if(server, credits, optype); return rc; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c35ffdc12bba..706de324f2a6 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -575,6 +575,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) mutex_unlock(&allocated_ptys_lock); } +/* + * pty code needs to hold extra references in case of last /dev/tty close + */ + +void devpts_add_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + atomic_inc(&sb->s_active); + ihold(ptmx_inode); +} + +void devpts_del_ref(struct inode *ptmx_inode) +{ + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + + iput(ptmx_inode); + deactivate_super(sb); +} + /** * devpts_pty_new -- create a new inode in /dev/pts/ * @ptmx_inode: inode of the master diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index ec0668a60678..fe1f50fe764f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -191,7 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - ext4_error(sb, "Checksum bad for group %u", block_group); grp = ext4_get_group_info(sb, block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, @@ -442,14 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { - err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) + if (err) { + ext4_error(sb, "Failed to init block bitmap for group " + "%u: %d", block_group, err); goto out; + } goto verify; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index c5882b36e558..9a16d1e75a49 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -213,9 +213,11 @@ retry: res = -ENOKEY; goto out; } + down_read(&keyring_key->sem); ukp = user_key_payload(keyring_key); if (ukp->datalen != sizeof(struct ext4_encryption_key)) { res = -EINVAL; + up_read(&keyring_key->sem); goto out; } master_key = (struct ext4_encryption_key *)ukp->data; @@ -226,10 +228,12 @@ retry: "ext4: key size incorrect: %d\n", master_key->size); res = -ENOKEY; + up_read(&keyring_key->sem); goto out; } res = ext4_derive_key_aes(ctx.nonce, master_key->raw, raw_key); + up_read(&keyring_key->sem); if (res) goto out; got_key: diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 1b8024d26f65..53f2b98a69f3 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -76,7 +76,6 @@ static int ext4_init_inode_bitmap(struct super_block *sb, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - ext4_error(sb, "Checksum bad for group %u", block_group); grp = ext4_get_group_info(sb, block_group); if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) percpu_counter_sub(&sbi->s_freeclusters_counter, @@ -191,8 +190,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) + if (err) { + ext4_error(sb, "Failed to init inode bitmap for group " + "%u: %d", block_group, err); goto out; + } return bh; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index fb6f11709ae6..e032a0423e35 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -265,11 +265,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, ext4_lblk_t orig_blk_offset, donor_blk_offset; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int tmp_data_size, data_size, replaced_size; - int err2, jblocks, retries = 0; + int i, err2, jblocks, retries = 0; int replaced_count = 0; int from = data_offset_in_page << orig_inode->i_blkbits; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; struct super_block *sb = orig_inode->i_sb; + struct buffer_head *bh = NULL; /* * It needs twice the amount of ordinary journal buffers because @@ -380,8 +381,16 @@ data_copy: } /* Perform all necessary steps similar write_begin()/write_end() * but keeping in mind that i_size will not change */ - *err = __block_write_begin(pagep[0], from, replaced_size, - ext4_get_block); + if (!page_has_buffers(pagep[0])) + create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0); + bh = page_buffers(pagep[0]); + for (i = 0; i < data_offset_in_page; i++) + bh = bh->b_this_page; + for (i = 0; i < block_len_in_page; i++) { + *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0); + if (*err < 0) + break; + } if (!*err) *err = block_commit_write(pagep[0], from, from + replaced_size); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ad62d7acc315..34038e3598d5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) + if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) goto out2; flex_gd->count = flexbg_size; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index de4bdfac0cec..595ebdb41846 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -463,6 +463,7 @@ hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end) */ vma_interval_tree_foreach(vma, root, start, end ? end : ULONG_MAX) { unsigned long v_offset; + unsigned long v_end; /* * Can the expression below overflow on 32-bit arches? @@ -475,15 +476,17 @@ hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end) else v_offset = 0; - if (end) { - end = ((end - start) << PAGE_SHIFT) + - vma->vm_start + v_offset; - if (end > vma->vm_end) - end = vma->vm_end; - } else - end = vma->vm_end; + if (!end) + v_end = vma->vm_end; + else { + v_end = ((end - vma->vm_pgoff) << PAGE_SHIFT) + + vma->vm_start; + if (v_end > vma->vm_end) + v_end = vma->vm_end; + } - unmap_hugepage_range(vma, vma->vm_start + v_offset, end, NULL); + unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end, + NULL); } } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 03516c80855a..2a2e2d8ddee5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -145,7 +145,7 @@ static bool ff_mirror_match_fh(const struct nfs4_ff_layout_mirror *m1, return false; for (i = 0; i < m1->fh_versions_cnt; i++) { bool found_fh = false; - for (j = 0; j < m2->fh_versions_cnt; i++) { + for (j = 0; j < m2->fh_versions_cnt; j++) { if (nfs_compare_fh(&m1->fh_versions[i], &m2->fh_versions[j]) == 0) { found_fh = true; @@ -1859,11 +1859,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, start = xdr_reserve_space(xdr, 4); BUG_ON(!start); - if (ff_layout_encode_ioerr(flo, xdr, args)) - goto out; - + ff_layout_encode_ioerr(flo, xdr, args); ff_layout_encode_iostats(flo, xdr, args); -out: + *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c7e8b87da5b2..3e2071a177fd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1641,6 +1641,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long invalid = 0; unsigned long now = jiffies; unsigned long save_cache_validity; + bool cache_revalidated = true; dfprintk(VFS, "NFS: %s(%s/%lu fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1702,22 +1703,28 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; } - } else + } else { nfsi->cache_validity |= save_cache_validity; + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_MTIME) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - } else if (server->caps & NFS_CAP_MTIME) + } else if (server->caps & NFS_CAP_MTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_CTIME) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - } else if (server->caps & NFS_CAP_CTIME) + } else if (server->caps & NFS_CAP_CTIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1737,19 +1744,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (long long)cur_isize, (long long)new_isize); } - } else + } else { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - else if (server->caps & NFS_CAP_ATIME) + else if (server->caps & NFS_CAP_ATIME) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { @@ -1758,36 +1769,42 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_mode = newmode; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - } else if (server->caps & NFS_CAP_MODE) + } else if (server->caps & NFS_CAP_MODE) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (!uid_eq(inode->i_uid, fattr->uid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } - } else if (server->caps & NFS_CAP_OWNER) + } else if (server->caps & NFS_CAP_OWNER) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (!gid_eq(inode->i_gid, fattr->gid)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } - } else if (server->caps & NFS_CAP_OWNER_GROUP) + } else if (server->caps & NFS_CAP_OWNER_GROUP) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { @@ -1796,19 +1813,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_DATA; set_nlink(inode, fattr->nlink); } - } else if (server->caps & NFS_CAP_NLINK) + } else if (server->caps & NFS_CAP_NLINK) { nfsi->cache_validity |= save_cache_validity & (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); + cache_revalidated = false; + } if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* * report the blocks in 512byte units */ inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); - } - if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) + } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; + else + cache_revalidated = false; /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { @@ -1818,9 +1838,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* Set barrier to be more recent than all outstanding updates */ nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { - if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { - if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) - nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + if (cache_revalidated) { + if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, + nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { + nfsi->attrtimeo <<= 1; + if (nfsi->attrtimeo > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + } nfsi->attrtimeo_timestamp = now; } /* Set the barrier to be more recent than this fattr */ @@ -1829,7 +1853,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } /* Don't declare attrcache up to date if there were no attrs! */ - if (fattr->valid != 0) + if (cache_revalidated) invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 89818036f035..f496ed721d27 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1385,6 +1385,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s * Protect the call to nfs4_state_set_mode_locked and * serialise the stateid update */ + spin_lock(&state->owner->so_lock); write_seqlock(&state->seqlock); if (deleg_stateid != NULL) { nfs4_stateid_copy(&state->stateid, deleg_stateid); @@ -1393,7 +1394,6 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s if (open_stateid != NULL) nfs_set_open_stateid_locked(state, open_stateid, fmode); write_sequnlock(&state->seqlock); - spin_lock(&state->owner->so_lock); update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); } @@ -8054,7 +8054,6 @@ static void nfs4_layoutreturn_release(void *calldata) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); pnfs_clear_layoutreturn_waitbit(lo); - lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); pnfs_free_lseg_list(&freeme); pnfs_put_layout_hdr(lrp->args.layout); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 84f2f8079466..4e2162b355db 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2519,6 +2519,11 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, spin_lock(&dlm->master_lock); ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, namelen, target, dlm->node_num); + /* get an extra reference on the mle. + * otherwise the assert_master from the new + * master will destroy this. + */ + dlm_get_mle_inuse(mle); spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); @@ -2554,6 +2559,7 @@ fail: if (mle_added) { dlm_mle_detach_hb_events(dlm, mle); dlm_put_mle(mle); + dlm_put_mle_inuse(mle); } else if (mle) { kmem_cache_free(dlm_mle_cache, mle); mle = NULL; @@ -2571,17 +2577,6 @@ fail: * ensure that all assert_master work is flushed. */ flush_workqueue(dlm->dlm_worker); - /* get an extra reference on the mle. - * otherwise the assert_master from the new - * master will destroy this. - * also, make sure that all callers of dlm_get_mle - * take both dlm->spinlock and dlm->master_lock */ - spin_lock(&dlm->spinlock); - spin_lock(&dlm->master_lock); - dlm_get_mle_inuse(mle); - spin_unlock(&dlm->master_lock); - spin_unlock(&dlm->spinlock); - /* notify new node and send all lock state */ /* call send_one_lockres with migration flag. * this serves as notice to the target node that a @@ -3312,6 +3307,15 @@ top: mle->new_master != dead_node) continue; + if (mle->new_master == dead_node && mle->inuse) { + mlog(ML_NOTICE, "%s: target %u died during " + "migration from %u, the MLE is " + "still keep used, ignore it!\n", + dlm->name, dead_node, + mle->master); + continue; + } + /* If we have reached this point, this mle needs to be * removed from the list and freed. */ dlm_clean_migration_mle(dlm, mle); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 9e4f862d20fe..42f0cae93a0a 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2360,6 +2360,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) break; } } + dlm_lockres_clear_refmap_bit(dlm, res, + dead_node); spin_unlock(&res->spinlock); continue; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 20276e340339..b002acf50203 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1390,6 +1390,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb, unsigned int gen; int noqueue_attempted = 0; int dlm_locked = 0; + int kick_dc = 0; if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) { mlog_errno(-EINVAL); @@ -1524,7 +1525,12 @@ update_holders: unlock: lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); + /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */ + kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED); + spin_unlock_irqrestore(&lockres->l_lock, flags); + if (kick_dc) + ocfs2_wake_downconvert_thread(osb); out: /* * This is helping work around a lock inversion between the page lock diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 0a8983492d91..eff6319d5037 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -22,9 +22,9 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) { - ssize_t list_size, size; - char *buf, *name, *value; - int error; + ssize_t list_size, size, value_size = 0; + char *buf, *name, *value = NULL; + int uninitialized_var(error); if (!old->d_inode->i_op->getxattr || !new->d_inode->i_op->getxattr) @@ -41,29 +41,40 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new) if (!buf) return -ENOMEM; - error = -ENOMEM; - value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); - if (!value) - goto out; - list_size = vfs_listxattr(old, buf, list_size); if (list_size <= 0) { error = list_size; - goto out_free_value; + goto out; } for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { - size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); - if (size <= 0) { +retry: + size = vfs_getxattr(old, name, value, value_size); + if (size == -ERANGE) + size = vfs_getxattr(old, name, NULL, 0); + + if (size < 0) { error = size; - goto out_free_value; + break; + } + + if (size > value_size) { + void *new; + + new = krealloc(value, size, GFP_KERNEL); + if (!new) { + error = -ENOMEM; + break; + } + value = new; + value_size = size; + goto retry; } + error = vfs_setxattr(new, name, value, size, 0); if (error) - goto out_free_value; + break; } - -out_free_value: kfree(value); out: kfree(buf); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 4060ffde8722..b29036aa8d7c 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -42,6 +42,19 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) int err; struct dentry *upperdentry; + /* + * Check for permissions before trying to copy-up. This is redundant + * since it will be rechecked later by ->setattr() on upper dentry. But + * without this, copy-up can be triggered by just about anybody. + * + * We don't initialize inode->size, which just means that + * inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not + * check for a swapfile (which this won't be anyway). + */ + err = inode_change_ok(dentry->d_inode, attr); + if (err) + return err; + err = ovl_want_write(dentry); if (err) goto out; diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 70e9af551600..adcb1398c481 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -571,7 +571,8 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) (int) PTR_ERR(dentry)); continue; } - ovl_cleanup(upper->d_inode, dentry); + if (dentry->d_inode) + ovl_cleanup(upper->d_inode, dentry); dput(dentry); } mutex_unlock(&upper->d_inode->i_mutex); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index e38ee0fed24a..f42c9407fbad 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/namei.h> +#include <linux/pagemap.h> #include <linux/xattr.h> #include <linux/security.h> #include <linux/mount.h> @@ -910,6 +911,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) } sb->s_stack_depth = 0; + sb->s_maxbytes = MAX_LFS_FILESIZE; if (ufs->config.upperdir) { if (!ufs->config.workdir) { pr_err("overlayfs: missing 'workdir'\n"); @@ -1053,6 +1055,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) root_dentry->d_fsdata = oe; + ovl_copyattr(ovl_dentry_real(root_dentry)->d_inode, + root_dentry->d_inode); + sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_op = &ovl_super_operations; sb->s_root = root_dentry; diff --git a/fs/proc/array.c b/fs/proc/array.c index d73291f5f0fc..b6c00ce0e29e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -395,7 +395,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, state = *get_task_state(task); vsize = eip = esp = 0; - permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT); + permitted = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS | PTRACE_MODE_NOAUDIT); mm = get_task_mm(task); if (mm) { vsize = task_vsize(mm); diff --git a/fs/proc/base.c b/fs/proc/base.c index 3b6962c52965..57df8a52e780 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -403,7 +403,7 @@ static const struct file_operations proc_pid_cmdline_ops = { static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ); + struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (mm && !IS_ERR(mm)) { unsigned int nwords = 0; do { @@ -430,7 +430,8 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, wchan = get_wchan(task); - if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname)) + if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) + && !lookup_symbol_name(wchan, symname)) seq_printf(m, "%s", symname); else seq_putc(m, '0'); @@ -444,7 +445,7 @@ static int lock_trace(struct task_struct *task) int err = mutex_lock_killable(&task->signal->cred_guard_mutex); if (err) return err; - if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { + if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { mutex_unlock(&task->signal->cred_guard_mutex); return -EPERM; } @@ -697,7 +698,7 @@ static int proc_fd_access_allowed(struct inode *inode) */ task = get_proc_task(inode); if (task) { - allowed = ptrace_may_access(task, PTRACE_MODE_READ); + allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); put_task_struct(task); } return allowed; @@ -732,7 +733,7 @@ static bool has_pid_permissions(struct pid_namespace *pid, return true; if (in_group_p(pid->pid_gid)) return true; - return ptrace_may_access(task, PTRACE_MODE_READ); + return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); } @@ -809,7 +810,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) struct mm_struct *mm = ERR_PTR(-ESRCH); if (task) { - mm = mm_access(task, mode); + mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); put_task_struct(task); if (!IS_ERR_OR_NULL(mm)) { @@ -1856,7 +1857,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) if (!task) goto out_notask; - mm = mm_access(task, PTRACE_MODE_READ); + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (IS_ERR_OR_NULL(mm)) goto out; @@ -2007,7 +2008,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, goto out; result = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; result = -ENOENT; @@ -2060,7 +2061,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) goto out; ret = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; ret = 0; @@ -2530,7 +2531,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh if (result) return result; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) { + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { result = -EACCES; goto out_unlock; } diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index f6e8354b8cea..1b0ea4a5d89e 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -42,7 +42,7 @@ static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie) if (!task) return error; - if (ptrace_may_access(task, PTRACE_MODE_READ)) { + if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { error = ns_get_path(&ns_path, task, ns_ops); if (!error) nd_jump_link(&ns_path); @@ -63,7 +63,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!task) return res; - if (ptrace_may_access(task, PTRACE_MODE_READ)) { + if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { res = ns_get_name(name, sizeof(name), task, ns_ops); if (res >= 0) res = readlink_copy(buffer, buflen, name); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 91698054b965..67aa7e63a5c1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1535,18 +1535,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { + pte_t huge_pte = huge_ptep_get(pte); struct numa_maps *md; struct page *page; - if (!pte_present(*pte)) + if (!pte_present(huge_pte)) return 0; - page = pte_page(*pte); + page = pte_page(huge_pte); if (!page) return 0; md = walk->private; - gather_stats(page, md, pte_dirty(*pte), 1); + gather_stats(page, md, pte_dirty(huge_pte), 1); return 0; } diff --git a/fs/timerfd.c b/fs/timerfd.c index b94fa6c3c6eb..053818dd6c18 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) if (isalarm(ctx)) remaining = alarm_expires_remaining(&ctx->t.alarm); else - remaining = hrtimer_expires_remaining(&ctx->t.tmr); + remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr); return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 8d0b3ade0ff0..566df9b5a6cb 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -2047,14 +2047,29 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos, epos->offset += adsize; } +/* + * Only 1 indirect extent in a row really makes sense but allow upto 16 in case + * someone does some weird stuff. + */ +#define UDF_MAX_INDIR_EXTS 16 + int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, struct kernel_lb_addr *eloc, uint32_t *elen, int inc) { int8_t etype; + unsigned int indirections = 0; while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) { int block; + + if (++indirections > UDF_MAX_INDIR_EXTS) { + udf_err(inode->i_sb, + "too many indirect extents in inode %lu\n", + inode->i_ino); + return -1; + } + epos->block = *eloc; epos->offset = sizeof(struct allocExtDesc); brelse(epos->bh); diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index ab478e62baae..e788a05aab83 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -128,11 +128,15 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) if (c < 0x80U) utf_o->u_name[utf_o->u_len++] = (uint8_t)c; else if (c < 0x800U) { + if (utf_o->u_len > (UDF_NAME_LEN - 4)) + break; utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6)); utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f)); } else { + if (utf_o->u_len > (UDF_NAME_LEN - 5)) + break; utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12)); utf_o->u_name[utf_o->u_len++] = @@ -173,17 +177,22 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) { unsigned c, i, max_val, utf_char; - int utf_cnt, u_len; + int utf_cnt, u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; max_val = 0xffU; + u_ch = 1; try_again: u_len = 0U; utf_char = 0U; utf_cnt = 0U; for (i = 0U; i < utf->u_len; i++) { + /* Name didn't fit? */ + if (u_len + 1 + u_ch >= length) + return 0; + c = (uint8_t)utf->u_name[i]; /* Complete a multi-byte UTF-8 character */ @@ -225,6 +234,7 @@ try_again: if (max_val == 0xffU) { max_val = 0xffffU; ocu[0] = (uint8_t)0x10U; + u_ch = 2; goto try_again; } goto error_out; @@ -277,7 +287,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, c = (c << 8) | ocu[i++]; len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], - UDF_NAME_LEN - utf_o->u_len); + UDF_NAME_LEN - 2 - utf_o->u_len); /* Valid character? */ if (len >= 0) utf_o->u_len += len; @@ -295,15 +305,19 @@ static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int len; unsigned i, max_val; uint16_t uni_char; - int u_len; + int u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; max_val = 0xffU; + u_ch = 1; try_again: u_len = 0U; for (i = 0U; i < uni->u_len; i++) { + /* Name didn't fit? */ + if (u_len + 1 + u_ch >= length) + return 0; len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); if (!len) continue; @@ -316,6 +330,7 @@ try_again: if (uni_char > max_val) { max_val = 0xffffU; ocu[0] = (uint8_t)0x10U; + u_ch = 2; goto try_again; } diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 8774498ce0ff..e2536bb1c760 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -786,7 +786,7 @@ typedef struct xfs_agfl { __be64 agfl_lsn; __be32 agfl_crc; __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ -} xfs_agfl_t; +} __attribute__((packed)) xfs_agfl_t; #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 268c00f4f83a..65485cfc4ade 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -62,11 +62,12 @@ xfs_inobp_check( * has not had the inode cores stamped into it. Hence for readahead, the buffer * may be potentially invalid. * - * If the readahead buffer is invalid, we don't want to mark it with an error, - * but we do want to clear the DONE status of the buffer so that a followup read - * will re-read it from disk. This will ensure that we don't get an unnecessary - * warnings during log recovery and we don't get unnecssary panics on debug - * kernels. + * If the readahead buffer is invalid, we need to mark it with an error and + * clear the DONE status of the buffer so that a followup read will re-read it + * from disk. We don't report the error otherwise to avoid warnings during log + * recovery and we don't get unnecssary panics on debug kernels. We use EIO here + * because all we want to do is say readahead failed; there is no-one to report + * the error to, so this will distinguish it from a non-ra verifier failure. */ static void xfs_inode_buf_verify( @@ -93,6 +94,7 @@ xfs_inode_buf_verify( XFS_RANDOM_ITOBP_INOTOBP))) { if (readahead) { bp->b_flags &= ~XBF_DONE; + xfs_buf_ioerror(bp, -EIO); return; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 3243cdf97f33..39090fc56f09 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -604,6 +604,13 @@ found: } } + /* + * Clear b_error if this is a lookup from a caller that doesn't expect + * valid data to be found in the buffer. + */ + if (!(flags & XBF_READ)) + xfs_buf_ioerror(bp, 0); + XFS_STATS_INC(target->bt_mount, xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); return bp; @@ -1520,6 +1527,16 @@ xfs_wait_buftarg( LIST_HEAD(dispose); int loop = 0; + /* + * We need to flush the buffer workqueue to ensure that all IO + * completion processing is 100% done. Just waiting on buffer locks is + * not sufficient for async IO as the reference count held over IO is + * not released until after the buffer lock is dropped. Hence we need to + * ensure here that all reference counts have been dropped before we + * start walking the LRU list. + */ + drain_workqueue(btp->bt_mount->m_buf_workqueue); + /* loop until there is nothing left on the lru list. */ while (list_lru_count(&btp->bt_lru)) { list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele, diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index aa67339b9537..4f18fd92ca13 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -497,7 +497,6 @@ xfsaild( long tout = 0; /* milliseconds */ current->flags |= PF_MEMALLOC; - set_freezable(); while (!kthread_should_stop()) { if (tout && tout <= 20) |
