diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/autofs4/root.c | 5 | ||||
| -rw-r--r-- | fs/cachefiles/internal.h | 1 | ||||
| -rw-r--r-- | fs/cachefiles/namei.c | 98 | ||||
| -rw-r--r-- | fs/cachefiles/security.c | 4 | ||||
| -rw-r--r-- | fs/ceph/addr.c | 6 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 19 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 4 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 34 | ||||
| -rw-r--r-- | fs/ceph/messenger.c | 17 | ||||
| -rw-r--r-- | fs/ceph/messenger.h | 1 | ||||
| -rw-r--r-- | fs/ceph/osd_client.c | 26 | ||||
| -rw-r--r-- | fs/ceph/osd_client.h | 3 | ||||
| -rw-r--r-- | fs/ceph/osdmap.c | 29 | ||||
| -rw-r--r-- | fs/ceph/osdmap.h | 2 | ||||
| -rw-r--r-- | fs/ceph/rados.h | 1 | ||||
| -rw-r--r-- | fs/ceph/super.c | 23 | ||||
| -rw-r--r-- | fs/cifs/cifsglob.h | 1 | ||||
| -rw-r--r-- | fs/cifs/inode.c | 21 | ||||
| -rw-r--r-- | fs/compat.c | 2 | ||||
| -rw-r--r-- | fs/exec.c | 2 | ||||
| -rw-r--r-- | fs/namei.c | 6 | ||||
| -rw-r--r-- | fs/nfs/delegation.c | 86 | ||||
| -rw-r--r-- | fs/notify/inotify/inotify_fsnotify.c | 2 | ||||
| -rw-r--r-- | fs/notify/inotify/inotify_user.c | 16 | ||||
| -rw-r--r-- | fs/proc/array.c | 3 | ||||
| -rw-r--r-- | fs/proc/task_mmu.c | 19 |
26 files changed, 295 insertions, 136 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 109a6c606d92..e8e5e63ac950 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) } /* Trigger mount for path component or follow link */ } else if (ino->flags & AUTOFS_INF_PENDING || - autofs4_need_mount(flags) || - current->link_count) { + autofs4_need_mount(flags)) { DPRINTK("waiting for mount name=%.*s", dentry->d_name.len, dentry->d_name.name); @@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) spin_unlock(&dcache_lock); spin_unlock(&sbi->fs_lock); - status = try_to_fill_dentry(dentry, 0); + status = try_to_fill_dentry(dentry, nd->flags); if (status) goto out_error; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index f7c255f9c624..a8cd821226da 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -34,6 +34,7 @@ struct cachefiles_object { loff_t i_size; /* object size */ unsigned long flags; #define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ +#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */ atomic_t usage; /* object usage count */ uint8_t type; /* object type */ uint8_t new; /* T if object new */ diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index d5db84a1ee0d..f4a7840bf42c 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object, } /* + * mark the owner of a dentry, if there is one, to indicate that that dentry + * has been preemptively deleted + * - the caller must hold the i_mutex on the dentry's parent as required to + * call vfs_unlink(), vfs_rmdir() or vfs_rename() + */ +static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, + struct dentry *dentry) +{ + struct cachefiles_object *object; + struct rb_node *p; + + _enter(",'%*.*s'", + dentry->d_name.len, dentry->d_name.len, dentry->d_name.name); + + write_lock(&cache->active_lock); + + p = cache->active_nodes.rb_node; + while (p) { + object = rb_entry(p, struct cachefiles_object, active_node); + if (object->dentry > dentry) + p = p->rb_left; + else if (object->dentry < dentry) + p = p->rb_right; + else + goto found_dentry; + } + + write_unlock(&cache->active_lock); + _leave(" [no owner]"); + return; + + /* found the dentry for */ +found_dentry: + kdebug("preemptive burial: OBJ%x [%s] %p", + object->fscache.debug_id, + fscache_object_states[object->fscache.state], + dentry); + + if (object->fscache.state < FSCACHE_OBJECT_DYING) { + printk(KERN_ERR "\n"); + printk(KERN_ERR "CacheFiles: Error:" + " Can't preemptively bury live object\n"); + cachefiles_printk_object(object, NULL); + } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { + printk(KERN_ERR "CacheFiles: Error:" + " Object already preemptively buried\n"); + } + + write_unlock(&cache->active_lock); + _leave(" [owner marked]"); +} + +/* * record the fact that an object is now active */ static int cachefiles_mark_object_active(struct cachefiles_cache *cache, @@ -219,7 +272,8 @@ requeue: */ static int cachefiles_bury_object(struct cachefiles_cache *cache, struct dentry *dir, - struct dentry *rep) + struct dentry *rep, + bool preemptive) { struct dentry *grave, *trap; char nbuffer[8 + 8 + 1]; @@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, dir->d_name.len, dir->d_name.len, dir->d_name.name, rep->d_name.len, rep->d_name.len, rep->d_name.name); + _debug("remove %p from %p", rep, dir); + /* non-directories can just be unlinked */ if (!S_ISDIR(rep->d_inode->i_mode)) { _debug("unlink stale object"); ret = vfs_unlink(dir->d_inode, rep); + if (preemptive) + cachefiles_mark_object_buried(cache, rep); + mutex_unlock(&dir->d_inode->i_mutex); if (ret == -EIO) @@ -325,6 +384,9 @@ try_again: if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); + if (preemptive) + cachefiles_mark_object_buried(cache, rep); + unlock_rename(cache->graveyard, dir); dput(grave); _leave(" = 0"); @@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, struct dentry *dir; int ret; - _enter(",{%p}", object->dentry); + _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry); ASSERT(object->dentry); ASSERT(object->dentry->d_inode); @@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache, mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - /* we need to check that our parent is _still_ our parent - it may have - * been renamed */ - if (dir == object->dentry->d_parent) { - ret = cachefiles_bury_object(cache, dir, object->dentry); - } else { - /* it got moved, presumably by cachefilesd culling it, so it's - * no longer in the key path and we can ignore it */ + if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) { + /* object allocation for the same key preemptively deleted this + * object's file so that it could create its own file */ + _debug("object preemptively buried"); mutex_unlock(&dir->d_inode->i_mutex); ret = 0; + } else { + /* we need to check that our parent is _still_ our parent - it + * may have been renamed */ + if (dir == object->dentry->d_parent) { + ret = cachefiles_bury_object(cache, dir, + object->dentry, false); + } else { + /* it got moved, presumably by cachefilesd culling it, + * so it's no longer in the key path and we can ignore + * it */ + mutex_unlock(&dir->d_inode->i_mutex); + ret = 0; + } } dput(dir); @@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent, const char *name; int ret, nlen; - _enter("{%p},,%s,", parent->dentry, key); + _enter("OBJ%x{%p},OBJ%x,%s,", + parent->fscache.debug_id, parent->dentry, + object->fscache.debug_id, key); cache = container_of(parent->fscache.cache, struct cachefiles_cache, cache); @@ -509,7 +583,7 @@ lookup_again: * mutex) */ object->dentry = NULL; - ret = cachefiles_bury_object(cache, dir, next); + ret = cachefiles_bury_object(cache, dir, next, true); dput(next); next = NULL; @@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, /* actually remove the victim (drops the dir mutex) */ _debug("bury"); - ret = cachefiles_bury_object(cache, dir, victim); + ret = cachefiles_bury_object(cache, dir, victim, false); if (ret < 0) goto error; diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c index b5808cdb2232..039b5011d83b 100644 --- a/fs/cachefiles/security.c +++ b/fs/cachefiles/security.c @@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, /* * check the security details of the on-disk cache * - must be called with security override in force + * - must return with a security override in force - even in the case of an + * error */ int cachefiles_determine_cache_security(struct cachefiles_cache *cache, struct dentry *root, @@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache, * which create files */ ret = set_create_files_as(new, root->d_inode); if (ret < 0) { + abort_creds(new); + cachefiles_begin_secure(cache, _saved_cred); _leave(" = %d [cfa]", ret); return ret; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4b42c2bb603f..a9005d862ed4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -504,7 +504,6 @@ static void writepages_finish(struct ceph_osd_request *req, int i; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - struct writeback_control *wbc = req->r_wbc; __s32 rc = -EIO; u64 bytes = 0; struct ceph_client *client = ceph_inode_to_client(inode); @@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req, clear_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); - if (i >= wrote) { - dout("inode %p skipping page %p\n", inode, page); - wbc->pages_skipped++; - } ceph_put_snap_context((void *)page->private); page->private = 0; ClearPagePrivate(page); @@ -799,7 +794,6 @@ get_more_pages: alloc_page_vec(client, req); req->r_callback = writepages_finish; req->r_inode = inode; - req->r_wbc = wbc; } /* note position of first page in pvec */ diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 0c1681806867..d9400534b279 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci) } /* + * Remove a cap. Take steps to deal with a racing iterate_session_caps. + * * caller should hold i_lock. * caller will not hold session s_mutex if called from destroy_inode. */ @@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap) struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; + int removed = 0; dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); - /* remove from inode list */ - rb_erase(&cap->ci_node, &ci->i_caps); - cap->ci = NULL; - if (ci->i_auth_cap == cap) - ci->i_auth_cap = NULL; - /* remove from session list */ spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { @@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap) list_del_init(&cap->session_caps); session->s_nr_caps--; cap->session = NULL; + removed = 1; } + /* protect backpointer with s_cap_lock: see iterate_session_caps */ + cap->ci = NULL; spin_unlock(&session->s_cap_lock); - if (cap->session == NULL) + /* remove from inode list */ + rb_erase(&cap->ci_node, &ci->i_caps); + if (ci->i_auth_cap == cap) + ci->i_auth_cap = NULL; + + if (removed) ceph_put_cap(cap); if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 261f3e6c0bcf..85b4d2ffdeba 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -733,6 +733,10 @@ no_change: __ceph_get_fmode(ci, cap_fmode); spin_unlock(&inode->i_lock); } + } else if (cap_fmode >= 0) { + pr_warning("mds issued no caps on %llx.%llx\n", + ceph_vinop(inode)); + __ceph_get_fmode(ci, cap_fmode); } /* update delegation info? */ diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 60a9a4ae47be..24561a557e01 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session) } /* - * Helper to safely iterate over all caps associated with a session. + * Helper to safely iterate over all caps associated with a session, with + * special care taken to handle a racing __ceph_remove_cap(). * - * caller must hold session s_mutex + * Caller must hold session s_mutex. */ static int iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, struct ceph_cap *, @@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) struct ceph_mds_session *session = NULL; struct ceph_msg *reply; struct rb_node *p; - int err; + int err = -ENOMEM; struct ceph_pagelist *pagelist; pr_info("reconnect to recovering mds%d\n", mds); @@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) goto fail; err = iterate_session_caps(session, encode_caps_cb, pagelist); if (err < 0) - goto out; + goto fail; /* * snaprealms. we provide mds with the ino, seq (version), and @@ -2213,28 +2214,31 @@ send: reply->nr_pages = calc_pages_for(0, pagelist->length); ceph_con_send(&session->s_con, reply); - if (session) { - session->s_state = CEPH_MDS_SESSION_OPEN; - __wake_requests(mdsc, &session->s_waiting); - } + session->s_state = CEPH_MDS_SESSION_OPEN; + mutex_unlock(&session->s_mutex); + + mutex_lock(&mdsc->mutex); + __wake_requests(mdsc, &session->s_waiting); + mutex_unlock(&mdsc->mutex); + + ceph_put_mds_session(session); -out: up_read(&mdsc->snap_rwsem); - if (session) { - mutex_unlock(&session->s_mutex); - ceph_put_mds_session(session); - } mutex_lock(&mdsc->mutex); return; fail: ceph_msg_put(reply); + up_read(&mdsc->snap_rwsem); + mutex_unlock(&session->s_mutex); + ceph_put_mds_session(session); fail_nomsg: ceph_pagelist_release(pagelist); kfree(pagelist); fail_nopagelist: - pr_err("ENOMEM preparing reconnect for mds%d\n", mds); - goto out; + pr_err("error %d preparing reconnect for mds%d\n", err, mds); + mutex_lock(&mdsc->mutex); + return; } diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 509f57d9ccb3..cd4fadb6491a 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c @@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con) list_move_tail(&m->list_head, &con->out_sent); } - m->hdr.seq = cpu_to_le64(++con->out_seq); + /* + * only assign outgoing seq # if we haven't sent this message + * yet. if it is requeued, resend with it's original seq. + */ + if (m->needs_out_seq) { + m->hdr.seq = cpu_to_le64(++con->out_seq); + m->needs_out_seq = false; + } dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", m, con->out_seq, le16_to_cpu(m->hdr.type), @@ -1986,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); + msg->needs_out_seq = true; + /* queue */ mutex_lock(&con->mutex); BUG_ON(!list_empty(&msg->list_head)); @@ -2085,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, kref_init(&m->kref); INIT_LIST_HEAD(&m->list_head); + m->hdr.tid = 0; m->hdr.type = cpu_to_le16(type); + m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.version = 0; m->hdr.front_len = cpu_to_le32(front_len); m->hdr.middle_len = 0; m->hdr.data_len = cpu_to_le32(page_len); m->hdr.data_off = cpu_to_le16(page_off); - m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); + m->hdr.reserved = 0; m->footer.front_crc = 0; m->footer.middle_crc = 0; m->footer.data_crc = 0; + m->footer.flags = 0; m->front_max = front_len; m->front_is_vmalloc = false; m->more_to_follow = false; diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h index a343dae73cdc..a5caf91cc971 100644 --- a/fs/ceph/messenger.h +++ b/fs/ceph/messenger.h @@ -86,6 +86,7 @@ struct ceph_msg { struct kref kref; bool front_is_vmalloc; bool more_to_follow; + bool needs_out_seq; int front_max; struct ceph_msgpool *pool; diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index c7b4dedaace6..3514f71ff85f 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c @@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc, { struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; - int o = -1; + int acting[CEPH_PG_MAX_SIZE]; + int o = -1, num = 0; int err; dout("map_osds %p tid %lld\n", req, req->r_tid); @@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc, pgid = reqhead->layout.ol_pgid; req->r_pgid = pgid; - o = ceph_calc_pg_primary(osdc->osdmap, pgid); + err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); + if (err > 0) { + o = acting[0]; + num = err; + } if ((req->r_osd && req->r_osd->o_osd == o && - req->r_sent >= req->r_osd->o_incarnation) || + req->r_sent >= req->r_osd->o_incarnation && + req->r_num_pg_osds == num && + memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) || (req->r_osd == NULL && o == -1)) return 0; /* no change */ @@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc, req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, req->r_osd ? req->r_osd->o_osd : -1); + /* record full pg acting set */ + memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num); + req->r_num_pg_osds = num; + if (req->r_osd) { __cancel_request(req); list_del_init(&req->r_osd_item); @@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc, __remove_osd_from_lru(req->r_osd); list_add(&req->r_osd_item, &req->r_osd->o_requests); } - err = 1; /* osd changed */ + err = 1; /* osd or pg changed */ out: return err; @@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, struct ceph_osd_request *req; u64 tid; int numops, object_len, flags; + s32 result; tid = le64_to_cpu(msg->hdr.tid); if (msg->front.iov_len < sizeof(*rhead)) goto bad; numops = le32_to_cpu(rhead->num_ops); object_len = le32_to_cpu(rhead->object_len); + result = le32_to_cpu(rhead->result); if (msg->front.iov_len != sizeof(*rhead) + object_len + numops * sizeof(struct ceph_osd_op)) goto bad; - dout("handle_reply %p tid %llu\n", msg, tid); + dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); /* lookup */ mutex_lock(&osdc->request_mutex); @@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, dout("handle_reply tid %llu flags %d\n", tid, flags); /* either this is a read, or we got the safe response */ - if ((flags & CEPH_OSD_FLAG_ONDISK) || + if (result < 0 || + (flags & CEPH_OSD_FLAG_ONDISK) || ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h index b0759911e7c3..ce776989ef6a 100644 --- a/fs/ceph/osd_client.h +++ b/fs/ceph/osd_client.h @@ -48,6 +48,8 @@ struct ceph_osd_request { struct list_head r_osd_item; struct ceph_osd *r_osd; struct ceph_pg r_pgid; + int r_pg_osds[CEPH_PG_MAX_SIZE]; + int r_num_pg_osds; struct ceph_connection *r_con_filling_msg; @@ -66,7 +68,6 @@ struct ceph_osd_request { struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ - struct writeback_control *r_wbc; /* ditto */ char r_oid[40]; /* object name */ int r_oid_len; diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 2e2c15eed82a..cfdd8f4388b7 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c @@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* + * Return acting set for given pgid. + */ +int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, + int *acting) +{ + int rawosds[CEPH_PG_MAX_SIZE], *osds; + int i, o, num = CEPH_PG_MAX_SIZE; + + osds = calc_pg_raw(osdmap, pgid, rawosds, &num); + if (!osds) + return -1; + + /* primary is first up osd */ + o = 0; + for (i = 0; i < num; i++) + if (ceph_osd_is_up(osdmap, osds[i])) + acting[o++] = osds[i]; + return o; +} + +/* * Return primary osd for given pgid, or -1 if none. */ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) { - int rawosds[10], *osds; - int i, num = ARRAY_SIZE(rawosds); + int rawosds[CEPH_PG_MAX_SIZE], *osds; + int i, num = CEPH_PG_MAX_SIZE; osds = calc_pg_raw(osdmap, pgid, rawosds, &num); if (!osds) @@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) /* primary is first up osd */ for (i = 0; i < num; i++) - if (ceph_osd_is_up(osdmap, osds[i])) { + if (ceph_osd_is_up(osdmap, osds[i])) return osds[i]; - break; - } return -1; } diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h index 8bc9f1e4f562..970b547e510d 100644 --- a/fs/ceph/osdmap.h +++ b/fs/ceph/osdmap.h @@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, + int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index a1fc1d017b58..fd56451a871f 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h @@ -58,6 +58,7 @@ struct ceph_timespec { #define CEPH_PG_LAYOUT_LINEAR 2 #define CEPH_PG_LAYOUT_HYBRID 3 +#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ /* * placement group. diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f888cf487b7c..110857ba9269 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len) */ static void ceph_put_super(struct super_block *s) { - struct ceph_client *cl = ceph_client(s); + struct ceph_client *client = ceph_sb_to_client(s); dout("put_super\n"); - ceph_mdsc_close_sessions(&cl->mdsc); + ceph_mdsc_close_sessions(&client->mdsc); + + /* + * ensure we release the bdi before put_anon_super releases + * the device name. + */ + if (s->s_bdi == &client->backing_dev_info) { + bdi_unregister(&client->backing_dev_info); + s->s_bdi = NULL; + } + return; } @@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client) destroy_workqueue(client->pg_inv_wq); destroy_workqueue(client->trunc_wq); + bdi_destroy(&client->backing_dev_info); + if (client->msgr) ceph_messenger_destroy(client->msgr); mempool_destroy(client->wb_pagevec_pool); @@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) { int err; - sb->s_bdi = &client->backing_dev_info; - /* set ra_pages based on rsize mount option? */ if (client->mount_args->rsize >= PAGE_CACHE_SIZE) client->backing_dev_info.ra_pages = (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT; err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); + if (!err) + sb->s_bdi = &client->backing_dev_info; return err; } @@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s) dout("kill_sb %p\n", s); ceph_mdsc_pre_umount(&client->mdsc); kill_anon_super(s); /* will call put_super after sb is r/o */ - if (s->s_bdi == &client->backing_dev_info) - bdi_unregister(&client->backing_dev_info); - bdi_destroy(&client->backing_dev_info); ceph_destroy_client(client); } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ecf0ffbe2b64..0c2fd17439c8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -502,6 +502,7 @@ struct dfs_info3_param { #define CIFS_FATTR_DFS_REFERRAL 0x1 #define CIFS_FATTR_DELETE_PENDING 0x2 #define CIFS_FATTR_NEED_REVAL 0x4 +#define CIFS_FATTR_INO_COLLISION 0x8 struct cifs_fattr { u32 cf_flags; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 35ec11716213..29b9ea244c81 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque) if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) return 0; + /* + * uh oh -- it's a directory. We can't use it since hardlinked dirs are + * verboten. Disable serverino and return it as if it were found, the + * caller can discard it, generate a uniqueid and retry the find + */ + if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) { + fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; + cifs_autodisable_serverino(CIFS_SB(inode->i_sb)); + } + return 1; } @@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) unsigned long hash; struct inode *inode; +retry_iget5_locked: cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); /* hash down to 32-bits on 32-bit arch */ hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); - - /* we have fattrs in hand, update the inode */ if (inode) { + /* was there a problematic inode number collision? */ + if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) { + iput(inode); + fattr->cf_uniqueid = iunique(sb, ROOT_I); + fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION; + goto retry_iget5_locked; + } + cifs_fattr_to_inode(inode, fattr); if (sb->s_flags & MS_NOATIME) inode->i_flags |= S_NOATIME | S_NOCMTIME; diff --git a/fs/compat.c b/fs/compat.c index 4b6ed03cc478..05448730f840 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename, if (retval < 0) goto out; - current->stack_start = current->mm->start_stack; - /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/exec.c b/fs/exec.c index 49cdaa19e5b9..e6e94c626c2c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1387,8 +1387,6 @@ int do_execve(char * filename, if (retval < 0) goto out; - current->stack_start = current->mm->start_stack; - /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; diff --git a/fs/namei.c b/fs/namei.c index a7dce91a7e42..16df7277a92e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) { if (open_flag & O_CREAT) goto exit; - nd->flags |= LOOKUP_DIRECTORY; + nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW; } /* just plain open? */ @@ -1830,6 +1830,8 @@ reval: } if (open_flag & O_DIRECTORY) nd.flags |= LOOKUP_DIRECTORY; + if (!(open_flag & O_NOFOLLOW)) + nd.flags |= LOOKUP_FOLLOW; filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path holder; @@ -1837,7 +1839,7 @@ reval: void *cookie; error = -ELOOP; /* S_ISDIR part is a temporary automount kludge */ - if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode)) + if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) goto exit_dput; if (count++ == 32) goto exit_dput; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 15671245c6ee..ea61d26e7871 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -24,6 +24,8 @@ static void nfs_do_free_delegation(struct nfs_delegation *delegation) { + if (delegation->cred) + put_rpccred(delegation->cred); kfree(delegation); } @@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head) static void nfs_free_delegation(struct nfs_delegation *delegation) { - struct rpc_cred *cred; - - cred = rcu_dereference(delegation->cred); - rcu_assign_pointer(delegation->cred, NULL); call_rcu(&delegation->rcu, nfs_free_delegation_callback); - if (cred) - put_rpccred(cred); } void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) @@ -129,21 +125,35 @@ again: */ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { - struct nfs_delegation *delegation = NFS_I(inode)->delegation; - struct rpc_cred *oldcred; + struct nfs_delegation *delegation; + struct rpc_cred *oldcred = NULL; - if (delegation == NULL) - return; - memcpy(delegation->stateid.data, res->delegation.data, - sizeof(delegation->stateid.data)); - delegation->type = res->delegation_type; - delegation->maxsize = res->maxsize; - oldcred = delegation->cred; - delegation->cred = get_rpccred(cred); - clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); - NFS_I(inode)->delegation_state = delegation->type; - smp_wmb(); - put_rpccred(oldcred); + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) { + spin_lock(&delegation->lock); + if (delegation->inode != NULL) { + memcpy(delegation->stateid.data, res->delegation.data, + sizeof(delegation->stateid.data)); + delegation->type = res->delegation_type; + delegation->maxsize = res->maxsize; + oldcred = delegation->cred; + delegation->cred = get_rpccred(cred); + clear_bit(NFS_DELEGATION_NEED_RECLAIM, + &delegation->flags); + NFS_I(inode)->delegation_state = delegation->type; + spin_unlock(&delegation->lock); + put_rpccred(oldcred); + rcu_read_unlock(); + } else { + /* We appear to have raced with a delegation return. */ + spin_unlock(&delegation->lock); + rcu_read_unlock(); + nfs_inode_set_delegation(inode, cred, res); + } + } else { + rcu_read_unlock(); + } } static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) @@ -166,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation return inode; } -static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) +static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, + const nfs4_stateid *stateid, + struct nfs_client *clp) { - struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); + struct nfs_delegation *delegation = + rcu_dereference_protected(nfsi->delegation, + lockdep_is_held(&clp->cl_lock)); if (delegation == NULL) goto nomatch; @@ -195,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_delegation *delegation; + struct nfs_delegation *delegation, *old_delegation; struct nfs_delegation *freeme = NULL; int status = 0; @@ -213,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct spin_lock_init(&delegation->lock); spin_lock(&clp->cl_lock); - if (rcu_dereference(nfsi->delegation) != NULL) { - if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, - sizeof(delegation->stateid)) == 0 && - delegation->type == nfsi->delegation->type) { + old_delegation = rcu_dereference_protected(nfsi->delegation, + lockdep_is_held(&clp->cl_lock)); + if (old_delegation != NULL) { + if (memcmp(&delegation->stateid, &old_delegation->stateid, + sizeof(old_delegation->stateid)) == 0 && + delegation->type == old_delegation->type) { goto out; } /* @@ -226,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct dfprintk(FILE, "%s: server %s handed out " "a duplicate delegation!\n", __func__, clp->cl_hostname); - if (delegation->type <= nfsi->delegation->type) { + if (delegation->type <= old_delegation->type) { freeme = delegation; delegation = NULL; goto out; } - freeme = nfs_detach_delegation_locked(nfsi, NULL); + freeme = nfs_detach_delegation_locked(nfsi, NULL, clp); } list_add_rcu(&delegation->super_list, &clp->cl_delegations); nfsi->delegation_state = delegation->type; @@ -301,7 +317,7 @@ restart: if (inode == NULL) continue; spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); spin_unlock(&clp->cl_lock); rcu_read_unlock(); if (delegation != NULL) { @@ -330,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - if (rcu_dereference(nfsi->delegation) != NULL) { + if (rcu_access_pointer(nfsi->delegation) != NULL) { spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(nfsi, NULL); + delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); spin_unlock(&clp->cl_lock); if (delegation != NULL) nfs_do_return_delegation(inode, delegation, 0); @@ -346,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode) struct nfs_delegation *delegation; int err = 0; - if (rcu_dereference(nfsi->delegation) != NULL) { + if (rcu_access_pointer(nfsi->delegation) != NULL) { spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(nfsi, NULL); + delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); spin_unlock(&clp->cl_lock); if (delegation != NULL) { nfs_msync_inode(inode); @@ -526,7 +542,7 @@ restart: if (inode == NULL) continue; spin_lock(&clp->cl_lock); - delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); + delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp); spin_unlock(&clp->cl_lock); rcu_read_unlock(); if (delegation != NULL) diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 1afb0a10229f..e27960cd76ab 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -28,6 +28,7 @@ #include <linux/path.h> /* struct path */ #include <linux/slab.h> /* kmem_* */ #include <linux/types.h> +#include <linux/sched.h> #include "inotify.h" @@ -146,6 +147,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group) idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_remove_all(&group->inotify_data.idr); idr_destroy(&group->inotify_data.idr); + free_uid(group->inotify_data.user); } void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 472cdf29ef82..e46ca685b9be 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -546,21 +546,24 @@ retry: if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) goto out_err; + /* we are putting the mark on the idr, take a reference */ + fsnotify_get_mark(&tmp_ientry->fsn_entry); + spin_lock(&group->inotify_data.idr_lock); ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, group->inotify_data.last_wd+1, &tmp_ientry->wd); spin_unlock(&group->inotify_data.idr_lock); if (ret) { + /* we didn't get on the idr, drop the idr reference */ + fsnotify_put_mark(&tmp_ientry->fsn_entry); + /* idr was out of memory allocate and try again */ if (ret == -EAGAIN) goto retry; goto out_err; } - /* we put the mark on the idr, take a reference */ - fsnotify_get_mark(&tmp_ientry->fsn_entry); - /* we are on the idr, now get on the inode */ ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); if (ret) { @@ -578,16 +581,13 @@ retry: /* return the watch descriptor for this new entry */ ret = tmp_ientry->wd; - /* match the ref from fsnotify_init_markentry() */ - fsnotify_put_mark(&tmp_ientry->fsn_entry); - /* if this mark added a new event update the group mask */ if (mask & ~group->mask) fsnotify_recalc_group_mask(group); out_err: - if (ret < 0) - kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); + /* match the ref from fsnotify_init_markentry() */ + fsnotify_put_mark(&tmp_ientry->fsn_entry); return ret; } diff --git a/fs/proc/array.c b/fs/proc/array.c index e51f2ec2c5e5..885ab5513ac5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -81,7 +81,6 @@ #include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/tracehook.h> -#include <linux/swapops.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, rsslim, mm ? mm->start_code : 0, mm ? mm->end_code : 0, - (permitted && mm) ? task->stack_start : 0, + (permitted && mm) ? mm->start_stack : 0, esp, eip, /* The signal information here is obsolete. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 070553427dd5..47f5b145f56e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) } else if (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack) { name = "[stack]"; - } else { - unsigned long stack_start; - struct proc_maps_private *pmp; - - pmp = m->private; - stack_start = pmp->task->stack_start; - - if (vma->vm_start <= stack_start && - vma->vm_end >= stack_start) { - pad_len_spaces(m, len); - seq_printf(m, - "[threadstack:%08lx]", -#ifdef CONFIG_STACK_GROWSUP - vma->vm_end - stack_start -#else - stack_start - vma->vm_start -#endif - ); - } } } else { name = "[vdso]"; |
