From cb2c0233755429037462e16ea0d5497a0092738c Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 7 Jul 2005 17:56:03 -0700 Subject: [PATCH] export generic_drop_inode() to modules OCFS2 wants to mark an inode which has been orphaned by another node so that during final iput it takes the correct path through the VFS and can pass through the OCFS2 delete_inode callback. Since i_nlink can get out of date with other nodes, the best way I see to accomplish this is by clearing i_nlink on those inodes at drop_inode time. Other than this small amount of work, nothing different needs to happen, so I think it would be cleanest to be able to just call generic_drop_inode at the end of the OCFS2 drop_inode callback. Signed-off-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 047bde30836a..302ec20838ca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1435,6 +1435,7 @@ extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern void generic_delete_inode(struct inode *inode); +extern void generic_drop_inode(struct inode *inode); extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); -- cgit v1.2.3 From 79b9ce311e192e9a31fd9f3cf1ee4a4edf9e2650 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 7 Jul 2005 17:56:04 -0700 Subject: [PATCH] print order information when OOM killing Dump the current allocation order when OOM killing. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2343f999e6e1..c75954f2d868 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -148,7 +148,7 @@ struct swap_list_t { #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) /* linux/mm/oom_kill.c */ -extern void out_of_memory(unsigned int __nocast gfp_mask); +extern void out_of_memory(unsigned int __nocast gfp_mask, int order); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); -- cgit v1.2.3 From cf36680887d6d942d2119c1ff1dfb2428b0f21f4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:13 -0700 Subject: [PATCH] move ioprio syscalls into syscalls.h - Make ioprio syscalls return long, like set/getpriority syscalls. - Move function prototypes into syscalls.h so we can pick them up in the 32/64bit compat code. Signed-off-by: Anton Blanchard Acked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioprio.h | 3 --- include/linux/syscalls.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 8a453a0b5e4b..88d5961f7a3f 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -34,9 +34,6 @@ enum { */ #define IOPRIO_BE_NR (8) -asmlinkage int sys_ioprio_set(int, int, int); -asmlinkage int sys_ioprio_get(int, int); - enum { IOPRIO_WHO_PROCESS = 1, IOPRIO_WHO_PGRP, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 52830b6d94e5..425f58c8ea4a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -506,4 +506,7 @@ asmlinkage long sys_request_key(const char __user *_type, asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); +asmlinkage long sys_ioprio_set(int which, int who, int ioprio); +asmlinkage long sys_ioprio_get(int which, int who); + #endif -- cgit v1.2.3 From e00d9967e3addea86dded46deefc5daec5d52e5a Mon Sep 17 00:00:00 2001 From: Bernard Blackham Date: Thu, 7 Jul 2005 17:56:42 -0700 Subject: [PATCH] pm: fix u32 vs. pm_message_t confusion in cpufreq Fix u32 vs pm_message_t confusion in cpufreq. Signed-off-by: Bernard Blackham Signed-off-by: Pavel Machek Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpufreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 927daa86c9b3..ff7f80f48df1 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -201,7 +201,7 @@ struct cpufreq_driver { /* optional */ int (*exit) (struct cpufreq_policy *policy); - int (*suspend) (struct cpufreq_policy *policy, u32 state); + int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); struct freq_attr **attr; }; -- cgit v1.2.3 From a39722034ae37f80a1803bf781fe3fe1b03e20bc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 7 Jul 2005 17:56:56 -0700 Subject: [PATCH] page_uptodate locking scalability Use a bit spin lock in the first buffer of the page to synchronise asynch IO buffer completions, instead of the global page_uptodate_lock, which is showing some scalabilty problems. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 802c91e9b3da..90828493791f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -19,6 +19,9 @@ enum bh_state_bits { BH_Dirty, /* Is dirty */ BH_Lock, /* Is locked */ BH_Req, /* Has been submitted for I/O */ + BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise + * IO completion of other buffers in the page + */ BH_Mapped, /* Has a disk mapping */ BH_New, /* Disk mapping was newly created by get_block */ -- cgit v1.2.3 From 0db925af1db5f3dfe1691c35b39496e2baaff9c9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 7 Jul 2005 17:56:58 -0700 Subject: [PATCH] propagate __nocast annotations Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++-- include/linux/slab.h | 4 ++-- include/linux/string.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 8d6bf608b199..7c7400137e97 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -12,8 +12,8 @@ struct vm_area_struct; * GFP bitmasks.. */ /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */ -#define __GFP_DMA 0x01 -#define __GFP_HIGHMEM 0x02 +#define __GFP_DMA 0x01u +#define __GFP_HIGHMEM 0x02u /* * Action modifiers - doesn't change the zoning diff --git a/include/linux/slab.h b/include/linux/slab.h index 76cf7e60216c..4c8e552471b0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -65,7 +65,7 @@ extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags); +extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -105,7 +105,7 @@ extern unsigned int ksize(const void *); #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node(kmem_cache_t *, int flags, int node); -extern void *kmalloc_node(size_t size, int flags, int node); +extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node); #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node) { diff --git a/include/linux/string.h b/include/linux/string.h index 93994c613095..dab2652acbd8 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -88,7 +88,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif -extern char *kstrdup(const char *s, int gfp); +extern char *kstrdup(const char *s, unsigned int __nocast gfp); #ifdef __cplusplus } -- cgit v1.2.3 From 6c036527a630720063b67d9a65455e8caca2c8fa Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 7 Jul 2005 17:56:59 -0700 Subject: [PATCH] mostly_read data section Add a new section called ".data.read_mostly" for data items that are read frequently and rarely written to like cpumaps etc. If these maps are placed in the .data section then these frequenly read items may end up in cachelines with data is is frequently updated. In that case all processors in an SMP system must needlessly reload the cachelines again and again containing elements of those frequently used variables. The ability to share these cachelines will allow each cpu in an SMP system to keep local copies of those shared cachelines thereby optimizing performance. Signed-off-by: Alok N Kataria Signed-off-by: Shobhit Dayal Signed-off-by: Christoph Lameter Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cache.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cache.h b/include/linux/cache.h index 4d767b93738a..2b66a36d85f0 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -13,6 +13,12 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif +#ifdef CONFIG_X86 +#define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#else +#define __read_mostly +#endif + #ifndef ____cacheline_aligned #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #endif -- cgit v1.2.3 From 1ce88cf466f7b6078b14d67d186a3d7c19dd5609 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:24 -0700 Subject: [PATCH] namespace.c: fix race in mark_mounts_for_expiry() This patch fixes a race found by Ram in mark_mounts_for_expiry() in fs/namespace.c. The bug can only be triggered with simultaneous exiting of a process having a private namespace, and expiry of a mount from within that namespace. It's practically impossible to trigger, and I haven't even tried. But still, a bug is a bug. The race happens when put_namespace() is called by another task, while mark_mounts_for_expiry() is between atomic_read() and get_namespace(). In that case get_namespace() will be called on an already dead namespace with unforeseeable results. The solution was suggested by Al Viro, with his own words: Instead of screwing with atomic_read() in there, why don't we simply do the following: a) atomic_dec_and_lock() in put_namespace() b) __put_namespace() called without dropping lock c) the first thing done by __put_namespace would be struct vfsmount *root = namespace->root; namespace->root = NULL; spin_unlock(...); .... umount_tree(root); ... d) check in mark_... would be simply namespace && namespace->root. And we are all set; no screwing around with atomic_read(), no magic at all. Dying namespace gets NULL ->root. All changes of ->root happen under spinlock. If under a spinlock we see non-NULL ->mnt_namespace, it won't be freed until we drop the lock (we will set ->mnt_namespace to NULL under that lock before we get to freeing namespace). If under a spinlock we see non-NULL ->mnt_namespace and ->mnt_namespace->root, we can grab a reference to namespace and be sure that it won't go away. Signed-off-by: Miklos Szeredi Acked-by: Al Viro Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/namespace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namespace.h b/include/linux/namespace.h index 697991b69f9b..0e5a86f13b2f 100644 --- a/include/linux/namespace.h +++ b/include/linux/namespace.h @@ -17,7 +17,8 @@ extern void __put_namespace(struct namespace *namespace); static inline void put_namespace(struct namespace *namespace) { - if (atomic_dec_and_test(&namespace->count)) + if (atomic_dec_and_lock(&namespace->count, &vfsmount_lock)) + /* releases vfsmount_lock */ __put_namespace(namespace); } -- cgit v1.2.3 From 55e700b924f9e0ba24e3a071d1097d050b05abe6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:30 -0700 Subject: [PATCH] namespace: rename mnt_fslink to mnt_expire This patch renames vfsmount->mnt_fslink to something a little more descriptive: vfsmount->mnt_expire. Signed-off-by: Mike Waychison Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 8b8d3b9beefd..196d2d6de4a3 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -34,7 +34,7 @@ struct vfsmount int mnt_expiry_mark; /* true if marked for expiry */ char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; - struct list_head mnt_fslink; /* link in fs-specific expiry list */ + struct list_head mnt_expire; /* link in fs-specific expiry list */ struct namespace *mnt_namespace; /* containing namespace */ }; -- cgit v1.2.3 From 751c404b8f63e8199d5f2f8f2bcfd69b41d11caa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:30 -0700 Subject: [PATCH] namespace: rename _mntput to mntput_no_expire This patch renames _mntput() to something a little more descriptive: mntput_no_expire(). Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mount.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 196d2d6de4a3..74b4727a4e30 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -47,7 +47,7 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) extern void __mntput(struct vfsmount *mnt); -static inline void _mntput(struct vfsmount *mnt) +static inline void mntput_no_expire(struct vfsmount *mnt) { if (mnt) { if (atomic_dec_and_test(&mnt->mnt_count)) @@ -59,7 +59,7 @@ static inline void mntput(struct vfsmount *mnt) { if (mnt) { mnt->mnt_expiry_mark = 0; - _mntput(mnt); + mntput_no_expire(mnt); } } -- cgit v1.2.3 From a6ccbbb8865101d83c2e716f08feae1da1c48584 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:11 -0700 Subject: [PATCH] nfsd4: fix sync'ing of recovery directory We need to fsync the recovery directory after writing to it, but we weren't doing this correctly. (For example, we weren't taking the i_sem when calling ->fsync().) Just reuse the existing nfsd fsync code instead. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/nfsd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 5791dfd30dd0..c2da1b62d416 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -124,6 +124,7 @@ int nfsd_statfs(struct svc_rqst *, struct svc_fh *, int nfsd_notify_change(struct inode *, struct iattr *); int nfsd_permission(struct svc_export *, struct dentry *, int); +void nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL -- cgit v1.2.3 From 7fb64cee34f5dc743f697041717cafda8a94b5ac Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:20 -0700 Subject: [PATCH] nfsd4: seqid comments Add some comments on the use of so_seqid, in an attempt to avoid some of the confusion outlined in the previous patch.... Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/state.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a84a3fa99be1..2d19431f47ea 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -203,7 +203,9 @@ struct nfs4_stateowner { int so_is_open_owner; /* 1=openowner,0=lockowner */ u32 so_id; struct nfs4_client * so_client; - u32 so_seqid; + /* after increment in ENCODE_SEQID_OP_TAIL, represents the next + * sequence id expected from the client: */ + u32 so_seqid; struct xdr_netobj so_owner; /* open owner name */ int so_confirmed; /* successful OPEN_CONFIRM? */ struct nfs4_replay so_replay; -- cgit v1.2.3 From b700949b781480819e53bdc38a53f053226dd75e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:23 -0700 Subject: [PATCH] nfsd4: return better error on io incompatible with open mode from RFC 3530: "Share reservations are established by OPEN operations and by their nature are mandatory in that when the OPEN denies READ or WRITE operations, that denial results in such operations being rejected with error NFS4ERR_LOCKED." (Note that share_denied is really only a legal error for OPEN.) Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/nfsd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index c2da1b62d416..6d5a24f3fc6d 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -231,6 +231,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_reclaim_bad __constant_htonl(NFSERR_RECLAIM_BAD) #define nfserr_badname __constant_htonl(NFSERR_BADNAME) #define nfserr_cb_path_down __constant_htonl(NFSERR_CB_PATH_DOWN) +#define nfserr_locked __constant_htonl(NFSERR_LOCKED) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. -- cgit v1.2.3 From 4c4cd222ee329025840bc2f8cebf71d36c62440c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 7 Jul 2005 17:59:27 -0700 Subject: [PATCH] nfsd4: check lock type against openmode. We shouldn't be allowing, e.g., write locks on files not open for read. To enforce this, we add a pointer from the lock stateid back to the open stateid it came from, so that the check will continue to be correct even after the open is upgraded or downgraded. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/state.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 2d19431f47ea..8bf23cf8b603 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -237,6 +237,10 @@ struct nfs4_file { * st_perlockowner: (open stateid) list of lock nfs4_stateowners * st_access_bmap: used only for open stateid * st_deny_bmap: used only for open stateid +* st_openstp: open stateid lock stateid was derived from +* +* XXX: open stateids and lock stateids have diverged sufficiently that +* we should consider defining separate structs for the two cases. */ struct nfs4_stateid { @@ -250,6 +254,7 @@ struct nfs4_stateid { struct file * st_vfs_file; unsigned long st_access_bmap; unsigned long st_deny_bmap; + struct nfs4_stateid * st_openstp; }; /* flags for preprocess_seqid_op() */ -- cgit v1.2.3