From a19cbd4bf258840ade3b6ee9e9256006d0644e09 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Mar 2006 14:03:09 -0800 Subject: Mark the pipe file operations static They aren't used (nor even really usable) outside of pipe.c anyway Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e059da947007..0cc34b1c42c9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1418,9 +1418,6 @@ extern int is_bad_inode(struct inode *); extern struct file_operations read_fifo_fops; extern struct file_operations write_fifo_fops; extern struct file_operations rdwr_fifo_fops; -extern struct file_operations read_pipe_fops; -extern struct file_operations write_pipe_fops; -extern struct file_operations rdwr_pipe_fops; extern int fs_may_remount_ro(struct super_block *); -- cgit v1.2.3 From e2bab3d92486fb781f4d06f56339264ed1492392 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 7 Mar 2006 21:55:31 -0800 Subject: [PATCH] percpu_counter_sum() Implement percpu_counter_sum(). This is a more accurate but slower version of percpu_counter_read_positive(). We need this for Alex's speedup-ext3_statfs patch and for the nr_file accounting fix. Otherwise these things would be too inaccurate on large CPU counts. Cc: Ravikiran G Thirumalai Cc: Alex Tomas Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index bd6708e2c027..682525511c9e 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -39,6 +39,7 @@ static inline void percpu_counter_destroy(struct percpu_counter *fbc) } void percpu_counter_mod(struct percpu_counter *fbc, long amount); +long percpu_counter_sum(struct percpu_counter *fbc); static inline long percpu_counter_read(struct percpu_counter *fbc) { @@ -92,6 +93,11 @@ static inline long percpu_counter_read_positive(struct percpu_counter *fbc) return fbc->count; } +static inline long percpu_counter_sum(struct percpu_counter *fbc) +{ + return percpu_counter_read_positive(fbc); +} + #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) -- cgit v1.2.3 From 21a1ea9eb40411d4ee29448c53b9e4c0654d6ceb Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:33 -0800 Subject: [PATCH] rcu batch tuning This patch adds new tunables for RCU queue and finished batches. There are two types of controls - number of completed RCU updates invoked in a batch (blimit) and monitoring for high rate of incoming RCUs on a cpu (qhimark, qlowmark). By default, the per-cpu batch limit is set to a small value. If the input RCU rate exceeds the high watermark, we do two things - force quiescent state on all cpus and set the batch limit of the CPU to INTMAX. Setting batch limit to INTMAX forces all finished RCUs to be processed in one shot. If we have more than INTMAX RCUs queued up, then we have bigger problems anyway. Once the incoming queued RCUs fall below the low watermark, the batch limit is set to the default. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b87aefa082e2..c2ec6c77874e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -98,13 +98,17 @@ struct rcu_data { long batch; /* Batch # for current RCU batch */ struct rcu_head *nxtlist; struct rcu_head **nxttail; - long count; /* # of queued items */ + long qlen; /* # of queued callbacks */ struct rcu_head *curlist; struct rcu_head **curtail; struct rcu_head *donelist; struct rcu_head **donetail; + long blimit; /* Upper limit on a processed batch */ int cpu; struct rcu_head barrier; +#ifdef CONFIG_SMP + long last_rs_qlen; /* qlen during the last resched */ +#endif }; DECLARE_PER_CPU(struct rcu_data, rcu_data); -- cgit v1.2.3 From 529bf6be5c04f2e869d07bfdb122e9fd98ade714 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:35 -0800 Subject: [PATCH] fix file counting I have benchmarked this on an x86_64 NUMA system and see no significant performance difference on kernbench. Tested on both x86_64 and powerpc. The way we do file struct accounting is not very suitable for batched freeing. For scalability reasons, file accounting was constructor/destructor based. This meant that nr_files was decremented only when the object was removed from the slab cache. This is susceptible to slab fragmentation. With RCU based file structure, consequent batched freeing and a test program like Serge's, we just speed this up and end up with a very fragmented slab - llm22:~ # cat /proc/sys/fs/file-nr 587730 0 758844 At the same time, I see only a 2000+ objects in filp cache. The following patch I fixes this problem. This patch changes the file counting by removing the filp_count_lock. Instead we use a separate percpu counter, nr_files, for now and all accesses to it are through get_nr_files() api. In the sysctl handler for nr_files, we populate files_stat.nr_files before returning to user. Counting files as an when they are created and destroyed (as opposed to inside slab) allows us to correctly count open files with RCU. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/file.h | 2 -- include/linux/fs.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index 418b6101b59a..9901b850f2e4 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -60,8 +60,6 @@ extern void put_filp(struct file *); extern int get_unused_fd(void); extern void FASTCALL(put_unused_fd(unsigned int fd)); struct kmem_cache; -extern void filp_ctor(void * objp, struct kmem_cache *cachep, unsigned long cflags); -extern void filp_dtor(void * objp, struct kmem_cache *cachep, unsigned long dflags); extern struct file ** alloc_fd_array(int); extern void free_fd_array(struct file **, int); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0cc34b1c42c9..51c0c93bdf93 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -35,6 +35,7 @@ struct files_stat_struct { int max_files; /* tunable */ }; extern struct files_stat_struct files_stat; +extern int get_max_files(void); struct inodes_stat_t { int nr_inodes; -- cgit v1.2.3 From 0ef675d491bd65028fa838015ebc6ce8abefab6f Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Thu, 9 Mar 2006 17:33:38 -0800 Subject: [PATCH] mtd: 64 bit fixes Fix some bugs in mtd/jffs2 on 64bit platform. The MEMGETBADBLOCK/MEMSETBADBLOCK ioctl are not listed in compat_ioctl.h. And some variables in jffs2 are declared as uint32_t but used to hold size_t values. Signed-off-by: Atsushi Nemoto Cc: Thomas Gleixner Acked-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat_ioctl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 8fad50f8e389..ae7dfb790df3 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -696,6 +696,8 @@ COMPATIBLE_IOCTL(MEMLOCK) COMPATIBLE_IOCTL(MEMUNLOCK) COMPATIBLE_IOCTL(MEMGETREGIONCOUNT) COMPATIBLE_IOCTL(MEMGETREGIONINFO) +COMPATIBLE_IOCTL(MEMGETBADBLOCK) +COMPATIBLE_IOCTL(MEMSETBADBLOCK) /* NBD */ ULONG_IOCTL(NBD_SET_SOCK) ULONG_IOCTL(NBD_SET_BLKSIZE) -- cgit v1.2.3 From 8fce4d8e3b9e3cf47cc8afeb6077e22ab795d989 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 9 Mar 2006 17:33:54 -0800 Subject: [PATCH] slab: Node rotor for freeing alien caches and remote per cpu pages. The cache reaper currently tries to free all alien caches and all remote per cpu pages in each pass of cache_reap. For a machines with large number of nodes (such as Altix) this may lead to sporadic delays of around ~10ms. Interrupts are disabled while reclaiming creating unacceptable delays. This patch changes that behavior by adding a per cpu reap_node variable. Instead of attempting to free all caches, we free only one alien cache and the per cpu pages from one remote node. That reduces the time spend in cache_reap. However, doing so will lengthen the time it takes to completely drain all remote per cpu pagesets and all alien caches. The time needed will grow with the number of nodes in the system. All caches are drained when they overflow their respective capacity. So the drawback here is only that a bit of memory may be wasted for awhile longer. Details: 1. Rename drain_remote_pages to drain_node_pages to allow the specification of the node to drain of pcp pages. 2. Add additional functions init_reap_node, next_reap_node for NUMA that manage a per cpu reap_node counter. 3. Add a reap_alien function that reaps only from the current reap_node. For us this seems to be a critical issue. Holdoffs of an average of ~7ms cause some HPC benchmarks to slow down significantly. F.e. NAS parallel slows down dramatically. NAS parallel has a 12-16 seconds runtime w/o rotor compared to 5.8 secs with the rotor patches. It gets down to 5.05 secs with the additional interrupt holdoff reductions. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 20f9148e38d9..7851e6b520cf 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -157,9 +157,9 @@ extern void FASTCALL(free_cold_page(struct page *page)); void page_alloc_init(void); #ifdef CONFIG_NUMA -void drain_remote_pages(void); +void drain_node_pages(int node); #else -static inline void drain_remote_pages(void) { }; +static inline void drain_node_pages(int node) { }; #endif #endif /* __LINUX_GFP_H */ -- cgit v1.2.3 From 0adb25d2e71ab047423d6fc63d5d184590d0a66f Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Sat, 11 Mar 2006 03:27:13 -0800 Subject: [PATCH] ext3: ext3_symlink should use GFP_NOFS allocations inside This patch fixes illegal __GFP_FS allocation inside ext3 transaction in ext3_symlink(). Such allocation may re-enter ext3 code from try_to_free_pages. But JBD/ext3 code keeps a pointer to current journal handle in task_struct and, hence, is not reentrable. This bug led to "Assertion failure in journal_dirty_metadata()" messages. http://bugzilla.openvz.org/show_bug.cgi?id=115 Signed-off-by: Andrey Savochkin Signed-off-by: Kirill Korotaev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 51c0c93bdf93..128d0082522c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1664,6 +1664,8 @@ extern int vfs_follow_link(struct nameidata *, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); +extern int __page_symlink(struct inode *inode, const char *symname, int len, + gfp_t gfp_mask); extern int page_symlink(struct inode *inode, const char *symname, int len); extern struct inode_operations page_symlink_inode_operations; extern int generic_readlink(struct dentry *, char __user *, int); -- cgit v1.2.3 From 7cd9013be6c22f3ff6f777354f766c8c0b955e17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 11 Mar 2006 03:27:18 -0800 Subject: [PATCH] remove __put_task_struct_cb export again The patch '[PATCH] RCU signal handling' [1] added an export for __put_task_struct_cb, a put_task_struct helper newly introduced in that patch. But the put_task_struct couldn't be used modular previously as __put_task_struct wasn't exported. There are not callers of it in modular code, and it shouldn't be exported because we don't want drivers to hold references to task_structs. This patch removes the export and folds __put_task_struct into __put_task_struct_cb as there's no other caller. [1] http://www2.kernel.org/git/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e56d090310d7625ecb43a1eeebd479f04affb48b Signed-off-by: Christoph Hellwig Acked-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ff2e09c953b9..62e6314382f0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -892,7 +892,6 @@ static inline int pid_alive(struct task_struct *p) } extern void free_task(struct task_struct *tsk); -extern void __put_task_struct(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) extern void __put_task_struct_cb(struct rcu_head *rhp); -- cgit v1.2.3 From 4a29cc2e503b33a1e96db4c3f9a94165f153f259 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 19 Mar 2006 13:21:12 -0800 Subject: [TG3]: 40-bit DMA workaround part 2 The 40-bit DMA workaround recently implemented for 5714, 5715, and 5780 needs to be expanded because there may be other tg3 devices behind the EPB Express to PCIX bridge in the 5780 class device. For example, some 4-port card or mother board designs have 5704 behind the 5714. All devices behind the EPB require the 40-bit DMA workaround. Thanks to Chris Elmquist again for reporting the problem and testing the patch. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1709b5009d2e..751eea58bde8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1365,6 +1365,7 @@ #define PCI_DEVICE_ID_SERVERWORKS_HE 0x0008 #define PCI_DEVICE_ID_SERVERWORKS_LE 0x0009 #define PCI_DEVICE_ID_SERVERWORKS_GCNB_LE 0x0017 +#define PCI_DEVICE_ID_SERVERWORKS_EPB 0x0103 #define PCI_DEVICE_ID_SERVERWORKS_OSB4 0x0200 #define PCI_DEVICE_ID_SERVERWORKS_CSB5 0x0201 #define PCI_DEVICE_ID_SERVERWORKS_CSB6 0x0203 -- cgit v1.2.3