From 16cb4b333c9e7a00ce3b1d74ec0c9b4c2e956910 Mon Sep 17 00:00:00 2001 From: Per Liden Date: Fri, 13 Jan 2006 22:22:22 +0100 Subject: [TIPC] Updated link priority macros Added macros for min/default/max link priority in tipc_config.h. Also renamed TIPC_NUM_LINK_PRI to TIPC_MEDIA_LINK_PRI since that is a more accurate description of what it is used for. Signed-off-by: Per Liden --- include/linux/tipc_config.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h index a52c8c64a5a3..33a653913d94 100644 --- a/include/linux/tipc_config.h +++ b/include/linux/tipc_config.h @@ -168,10 +168,13 @@ #define TIPC_MAX_LINK_NAME 60 /* format = Z.C.N:interface-Z.C.N:interface */ /* - * Link priority limits (range from 0 to # priorities - 1) + * Link priority limits (min, default, max, media default) */ -#define TIPC_NUM_LINK_PRI 32 +#define TIPC_MIN_LINK_PRI 0 +#define TIPC_DEF_LINK_PRI 10 +#define TIPC_MAX_LINK_PRI 31 +#define TIPC_MEDIA_LINK_PRI (TIPC_MAX_LINK_PRI + 1) /* * Link tolerance limits (min, default, max), in ms -- cgit v1.2.3 From 33a9c4da5ab16192ef1e961d4c4e45c18031cd67 Mon Sep 17 00:00:00 2001 From: Per Liden Date: Mon, 16 Jan 2006 11:42:12 +0100 Subject: [TIPC] Move ethernet protocol id to linux/if_ether.h Signed-off-by: Per Liden --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index fe26d431de87..7a92c1ce1457 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -72,6 +72,7 @@ * over Ethernet */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ +#define ETH_P_TIPC 0x88CA /* TIPC */ /* * Non DIX types. Won't clash for 1500 types. -- cgit v1.2.3 From d9004eb466d03b7900ed432fecec6819012b4ed3 Mon Sep 17 00:00:00 2001 From: Alon Bar-Lev Date: Wed, 18 Jan 2006 11:47:33 +0000 Subject: [SERIAL] Add 8250 support for Decision Computer International Co. PCCOM2 There is a new device which is look like: Serial controller: Decision Computer International Co. PCCOM2 (rev 02) (prog-if 02 [16550]) 0700: 6666:0004 (rev 02) (prog-if 02) Flags: medium devsel, IRQ 177 Memory at fe000000 (32-bit, non-prefetchable) [size=128] I/O ports at e880 [size=128] I/O ports at e400 [size=256] It has two 16550A, and is not listed in kernel, although the manufacturer clams that it is supported... I've created the following patch, it only add the new PCI id and the card to the repository, it seems to work. Signed-off-by: Alon Bar-Lev Signed-off-by: Russell King --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5403257ae3e7..ecc1fc1f0f04 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1992,6 +1992,7 @@ #define PCI_VENDOR_ID_DCI 0x6666 #define PCI_DEVICE_ID_DCI_PCCOM4 0x0001 #define PCI_DEVICE_ID_DCI_PCCOM8 0x0002 +#define PCI_DEVICE_ID_DCI_PCCOM2 0x0004 #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -- cgit v1.2.3 From 053837fce7aa79025ed57656855df09f80175527 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Jan 2006 17:42:27 -0800 Subject: [PATCH] mm: migration page refcounting fix Migration code currently does not take a reference to target page properly, so between unlocking the pte and trying to take a new reference to the page with isolate_lru_page, anything could happen to it. Fix this by holding the pte lock until we get a chance to elevate the refcount. Other small cleanups while we're here. Signed-off-by: Nick Piggin Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 21 --------------------- include/linux/swap.h | 1 + 2 files changed, 1 insertion(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 49cc68af01f8..8ac854f7f190 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page) } } -/* - * Isolate one page from the LRU lists. - * - * - zone->lru_lock must be held - */ -static inline int __isolate_lru_page(struct page *page) -{ - if (unlikely(!TestClearPageLRU(page))) - return 0; - - if (get_page_testone(page)) { - /* - * It is being freed elsewhere - */ - __put_page(page); - SetPageLRU(page); - return -ENOENT; - } - - return 1; -} diff --git a/include/linux/swap.h b/include/linux/swap.h index e92054d6530b..d01f7efb0f2c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *)); extern void FASTCALL(activate_page(struct page *)); extern void FASTCALL(mark_page_accessed(struct page *)); extern void lru_add_drain(void); +extern int lru_add_drain_all(void); extern int rotate_reclaimable_page(struct page *page); extern void swap_setup(void); -- cgit v1.2.3 From 9eeff2395e3cfd05c9b2e6074ff943a34b0c5c21 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 18 Jan 2006 17:42:31 -0800 Subject: [PATCH] Zone reclaim: Reclaim logic Some bits for zone reclaim exists in 2.6.15 but they are not usable. This patch fixes them up, removes unused code and makes zone reclaim usable. Zone reclaim allows the reclaiming of pages from a zone if the number of free pages falls below the watermarks even if other zones still have enough pages available. Zone reclaim is of particular importance for NUMA machines. It can be more beneficial to reclaim a page than taking the performance penalties that come with allocating a page on a remote zone. Zone reclaim is enabled if the maximum distance to another node is higher than RECLAIM_DISTANCE, which may be defined by an arch. By default RECLAIM_DISTANCE is 20. 20 is the distance to another node in the same component (enclosure or motherboard) on IA64. The meaning of the NUMA distance information seems to vary by arch. If zone reclaim is not successful then no further reclaim attempts will occur for a certain time period (ZONE_RECLAIM_INTERVAL). This patch was discussed before. See http://marc.theaimsgroup.com/?l=linux-kernel&m=113519961504207&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113408418232531&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113389027420032&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113380938612205&w=2 Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 12 +++++++----- include/linux/swap.h | 11 +++++++++++ include/linux/topology.h | 8 ++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 34cbefd2ebde..93a849f742db 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -149,14 +149,16 @@ struct zone { unsigned long pages_scanned; /* since last reclaim */ int all_unreclaimable; /* All pages pinned */ - /* - * Does the allocator try to reclaim pages from the zone as soon - * as it fails a watermark_ok() in __alloc_pages? - */ - int reclaim_pages; /* A count of how many reclaimers are scanning this zone */ atomic_t reclaim_in_progress; + /* + * timestamp (in jiffies) of the last zone reclaim that did not + * result in freeing of pages. This is used to avoid repeated scans + * if all memory in the zone is in use. + */ + unsigned long last_unsuccessful_zone_reclaim; + /* * prev_priority holds the scanning priority for this zone. It is * defined as the scanning priority at which we achieved our reclaim diff --git a/include/linux/swap.h b/include/linux/swap.h index d01f7efb0f2c..4a99e4a7fbf3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -176,6 +176,17 @@ extern int try_to_free_pages(struct zone **, gfp_t); extern int shrink_all_memory(int); extern int vm_swappiness; +#ifdef CONFIG_NUMA +extern int zone_reclaim_mode; +extern int zone_reclaim(struct zone *, gfp_t, unsigned int); +#else +#define zone_reclaim_mode 0 +static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) +{ + return 0; +} +#endif + #ifdef CONFIG_MIGRATION extern int isolate_lru_page(struct page *p); extern int putback_lru_pages(struct list_head *l); diff --git a/include/linux/topology.h b/include/linux/topology.h index 315a5163d6a0..e8eb0040ce3a 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -56,6 +56,14 @@ #define REMOTE_DISTANCE 20 #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) #endif +#ifndef RECLAIM_DISTANCE +/* + * If the distance between nodes in a system is larger than RECLAIM_DISTANCE + * (in whatever arch specific measurement units returned by node_distance()) + * then switch on zone reclaim on boot. + */ +#define RECLAIM_DISTANCE 20 +#endif #ifndef PENALTY_FOR_NODE_WITH_CPUS #define PENALTY_FOR_NODE_WITH_CPUS (1) #endif -- cgit v1.2.3 From 1743660b911bfb849b1fb33830522254561b9f9b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 18 Jan 2006 17:42:32 -0800 Subject: [PATCH] Zone reclaim: proc override proc support for zone reclaim This patch creates a proc entry /proc/sys/vm/zone_reclaim_mode that may be used to override the automatic determination of the zone reclaim made on bootup. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 7f472127b7b5..8352a7ce5895 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -182,6 +182,7 @@ enum VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ + VM_ZONE_RECLAIM_MODE=31,/* reclaim local zone memory before going off node */ }; -- cgit v1.2.3 From dc85da15d42b0efc792b0f5eab774dc5dbc1ceec Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 18 Jan 2006 17:42:36 -0800 Subject: [PATCH] NUMA policies in the slab allocator V2 This patch fixes a regression in 2.6.14 against 2.6.13 that causes an imbalance in memory allocation during bootup. The slab allocator in 2.6.13 is not numa aware and simply calls alloc_pages(). This means that memory policies may control the behavior of alloc_pages(). During bootup the memory policy is set to MPOL_INTERLEAVE resulting in the spreading out of allocations during bootup over all available nodes. The slab allocator in 2.6.13 has only a single list of slab pages. As a result the per cpu slab cache and the spinlock controlled page lists may contain slab entries from off node memory. The slab allocator in 2.6.13 makes no effort to discern the locality of an entry on its lists. The NUMA aware slab allocator in 2.6.14 controls locality of the slab pages explicitly by calling alloc_pages_node(). The NUMA slab allocator manages slab entries by having lists of available slab pages for each node. The per cpu slab cache can only contain slab entries associated with the node local to the processor. This guarantees that the default allocation mode of the slab allocator always assigns local memory if available. Setting MPOL_INTERLEAVE as a default policy during bootup has no effect anymore. In 2.6.14 all node unspecific slab allocations are performed on the boot processor. This means that most of key data structures are allocated on one node. Most processors will have to refer to these structures making the boot node a potential bottleneck. This may reduce performance and cause unnecessary memory pressure on the boot node. This patch implements NUMA policies in the slab layer. There is the need of explicit application of NUMA memory policies by the slab allcator itself since the NUMA slab allocator does no longer let the page_allocator control locality. The check for policies is made directly at the beginning of __cache_alloc using current->mempolicy. The memory policy is already frequently checked by the page allocator (alloc_page_vma() and alloc_page_current()). So it is highly likely that the cacheline is present. For MPOL_INTERLEAVE kmalloc() will spread out each request to one node after another so that an equal distribution of allocations can be obtained during bootup. It is not possible to push the policy check to lower layers of the NUMA slab allocator since the per cpu caches are now only containing slab entries from the current node. If the policy says that the local node is not to be preferred or forbidden then there is no point in checking the slab cache or local list of slab pages. The allocation better be directed immediately to the lists containing slab entries for the allowed set of nodes. This way of applying policy also fixes another strange behavior in 2.6.13. alloc_pages() is controlled by the memory allocation policy of the current process. It could therefore be that one process is running with MPOL_INTERLEAVE and would f.e. obtain a new page following that policy since no slab entries are in the lists anymore. A page can typically be used for multiple slab entries but lets say that the current process is only using one. The other entries are then added to the slab lists. These are now non local entries in the slab lists despite of the possible availability of local pages that would provide faster access and increase the performance of the application. Another process without MPOL_INTERLEAVE may now run and expect a local slab entry from kmalloc(). However, there are still these free slab entries from the off node page obtained from the other process via MPOL_INTERLEAVE in the cache. The process will then get an off node slab entry although other slab entries may be available that are local to that process. This means that the policy if one process may contaminate the locality of the slab caches for other processes. This patch in effect insures that a per process policy is followed for the allocation of slab entries and that there cannot be a memory policy influence from one process to another. A process with default policy will always get a local slab entry if one is available. And the process using memory policies will get its memory arranged as requested. Off-node slab allocation will require the use of spinlocks and will make the use of per cpu caches not possible. A process using memory policies to redirect allocations offnode will have to cope with additional lock overhead in addition to the latency added by the need to access a remote slab entry. Changes V1->V2 - Remove #ifdef CONFIG_NUMA by moving forward declaration into prior #ifdef CONFIG_NUMA section. - Give the function determining the node number to use a saner name. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index d6a53ed6ab6c..bbd2221923c3 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -159,6 +159,7 @@ extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); +extern unsigned slab_node(struct mempolicy *policy); extern int policy_zone; -- cgit v1.2.3 From 5131cf154ad1c6e584efa58d17a469d0b80f49bd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Jan 2006 17:43:04 -0800 Subject: [PATCH] add missing syscall declarations All standard system calls should be declared in include/linux/syscalls.h. Add some of the new additions that were previously missed. Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3eed47347013..e666d6070569 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -510,9 +510,24 @@ asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, asmlinkage long sys_ioprio_set(int which, int who, int ioprio); asmlinkage long sys_ioprio_get(int which, int who); asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, - unsigned long maxnode); + unsigned long maxnode); asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, - const unsigned long __user *from, const unsigned long __user *to); + const unsigned long __user *from, + const unsigned long __user *to); +asmlinkage long sys_mbind(unsigned long start, unsigned long len, + unsigned long mode, + unsigned long __user *nmask, + unsigned long maxnode, + unsigned flags); +asmlinkage long sys_get_mempolicy(int __user *policy, + unsigned long __user *nmask, + unsigned long maxnode, + unsigned long addr, unsigned long flags); + +asmlinkage long sys_inotify_init(void); +asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, + u32 mask); +asmlinkage long sys_inotify_rm_watch(int fd, u32 wd); asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus); -- cgit v1.2.3 From f193fbab2e4710f629e7c1859d4908646b47b126 Mon Sep 17 00:00:00 2001 From: YAMAMOTO Takashi Date: Wed, 18 Jan 2006 17:43:13 -0800 Subject: [PATCH] nfsd: check error status from nfsd_sync_dir Change nfsd_sync_dir to return an error if ->sync fails, and pass that error up through the stack. This involves a number of rearrangements of error paths, and care to distinguish between Linux -errno numbers and NFSERR numbers. In the 'create' routines, we continue with the 'setattr' even if a previous sync_dir failed. This patch is quite different from Takashi's in a few ways, but there is still a strong lineage. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/nfsd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 51c231a1e5a6..ec7c2e872d72 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -124,7 +124,7 @@ int nfsd_statfs(struct svc_rqst *, struct svc_fh *, int nfsd_notify_change(struct inode *, struct iattr *); int nfsd_permission(struct svc_export *, struct dentry *, int); -void nfsd_sync_dir(struct dentry *dp); +int nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL -- cgit v1.2.3 From 1918e341383ab787d6c5b17200f4ed901b10c777 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 18 Jan 2006 17:43:16 -0800 Subject: [PATCH] svcrpc: save and restore the daddr field when request deferred The server code currently keeps track of the destination address on every request so that it can reply using the same address. However we forget to do that in the case of a deferred request. Remedy this oversight. >From folks at PolyServe. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e4086ec8b952..50cab2a09f28 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -246,6 +246,7 @@ struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct sockaddr_in addr; struct svc_sock *svsk; /* where reply must go */ + u32 daddr; /* where reply must come from */ struct cache_deferred_req handle; int argslen; u32 args[0]; -- cgit v1.2.3 From 3a65588adc4401622b204caa897123e16a4a0318 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 18 Jan 2006 17:43:19 -0800 Subject: [PATCH] nfsd4: rename lk_stateowner One of the things that's confusing about nfsd4_lock is that the lk_stateowner field could be set to either of two different lockowners: the open owner or the lock owner. Rename to lk_replay_owner and add a comment to make it clear that it's used for whichever stateowner has its sequence id bumped for replay detection. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/xdr4.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 8903688890ce..77adba7d2281 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -145,8 +145,9 @@ struct nfsd4_lock { } ok; struct nfsd4_lock_denied denied; } u; - - struct nfs4_stateowner *lk_stateowner; + /* The lk_replay_owner is the open owner in the open_to_lock_owner + * case and the lock owner otherwise: */ + struct nfs4_stateowner *lk_replay_owner; }; #define lk_new_open_seqid v.new.open_seqid #define lk_new_open_stateid v.new.open_stateid -- cgit v1.2.3 From 5590ff0d5528b60153c0b4e7b771472b5a95e297 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 18 Jan 2006 17:43:53 -0800 Subject: [PATCH] vfs: *at functions: core Here is a series of patches which introduce in total 13 new system calls which take a file descriptor/filename pair instead of a single file name. These functions, openat etc, have been discussed on numerous occasions. They are needed to implement race-free filesystem traversal, they are necessary to implement a virtual per-thread current working directory (think multi-threaded backup software), etc. We have in glibc today implementations of the interfaces which use the /proc/self/fd magic. But this code is rather expensive. Here are some results (similar to what Jim Meyering posted before). The test creates a deep directory hierarchy on a tmpfs filesystem. Then rm -fr is used to remove all directories. Without syscall support I get this: real 0m31.921s user 0m0.688s sys 0m31.234s With syscall support the results are much better: real 0m20.699s user 0m0.536s sys 0m20.149s The interfaces are for obvious reasons currently not much used. But they'll be used. coreutils (and Jeff's posixutils) are already using them. Furthermore, code like ftw/fts in libc (maybe even glob) will also start using them. I expect a patch to make follow soon. Every program which is walking the filesystem tree will benefit. Signed-off-by: Ulrich Drepper Signed-off-by: Alexey Dobriyan Cc: Christoph Hellwig Cc: Al Viro Acked-by: Ingo Molnar Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fcntl.h | 7 +++++++ include/linux/fs.h | 7 +++++-- include/linux/namei.h | 7 ++++--- include/linux/time.h | 2 +- 4 files changed, 17 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 8a7c82151de9..c52a63755fdd 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -23,6 +23,13 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value used to indicate + openat should use the current + working directory. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_REMOVEDIR 0x200 /* Remove directory instead of + unlinking file. */ + #ifdef __KERNEL__ #ifndef force_o_largefile diff --git a/include/linux/fs.h b/include/linux/fs.h index b77f2608eef9..84bb449b9b01 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1340,7 +1340,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode) extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); -extern long do_sys_open(const char __user *filename, int flags, int mode); +extern long do_sys_open(int fdf, const char __user *filename, int flags, + int mode); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); extern int filp_close(struct file *, fl_owner_t id); @@ -1479,7 +1480,7 @@ static inline void allow_write_access(struct file *file) } extern int do_pipe(int *); -extern int open_namei(const char *, int, int, struct nameidata *); +extern int open_namei(int dfd, const char *, int, int, struct nameidata *); extern int may_open(struct nameidata *, int, int); extern int kernel_read(struct file *, unsigned long, char *, unsigned long); @@ -1677,6 +1678,8 @@ extern int vfs_readdir(struct file *, filldir_t, void *); extern int vfs_stat(char __user *, struct kstat *); extern int vfs_lstat(char __user *, struct kstat *); +extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); +extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long); diff --git a/include/linux/namei.h b/include/linux/namei.h index b699e427c00c..e6698013e4d0 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -56,10 +56,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_ACCESS (0x0400) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); +extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *)); #define user_path_walk(name,nd) \ - __user_walk(name, LOOKUP_FOLLOW, nd) + __user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ - __user_walk(name, 0, nd) + __user_walk_fd(AT_FDCWD, name, 0, nd) extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); extern int FASTCALL(path_walk(const char *, struct nameidata *)); extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); @@ -67,7 +68,7 @@ extern void path_release(struct nameidata *); extern void path_release_on_umount(struct nameidata *); extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags); -extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags); +extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags); extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); diff --git a/include/linux/time.h b/include/linux/time.h index f2aca7ec6325..614dd8465839 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -74,7 +74,7 @@ extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) -extern long do_utimes(char __user *filename, struct timeval *times); +extern long do_utimes(int dfd, char __user *filename, struct timeval *times); struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); -- cgit v1.2.3 From 150256d8aadb3a337c31efa9e175cbd25bf06b06 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 18 Jan 2006 17:43:57 -0800 Subject: [PATCH] Generic sys_rt_sigsuspend() The TIF_RESTORE_SIGMASK flag allows us to have a generic implementation of sys_rt_sigsuspend() instead of duplicating it for each architecture. This provides such an implementation and makes arch/powerpc use it. It also tidies up the ppc32 sys_sigsuspend() to use TIF_RESTORE_SIGMASK. Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2df1a1a2fee5..0cfcd1c7865e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -809,6 +809,7 @@ struct task_struct { struct sighand_struct *sighand; sigset_t blocked, real_blocked; + sigset_t saved_sigmask; /* To be restored with TIF_RESTORE_SIGMASK */ struct sigpending pending; unsigned long sas_ss_sp; -- cgit v1.2.3 From 9f72949f679df06021c9e43886c9191494fdb007 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 18 Jan 2006 17:44:05 -0800 Subject: [PATCH] Add pselect/ppoll system call implementation The following implementation of ppoll() and pselect() system calls depends on the architecture providing a TIF_RESTORE_SIGMASK flag in the thread_info. These system calls have to change the signal mask during their operation, and signal handlers must be invoked using the new, temporary signal mask. The old signal mask must be restored either upon successful exit from the system call, or upon returning from the invoked signal handler if the system call is interrupted. We can't simply restore the original signal mask and return to userspace, since the restored signal mask may actually block the signal which interrupted the system call. The TIF_RESTORE_SIGMASK flag deals with this by causing the syscall exit path to trap into do_signal() just as TIF_SIGPENDING does, and by causing do_signal() to use the saved signal mask instead of the current signal mask when setting up the stack frame for the signal handler -- or by causing do_signal() to simply restore the saved signal mask in the case where there is no handler to be invoked. The first patch implements the sys_pselect() and sys_ppoll() system calls, which are present only if TIF_RESTORE_SIGMASK is defined. That #ifdef should go away in time when all architectures have implemented it. The second patch implements TIF_RESTORE_SIGMASK for the PowerPC kernel (in the -mm tree), and the third patch then removes the arch-specific implementations of sys_rt_sigsuspend() and replaces them with generic versions using the same trick. The fourth and fifth patches, provided by David Howells, implement TIF_RESTORE_SIGMASK for FR-V and i386 respectively, and the sixth patch adds the syscalls to the i386 syscall table. This patch: Add the pselect() and ppoll() system calls, providing core routines usable by the original select() and poll() system calls and also the new calls (with their semantics w.r.t timeouts). Signed-off-by: David Woodhouse Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poll.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/poll.h b/include/linux/poll.h index f6da702088f4..8e8f6098508a 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -92,7 +92,11 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset) memset(fdset, 0, FDS_BYTES(nr)); } -extern int do_select(int n, fd_set_bits *fds, long *timeout); +#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1) + +extern int do_select(int n, fd_set_bits *fds, s64 *timeout); +extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, + s64 *timeout); #endif /* KERNEL */ -- cgit v1.2.3 From 4f2d7680cb1ac5c5a70f3ba2447d5aa5c0a1643a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 19 Jan 2006 16:58:37 -0800 Subject: [NETFILTER] x_tables: Make XT_ALIGN align as strictly as necessary. Or else we break on ppc32 and other 32-bit platforms. Based upon a patch from Harald Welte. Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 472f04834809..59ff6c430cf6 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -19,7 +19,7 @@ struct xt_get_revision /* For standard target */ #define XT_RETURN (-NF_REPEAT - 1) -#define XT_ALIGN(s) (((s) + (__alignof__(void *)-1)) & ~(__alignof__(void *)-1)) +#define XT_ALIGN(s) (((s) + (__alignof__(u_int64_t)-1)) & ~(__alignof__(u_int64_t)-1)) /* Standard return verdict, or do jump. */ #define XT_STANDARD_TARGET "" -- cgit v1.2.3