From 9834a2bb4970547540222fcba04e0a37d04cb0a0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 17 Jan 2006 11:52:12 -0800 Subject: [SCTP]: Fix sctp_cookie alignment in the packet. On 64 bit architectures, sctp_cookie sent as part of INIT-ACK is not aligned on a 64 bit boundry and thus causes unaligned access exceptions. The layout of the cookie prameter is this: |<----- Parameter Header --------------------|<--- Cookie DATA -------- ----------------------------------------------------------------------- | param type (16 bits) | param len (16 bits) | sig [32 bytes] | cookie.. ----------------------------------------------------------------------- The cookie data portion contains 64 bit values on 64 bit architechtures (timeval) that fall on a 32 bit alignment boundry when used as part of the on-wire format, but align correctly when used in internal structures. This patch explicitely pads the on-wire format so that it is properly aligned. Signed-off-by: Vlad Yasevich Signed-off-by: Sridhar Samudrala --- include/net/sctp/structs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f5c22d77feab..72aeae4a0067 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -405,8 +405,9 @@ struct sctp_cookie { /* The format of our cookie that we send to our peer. */ struct sctp_signed_cookie { __u8 signature[SCTP_SECRET_SIZE]; + __u32 __pad; /* force sctp_cookie alignment to 64 bits */ struct sctp_cookie c; -}; +} __attribute__((packed)); /* This is another convenience type to allocate memory for address * params for the maximum size and pass such structures around -- cgit v1.2.3 From 8116ffad4180b39d7a755345c1fde09da83930c0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 17 Jan 2006 11:55:17 -0800 Subject: [SCTP]: Fix bad sysctl formatting of SCTP timeout values on 64-bit m/cs. Change all the structure members that hold jiffies to be of type unsigned long. This also corrects bad sysctl formating on 64 bit architectures. Signed-off-by: Vlad Yasevich Signed-off-by: Sridhar Samudrala --- include/net/sctp/structs.h | 78 ++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 72aeae4a0067..ad3d15cb0a0d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -127,9 +127,9 @@ extern struct sctp_globals { * RTO.Alpha - 1/8 (3 when converted to right shifts.) * RTO.Beta - 1/4 (2 when converted to right shifts.) */ - __u32 rto_initial; - __u32 rto_min; - __u32 rto_max; + unsigned long rto_initial; + unsigned long rto_min; + unsigned long rto_max; /* Note: rto_alpha and rto_beta are really defined as inverse * powers of two to facilitate integer operations. @@ -140,12 +140,18 @@ extern struct sctp_globals { /* Max.Burst - 4 */ int max_burst; - /* Valid.Cookie.Life - 60 seconds */ - int valid_cookie_life; - /* Whether Cookie Preservative is enabled(1) or not(0) */ int cookie_preserve_enable; + /* Valid.Cookie.Life - 60 seconds */ + unsigned long valid_cookie_life; + + /* Delayed SACK timeout 200ms default*/ + unsigned long sack_timeout; + + /* HB.interval - 30 seconds */ + unsigned long hb_interval; + /* Association.Max.Retrans - 10 attempts * Path.Max.Retrans - 5 attempts (per destination address) * Max.Init.Retransmits - 8 attempts @@ -168,12 +174,6 @@ extern struct sctp_globals { */ int rcvbuf_policy; - /* Delayed SACK timeout 200ms default*/ - int sack_timeout; - - /* HB.interval - 30 seconds */ - int hb_interval; - /* The following variables are implementation specific. */ /* Default initialization values to be applied to new associations. */ @@ -828,7 +828,7 @@ struct sctp_transport { __u32 rtt; /* This is the most recent RTT. */ /* RTO : The current retransmission timeout value. */ - __u32 rto; + unsigned long rto; /* RTTVAR : The current RTT variation. */ __u32 rttvar; @@ -878,22 +878,10 @@ struct sctp_transport { /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to * the destination address every heartbeat interval. */ - __u32 hbinterval; - - /* This is the max_retrans value for the transport and will - * be initialized from the assocs value. This can be changed - * using SCTP_SET_PEER_ADDR_PARAMS socket option. - */ - __u16 pathmaxrxt; - - /* PMTU : The current known path MTU. */ - __u32 pathmtu; + unsigned long hbinterval; /* SACK delay timeout */ - __u32 sackdelay; - - /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ - __u32 param_flags; + unsigned long sackdelay; /* When was the last time (in jiffies) that we heard from this * transport? We use this to pick new active and retran paths. @@ -905,6 +893,18 @@ struct sctp_transport { */ unsigned long last_time_ecne_reduced; + /* This is the max_retrans value for the transport and will + * be initialized from the assocs value. This can be changed + * using SCTP_SET_PEER_ADDR_PARAMS socket option. + */ + __u16 pathmaxrxt; + + /* PMTU : The current known path MTU. */ + __u32 pathmtu; + + /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ + __u32 param_flags; + /* The number of times INIT has been sent on this transport. */ int init_sent_count; @@ -1500,9 +1500,9 @@ struct sctp_association { * These values will be initialized by system defaults, but can * be modified via the SCTP_RTOINFO socket option. */ - __u32 rto_initial; - __u32 rto_max; - __u32 rto_min; + unsigned long rto_initial; + unsigned long rto_max; + unsigned long rto_min; /* Maximum number of new data packets that can be sent in a burst. */ int max_burst; @@ -1520,13 +1520,13 @@ struct sctp_association { __u16 init_retries; /* The largest timeout or RTO value to use in attempting an INIT */ - __u16 max_init_timeo; + unsigned long max_init_timeo; /* Heartbeat interval: The endpoint sends out a Heartbeat chunk to * the destination address every heartbeat interval. This value * will be inherited by all new transports. */ - __u32 hbinterval; + unsigned long hbinterval; /* This is the max_retrans value for new transports in the * association. @@ -1538,13 +1538,14 @@ struct sctp_association { */ __u32 pathmtu; - /* SACK delay timeout */ - __u32 sackdelay; - /* Flags controling Heartbeat, SACK delay, and Path MTU Discovery. */ __u32 param_flags; - int timeouts[SCTP_NUM_TIMEOUT_TYPES]; + /* SACK delay timeout */ + unsigned long sackdelay; + + + unsigned long timeouts[SCTP_NUM_TIMEOUT_TYPES]; struct timer_list timers[SCTP_NUM_TIMEOUT_TYPES]; /* Transport to which SHUTDOWN chunk was last sent. */ @@ -1649,7 +1650,10 @@ struct sctp_association { /* How many duplicated TSNs have we seen? */ int numduptsns; - /* Number of seconds of idle time before an association is closed. */ + /* Number of seconds of idle time before an association is closed. + * In the association context, this is really used as a boolean + * since the real timeout is stored in the timeouts array + */ __u32 autoclose; /* These are to support -- cgit v1.2.3 From 313e7b4d2588539e388d31c1febd50503a0083fc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 17 Jan 2006 11:55:57 -0800 Subject: [SCTP]: Fix machine check/connection hang on IA64. sctp_unpack_cookie used an on-stack array called digest as a result/out parameter in the call to crypto_hmac. However, hmac code (crypto_hmac_final) assumes that the 'out' argument is in virtual memory (identity mapped region) and can use virt_to_page call on it. This does not work with the on-stack declared digest. The problems observed so far have been: a) incorrect hmac digest b) machine check and hardware reset. Solution is to define the digest in an identity mapped region by kmalloc'ing it. We can do this once as part of the endpoint structure and re-use it when verifying the SCTP cookie. Signed-off-by: Vlad Yasevich Signed-off-by: Sridhar Samudrala --- include/net/sctp/structs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ad3d15cb0a0d..8c522ae031bb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1250,6 +1250,14 @@ struct sctp_endpoint { int last_key; int key_changed_at; + /* digest: This is a digest of the sctp cookie. This field is + * only used on the receive path when we try to validate + * that the cookie has not been tampered with. We put + * this here so we pre-allocate this once and can re-use + * on every receive. + */ + __u8 digest[SCTP_SIGNATURE_SIZE]; + /* sendbuf acct. policy. */ __u32 sndbuf_policy; -- cgit v1.2.3 From c4d2444e992c4eda1d7fc3287e93ba58295bf6b9 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 17 Jan 2006 11:56:26 -0800 Subject: [SCTP]: Fix couple of races between sctp_peeloff() and sctp_rcv(). Validate and update the sk in sctp_rcv() to avoid the race where an assoc/ep could move to a different socket after we get the sk, but before the skb is added to the backlog. Also migrate the skb's in backlog queue to new sk when doing a peeloff. Signed-off-by: Sridhar Samudrala --- include/net/sctp/sctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a553f39f6aee..e673b2c984e9 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -175,6 +175,8 @@ void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t); +void sctp_backlog_migrate(struct sctp_association *assoc, + struct sock *oldsk, struct sock *newsk); /* * Section: Macros, externs, and inlines -- cgit v1.2.3 From f9e505a9a03df5acace6e758c8d12982635a1c64 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 17 Jan 2006 12:52:21 -0500 Subject: [IA64-SGI] sn2 mutex conversion Migrate sn2 code to use mutex and completion events rather than semaphores. Signed-off-by: Jes Sorensen Acked-by: Dean Nelson Signed-off-by: Tony Luck --- include/asm-ia64/sn/xp.h | 3 ++- include/asm-ia64/sn/xpc.h | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-ia64/sn/xp.h b/include/asm-ia64/sn/xp.h index 203945ae034e..9bd2f9bf329b 100644 --- a/include/asm-ia64/sn/xp.h +++ b/include/asm-ia64/sn/xp.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -359,7 +360,7 @@ typedef void (*xpc_notify_func)(enum xpc_retval reason, partid_t partid, * the channel. */ struct xpc_registration { - struct semaphore sema; + struct mutex mutex; xpc_channel_func func; /* function to call */ void *key; /* pointer to user's key */ u16 nentries; /* #of msg entries in local msg queue */ diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h index 87e9cd588510..0c36928ffd8b 100644 --- a/include/asm-ia64/sn/xpc.h +++ b/include/asm-ia64/sn/xpc.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -335,8 +337,7 @@ struct xpc_openclose_args { * and consumed by the intended recipient. */ struct xpc_notify { - struct semaphore sema; /* notify semaphore */ - volatile u8 type; /* type of notification */ + volatile u8 type; /* type of notification */ /* the following two fields are only used if type == XPC_N_CALL */ xpc_notify_func func; /* user's notify function */ @@ -465,8 +466,8 @@ struct xpc_channel { xpc_channel_func func; /* user's channel function */ void *key; /* pointer to user's key */ - struct semaphore msg_to_pull_sema; /* next msg to pull serialization */ - struct semaphore wdisconnect_sema; /* wait for channel disconnect */ + struct mutex msg_to_pull_mutex; /* next msg to pull serialization */ + struct completion wdisconnect_wait; /* wait for channel disconnect */ struct xpc_openclose_args *local_openclose_args; /* args passed on */ /* opening or closing of channel */ -- cgit v1.2.3 From 8d08aed8d7714683b33666cc066e20f957dda01d Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 17 Jan 2006 15:42:46 -0600 Subject: [IA64] Zonelists for nodes without cpus If a node runs out of memory, ensure that memory on nodes w/o cpus is used before using memory on nodes with cpus. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- include/asm-ia64/topology.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h index d8aae4da3978..412ef8e493a8 100644 --- a/include/asm-ia64/topology.h +++ b/include/asm-ia64/topology.h @@ -18,6 +18,10 @@ #include #ifdef CONFIG_NUMA + +/* Nodes w/o CPUs are preferred for memory allocations, see build_zonelists */ +#define PENALTY_FOR_NODE_WITH_CPUS 255 + /* * Returns the number of the node containing CPU 'cpu' */ -- cgit v1.2.3 From 4b16bfbf8f8013fefb49592d030ff87651ab48cb Mon Sep 17 00:00:00 2001 From: Zoltan Menyhart Date: Fri, 13 Jan 2006 17:25:23 +0100 Subject: [IA64] Fix bug in ia64 specific down() function Chen, Kenneth W wrote: > The memory order semantics for include/asm-ia64/semaphore.h:down() > doesn't look right. It is using atomic_dec_return, which eventually > translate into ia64_fetch_and_add() that uses release semantics. > Shouldn't it use acquire semantics? Use ia64_fetchadd() instead of atomic_dec_return() Acked-by: Ken Chen Signed-off-by: Tony Luck --- include/asm-ia64/semaphore.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-ia64/semaphore.h b/include/asm-ia64/semaphore.h index bb8906285fab..f483eeb95dd1 100644 --- a/include/asm-ia64/semaphore.h +++ b/include/asm-ia64/semaphore.h @@ -61,7 +61,7 @@ static inline void down (struct semaphore *sem) { might_sleep(); - if (atomic_dec_return(&sem->count) < 0) + if (ia64_fetchadd(-1, &sem->count.counter, acq) < 1) __down(sem); } @@ -75,7 +75,7 @@ down_interruptible (struct semaphore * sem) int ret = 0; might_sleep(); - if (atomic_dec_return(&sem->count) < 0) + if (ia64_fetchadd(-1, &sem->count.counter, acq) < 1) ret = __down_interruptible(sem); return ret; } @@ -85,7 +85,7 @@ down_trylock (struct semaphore *sem) { int ret = 0; - if (atomic_dec_return(&sem->count) < 0) + if (ia64_fetchadd(-1, &sem->count.counter, acq) < 1) ret = __down_trylock(sem); return ret; } @@ -93,7 +93,7 @@ down_trylock (struct semaphore *sem) static inline void up (struct semaphore * sem) { - if (atomic_inc_return(&sem->count) <= 0) + if (ia64_fetchadd(1, &sem->count.counter, rel) <= -1) __up(sem); } -- cgit v1.2.3 From f07adc591e6ff100773b93b643f58d9773df6e21 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 17 Jan 2006 15:27:09 -0800 Subject: ARM: OMAP: 1/4 Fix clock framework to use clk_enable/disable This patch fixes OMAP clock framework to use clk_enable/disable instead of clk_use/unuse as specified in include/linux/clk.h. Instances of clk_use/unuse are renamed to clk_enable/disable, and references clk_use/unuse are removed. Signed-off-by: Tony Lindgren --- include/asm-arm/arch-omap/clock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-omap/clock.h b/include/asm-arm/arch-omap/clock.h index 740c297eb11c..46a0402696de 100644 --- a/include/asm-arm/arch-omap/clock.h +++ b/include/asm-arm/arch-omap/clock.h @@ -38,8 +38,6 @@ struct clk { struct clk_functions { int (*clk_enable)(struct clk *clk); void (*clk_disable)(struct clk *clk); - int (*clk_use)(struct clk *clk); - void (*clk_unuse)(struct clk *clk); long (*clk_round_rate)(struct clk *clk, unsigned long rate); int (*clk_set_rate)(struct clk *clk, unsigned long rate); int (*clk_set_parent)(struct clk *clk, struct clk *parent); -- cgit v1.2.3 From 16cb4b333c9e7a00ce3b1d74ec0c9b4c2e956910 Mon Sep 17 00:00:00 2001 From: Per Liden Date: Fri, 13 Jan 2006 22:22:22 +0100 Subject: [TIPC] Updated link priority macros Added macros for min/default/max link priority in tipc_config.h. Also renamed TIPC_NUM_LINK_PRI to TIPC_MEDIA_LINK_PRI since that is a more accurate description of what it is used for. Signed-off-by: Per Liden --- include/linux/tipc_config.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h index a52c8c64a5a3..33a653913d94 100644 --- a/include/linux/tipc_config.h +++ b/include/linux/tipc_config.h @@ -168,10 +168,13 @@ #define TIPC_MAX_LINK_NAME 60 /* format = Z.C.N:interface-Z.C.N:interface */ /* - * Link priority limits (range from 0 to # priorities - 1) + * Link priority limits (min, default, max, media default) */ -#define TIPC_NUM_LINK_PRI 32 +#define TIPC_MIN_LINK_PRI 0 +#define TIPC_DEF_LINK_PRI 10 +#define TIPC_MAX_LINK_PRI 31 +#define TIPC_MEDIA_LINK_PRI (TIPC_MAX_LINK_PRI + 1) /* * Link tolerance limits (min, default, max), in ms -- cgit v1.2.3 From 33a9c4da5ab16192ef1e961d4c4e45c18031cd67 Mon Sep 17 00:00:00 2001 From: Per Liden Date: Mon, 16 Jan 2006 11:42:12 +0100 Subject: [TIPC] Move ethernet protocol id to linux/if_ether.h Signed-off-by: Per Liden --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index fe26d431de87..7a92c1ce1457 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -72,6 +72,7 @@ * over Ethernet */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ +#define ETH_P_TIPC 0x88CA /* TIPC */ /* * Non DIX types. Won't clash for 1500 types. -- cgit v1.2.3 From d9004eb466d03b7900ed432fecec6819012b4ed3 Mon Sep 17 00:00:00 2001 From: Alon Bar-Lev Date: Wed, 18 Jan 2006 11:47:33 +0000 Subject: [SERIAL] Add 8250 support for Decision Computer International Co. PCCOM2 There is a new device which is look like: Serial controller: Decision Computer International Co. PCCOM2 (rev 02) (prog-if 02 [16550]) 0700: 6666:0004 (rev 02) (prog-if 02) Flags: medium devsel, IRQ 177 Memory at fe000000 (32-bit, non-prefetchable) [size=128] I/O ports at e880 [size=128] I/O ports at e400 [size=256] It has two 16550A, and is not listed in kernel, although the manufacturer clams that it is supported... I've created the following patch, it only add the new PCI id and the card to the repository, it seems to work. Signed-off-by: Alon Bar-Lev Signed-off-by: Russell King --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 5403257ae3e7..ecc1fc1f0f04 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1992,6 +1992,7 @@ #define PCI_VENDOR_ID_DCI 0x6666 #define PCI_DEVICE_ID_DCI_PCCOM4 0x0001 #define PCI_DEVICE_ID_DCI_PCCOM8 0x0002 +#define PCI_DEVICE_ID_DCI_PCCOM2 0x0004 #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 -- cgit v1.2.3 From 68477d11769ce8c6830523f08637894c43885c7e Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 18 Jan 2006 22:38:44 +0000 Subject: [ARM] 3267/1: PXA27x SSP controller register defines Patch from David Vrabel PXA27x SSP controller has a few different registers, including SCR (serial clock rate) in SSCR0. Signed-off-by: David Vrabel Signed-off-by: Russell King --- include/asm-arm/arch-pxa/pxa-regs.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index dae138b9cac5..1409c5bd703f 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -108,6 +108,7 @@ #define DCSR_STARTINTR (1 << 1) /* Start Interrupt (read / write) */ #define DCSR_BUSERR (1 << 0) /* Bus Error Interrupt (read / write) */ +#define DALGN __REG(0x400000a0) /* DMA Alignment Register */ #define DINT __REG(0x400000f0) /* DMA Interrupt Register */ #define DRCMR(n) __REG2(0x40000100, (n)<<2) @@ -1614,8 +1615,21 @@ #define SSCR0_National (0x2 << 4) /* National Microwire */ #define SSCR0_ECS (1 << 6) /* External clock select */ #define SSCR0_SSE (1 << 7) /* Synchronous Serial Port Enable */ +#if defined(CONFIG_PXA25x) #define SSCR0_SCR (0x0000ff00) /* Serial Clock Rate (mask) */ #define SSCR0_SerClkDiv(x) ((((x) - 2)/2) << 8) /* Divisor [2..512] */ +#elif defined(CONFIG_PXA27x) +#define SSCR0_SCR (0x000fff00) /* Serial Clock Rate (mask) */ +#define SSCR0_SerClkDiv(x) (((x) - 1) << 8) /* Divisor [1..4096] */ +#define SSCR0_EDSS (1 << 20) /* Extended data size select */ +#define SSCR0_NCS (1 << 21) /* Network clock select */ +#define SSCR0_RIM (1 << 22) /* Receive FIFO overrrun interrupt mask */ +#define SSCR0_TUM (1 << 23) /* Transmit FIFO underrun interrupt mask */ +#define SSCR0_FRDC (0x07000000) /* Frame rate divider control (mask) */ +#define SSCR0_SlotsPerFrm(c) ((x) - 1) /* Time slots per frame [1..8] */ +#define SSCR0_ADC (1 << 30) /* Audio clock select */ +#define SSCR0_MOD (1 << 31) /* Mode (normal or network) */ +#endif #define SSCR1_RIE (1 << 0) /* Receive FIFO Interrupt Enable */ #define SSCR1_TIE (1 << 1) /* Transmit FIFO Interrupt Enable */ -- cgit v1.2.3 From c126cf80d450a4d0aac3de7162d4c14b5c971b24 Mon Sep 17 00:00:00 2001 From: "Eddie C. Dost" Date: Wed, 18 Jan 2006 14:54:31 -0800 Subject: [SPARC64]: Serial Console for E250 Patch From: Eddie C. Dost I have the following patch for serial console over the RSC (remote system controller) on my E250 machine. It basically adds support for input-device=rsc and output-device=rsc from OBP, and allows 115200,8,n,1,- serial mode setting. Signed-off-by: David S. Miller --- include/asm-sparc64/oplib.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h index d02f1e8ae1a6..3c59b2693fb9 100644 --- a/include/asm-sparc64/oplib.h +++ b/include/asm-sparc64/oplib.h @@ -163,6 +163,7 @@ enum prom_input_device { PROMDEV_IKBD, /* input from keyboard */ PROMDEV_ITTYA, /* input from ttya */ PROMDEV_ITTYB, /* input from ttyb */ + PROMDEV_IRSC, /* input from rsc */ PROMDEV_I_UNK, }; @@ -174,6 +175,7 @@ enum prom_output_device { PROMDEV_OSCREEN, /* to screen */ PROMDEV_OTTYA, /* to ttya */ PROMDEV_OTTYB, /* to ttyb */ + PROMDEV_ORSC, /* to rsc */ PROMDEV_O_UNK, }; -- cgit v1.2.3 From c8d338c8dbc4461a6de1171c2332b8ed547f8f3b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 18 Jan 2006 17:42:22 -0800 Subject: [PATCH] scsi_transport_spi build fix On alpha: In file included from drivers/scsi/sym53c8xx_2/sym_glue.h:59, from drivers/scsi/sym53c8xx_2/sym_fw.c:40: include/scsi/scsi_transport_spi.h:57: error: field `dv_mutex' has incomplete type Cc: James Bottomley Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/scsi/scsi_transport_spi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/scsi_transport_spi.h b/include/scsi/scsi_transport_spi.h index 2b5930ba69ec..fb5a2ffae939 100644 --- a/include/scsi/scsi_transport_spi.h +++ b/include/scsi/scsi_transport_spi.h @@ -22,6 +22,7 @@ #include #include +#include struct scsi_transport_template; struct scsi_target; -- cgit v1.2.3 From 053837fce7aa79025ed57656855df09f80175527 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Jan 2006 17:42:27 -0800 Subject: [PATCH] mm: migration page refcounting fix Migration code currently does not take a reference to target page properly, so between unlocking the pte and trying to take a new reference to the page with isolate_lru_page, anything could happen to it. Fix this by holding the pte lock until we get a chance to elevate the refcount. Other small cleanups while we're here. Signed-off-by: Nick Piggin Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 21 --------------------- include/linux/swap.h | 1 + 2 files changed, 1 insertion(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 49cc68af01f8..8ac854f7f190 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page) } } -/* - * Isolate one page from the LRU lists. - * - * - zone->lru_lock must be held - */ -static inline int __isolate_lru_page(struct page *page) -{ - if (unlikely(!TestClearPageLRU(page))) - return 0; - - if (get_page_testone(page)) { - /* - * It is being freed elsewhere - */ - __put_page(page); - SetPageLRU(page); - return -ENOENT; - } - - return 1; -} diff --git a/include/linux/swap.h b/include/linux/swap.h index e92054d6530b..d01f7efb0f2c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *)); extern void FASTCALL(activate_page(struct page *)); extern void FASTCALL(mark_page_accessed(struct page *)); extern void lru_add_drain(void); +extern int lru_add_drain_all(void); extern int rotate_reclaimable_page(struct page *page); extern void swap_setup(void); -- cgit v1.2.3 From 9eeff2395e3cfd05c9b2e6074ff943a34b0c5c21 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 18 Jan 2006 17:42:31 -0800 Subject: [PATCH] Zone reclaim: Reclaim logic Some bits for zone reclaim exists in 2.6.15 but they are not usable. This patch fixes them up, removes unused code and makes zone reclaim usable. Zone reclaim allows the reclaiming of pages from a zone if the number of free pages falls below the watermarks even if other zones still have enough pages available. Zone reclaim is of particular importance for NUMA machines. It can be more beneficial to reclaim a page than taking the performance penalties that come with allocating a page on a remote zone. Zone reclaim is enabled if the maximum distance to another node is higher than RECLAIM_DISTANCE, which may be defined by an arch. By default RECLAIM_DISTANCE is 20. 20 is the distance to another node in the same component (enclosure or motherboard) on IA64. The meaning of the NUMA distance information seems to vary by arch. If zone reclaim is not successful then no further reclaim attempts will occur for a certain time period (ZONE_RECLAIM_INTERVAL). This patch was discussed before. See http://marc.theaimsgroup.com/?l=linux-kernel&m=113519961504207&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113408418232531&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113389027420032&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113380938612205&w=2 Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 12 +++++++----- include/linux/swap.h | 11 +++++++++++ include/linux/topology.h | 8 ++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 34cbefd2ebde..93a849f742db 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -149,14 +149,16 @@ struct zone { unsigned long pages_scanned; /* since last reclaim */ int all_unreclaimable; /* All pages pinned */ - /* - * Does the allocator try to reclaim pages from the zone as soon - * as it fails a watermark_ok() in __alloc_pages? - */ - int reclaim_pages; /* A count of how many reclaimers are scanning this zone */ atomic_t reclaim_in_progress; + /* + * timestamp (in jiffies) of the last zone reclaim that did not + * result in freeing of pages. This is used to avoid repeated scans + * if all memory in the zone is in use. + */ + unsigned long last_unsuccessful_zone_reclaim; + /* * prev_priority holds the scanning priority for this zone. It is * defined as the scanning priority at which we achieved our reclaim diff --git a/include/linux/swap.h b/include/linux/swap.h index d01f7efb0f2c..4a99e4a7fbf3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -176,6 +176,17 @@ extern int try_to_free_pages(struct zone **, gfp_t); extern int shrink_all_memory(int); extern int vm_swappiness; +#ifdef CONFIG_NUMA +extern int zone_reclaim_mode; +extern int zone_reclaim(struct zone *, gfp_t, unsigned int); +#else +#define zone_reclaim_mode 0 +static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) +{ + return 0; +} +#endif + #ifdef CONFIG_MIGRATION extern int isolate_lru_page(struct page *p); extern int putback_lru_pages(struct list_head *l); diff --git a/include/linux/topology.h b/include/linux/topology.h index 315a5163d6a0..e8eb0040ce3a 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -56,6 +56,14 @@ #define REMOTE_DISTANCE 20 #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) #endif +#ifndef RECLAIM_DISTANCE +/* + * If the distance between nodes in a system is larger than RECLAIM_DISTANCE + * (in whatever arch specific measurement units returned by node_distance()) + * then switch on zone reclaim on boot. + */ +#define RECLAIM_DISTANCE 20 +#endif #ifndef PENALTY_FOR_NODE_WITH_CPUS #define PENALTY_FOR_NODE_WITH_CPUS (1) #endif -- cgit v1.2.3 From 1743660b911bfb849b1fb33830522254561b9f9b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 18 Jan 2006 17:42:32 -0800 Subject: [PATCH] Zone reclaim: proc override proc support for zone reclaim This patch creates a proc entry /proc/sys/vm/zone_reclaim_mode that may be used to override the automatic determination of the zone reclaim made on bootup. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 7f472127b7b5..8352a7ce5895 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -182,6 +182,7 @@ enum VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */ VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ + VM_ZONE_RECLAIM_MODE=31,/* reclaim local zone memory before going off node */ }; -- cgit v1.2.3 From dc85da15d42b0efc792b0f5eab774dc5dbc1ceec Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 18 Jan 2006 17:42:36 -0800 Subject: [PATCH] NUMA policies in the slab allocator V2 This patch fixes a regression in 2.6.14 against 2.6.13 that causes an imbalance in memory allocation during bootup. The slab allocator in 2.6.13 is not numa aware and simply calls alloc_pages(). This means that memory policies may control the behavior of alloc_pages(). During bootup the memory policy is set to MPOL_INTERLEAVE resulting in the spreading out of allocations during bootup over all available nodes. The slab allocator in 2.6.13 has only a single list of slab pages. As a result the per cpu slab cache and the spinlock controlled page lists may contain slab entries from off node memory. The slab allocator in 2.6.13 makes no effort to discern the locality of an entry on its lists. The NUMA aware slab allocator in 2.6.14 controls locality of the slab pages explicitly by calling alloc_pages_node(). The NUMA slab allocator manages slab entries by having lists of available slab pages for each node. The per cpu slab cache can only contain slab entries associated with the node local to the processor. This guarantees that the default allocation mode of the slab allocator always assigns local memory if available. Setting MPOL_INTERLEAVE as a default policy during bootup has no effect anymore. In 2.6.14 all node unspecific slab allocations are performed on the boot processor. This means that most of key data structures are allocated on one node. Most processors will have to refer to these structures making the boot node a potential bottleneck. This may reduce performance and cause unnecessary memory pressure on the boot node. This patch implements NUMA policies in the slab layer. There is the need of explicit application of NUMA memory policies by the slab allcator itself since the NUMA slab allocator does no longer let the page_allocator control locality. The check for policies is made directly at the beginning of __cache_alloc using current->mempolicy. The memory policy is already frequently checked by the page allocator (alloc_page_vma() and alloc_page_current()). So it is highly likely that the cacheline is present. For MPOL_INTERLEAVE kmalloc() will spread out each request to one node after another so that an equal distribution of allocations can be obtained during bootup. It is not possible to push the policy check to lower layers of the NUMA slab allocator since the per cpu caches are now only containing slab entries from the current node. If the policy says that the local node is not to be preferred or forbidden then there is no point in checking the slab cache or local list of slab pages. The allocation better be directed immediately to the lists containing slab entries for the allowed set of nodes. This way of applying policy also fixes another strange behavior in 2.6.13. alloc_pages() is controlled by the memory allocation policy of the current process. It could therefore be that one process is running with MPOL_INTERLEAVE and would f.e. obtain a new page following that policy since no slab entries are in the lists anymore. A page can typically be used for multiple slab entries but lets say that the current process is only using one. The other entries are then added to the slab lists. These are now non local entries in the slab lists despite of the possible availability of local pages that would provide faster access and increase the performance of the application. Another process without MPOL_INTERLEAVE may now run and expect a local slab entry from kmalloc(). However, there are still these free slab entries from the off node page obtained from the other process via MPOL_INTERLEAVE in the cache. The process will then get an off node slab entry although other slab entries may be available that are local to that process. This means that the policy if one process may contaminate the locality of the slab caches for other processes. This patch in effect insures that a per process policy is followed for the allocation of slab entries and that there cannot be a memory policy influence from one process to another. A process with default policy will always get a local slab entry if one is available. And the process using memory policies will get its memory arranged as requested. Off-node slab allocation will require the use of spinlocks and will make the use of per cpu caches not possible. A process using memory policies to redirect allocations offnode will have to cope with additional lock overhead in addition to the latency added by the need to access a remote slab entry. Changes V1->V2 - Remove #ifdef CONFIG_NUMA by moving forward declaration into prior #ifdef CONFIG_NUMA section. - Give the function determining the node number to use a saner name. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index d6a53ed6ab6c..bbd2221923c3 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -159,6 +159,7 @@ extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern struct mempolicy default_policy; extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); +extern unsigned slab_node(struct mempolicy *policy); extern int policy_zone; -- cgit v1.2.3 From ea1eae75eb596e0628dc5e01d32c78b1f6b257fb Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 18 Jan 2006 17:42:38 -0800 Subject: [PATCH] uml: add __raw_writel definition Add implementations of the write* and __raw_write* functions. __raw_writel is needed by lib/iocopy.c, which shouldn't be used in UML, but which is unconditionally linked in anyway. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-um/io.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/asm-um/io.h b/include/asm-um/io.h index 90674056dcef..1934d9340e2c 100644 --- a/include/asm-um/io.h +++ b/include/asm-um/io.h @@ -33,4 +33,20 @@ static inline void * phys_to_virt(unsigned long address) */ #define xlate_dev_kmem_ptr(p) p +static inline void writeb(unsigned char b, volatile void __iomem *addr) +{ + *(volatile unsigned char __force *) addr = b; +} +static inline void writew(unsigned short b, volatile void __iomem *addr) +{ + *(volatile unsigned short __force *) addr = b; +} +static inline void writel(unsigned int b, volatile void __iomem *addr) +{ + *(volatile unsigned int __force *) addr = b; +} +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + #endif -- cgit v1.2.3 From 3b948068b84b9759cdf0965abf3074dcb9230e98 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Wed, 18 Jan 2006 17:42:52 -0800 Subject: [PATCH] uml: remove leftover from patch revertal I added this line to share this file with UML, but now it's no longer shared so remove this useless leftover. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Acked-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h index e7a271d39309..44b9db806474 100644 --- a/include/asm-i386/futex.h +++ b/include/asm-i386/futex.h @@ -61,7 +61,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (op == FUTEX_OP_SET) __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); else { -#if !defined(CONFIG_X86_BSWAP) && !defined(CONFIG_UML) +#ifndef CONFIG_X86_BSWAP if (boot_cpu_data.x86 == 3) ret = -ENOSYS; else -- cgit v1.2.3 From 5131cf154ad1c6e584efa58d17a469d0b80f49bd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Jan 2006 17:43:04 -0800 Subject: [PATCH] add missing syscall declarations All standard system calls should be declared in include/linux/syscalls.h. Add some of the new additions that were previously missed. Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3eed47347013..e666d6070569 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -510,9 +510,24 @@ asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, asmlinkage long sys_ioprio_set(int which, int who, int ioprio); asmlinkage long sys_ioprio_get(int which, int who); asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, - unsigned long maxnode); + unsigned long maxnode); asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, - const unsigned long __user *from, const unsigned long __user *to); + const unsigned long __user *from, + const unsigned long __user *to); +asmlinkage long sys_mbind(unsigned long start, unsigned long len, + unsigned long mode, + unsigned long __user *nmask, + unsigned long maxnode, + unsigned flags); +asmlinkage long sys_get_mempolicy(int __user *policy, + unsigned long __user *nmask, + unsigned long maxnode, + unsigned long addr, unsigned long flags); + +asmlinkage long sys_inotify_init(void); +asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, + u32 mask); +asmlinkage long sys_inotify_rm_watch(int fd, u32 wd); asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus); -- cgit v1.2.3 From f193fbab2e4710f629e7c1859d4908646b47b126 Mon Sep 17 00:00:00 2001 From: YAMAMOTO Takashi Date: Wed, 18 Jan 2006 17:43:13 -0800 Subject: [PATCH] nfsd: check error status from nfsd_sync_dir Change nfsd_sync_dir to return an error if ->sync fails, and pass that error up through the stack. This involves a number of rearrangements of error paths, and care to distinguish between Linux -errno numbers and NFSERR numbers. In the 'create' routines, we continue with the 'setattr' even if a previous sync_dir failed. This patch is quite different from Takashi's in a few ways, but there is still a strong lineage. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/nfsd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 51c231a1e5a6..ec7c2e872d72 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -124,7 +124,7 @@ int nfsd_statfs(struct svc_rqst *, struct svc_fh *, int nfsd_notify_change(struct inode *, struct iattr *); int nfsd_permission(struct svc_export *, struct dentry *, int); -void nfsd_sync_dir(struct dentry *dp); +int nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL -- cgit v1.2.3 From 1918e341383ab787d6c5b17200f4ed901b10c777 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 18 Jan 2006 17:43:16 -0800 Subject: [PATCH] svcrpc: save and restore the daddr field when request deferred The server code currently keeps track of the destination address on every request so that it can reply using the same address. However we forget to do that in the case of a deferred request. Remedy this oversight. >From folks at PolyServe. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e4086ec8b952..50cab2a09f28 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -246,6 +246,7 @@ struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct sockaddr_in addr; struct svc_sock *svsk; /* where reply must go */ + u32 daddr; /* where reply must come from */ struct cache_deferred_req handle; int argslen; u32 args[0]; -- cgit v1.2.3 From 3a65588adc4401622b204caa897123e16a4a0318 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 18 Jan 2006 17:43:19 -0800 Subject: [PATCH] nfsd4: rename lk_stateowner One of the things that's confusing about nfsd4_lock is that the lk_stateowner field could be set to either of two different lockowners: the open owner or the lock owner. Rename to lk_replay_owner and add a comment to make it clear that it's used for whichever stateowner has its sequence id bumped for replay detection. Signed-off-by: J. Bruce Fields Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/xdr4.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 8903688890ce..77adba7d2281 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -145,8 +145,9 @@ struct nfsd4_lock { } ok; struct nfsd4_lock_denied denied; } u; - - struct nfs4_stateowner *lk_stateowner; + /* The lk_replay_owner is the open owner in the open_to_lock_owner + * case and the lock owner otherwise: */ + struct nfs4_stateowner *lk_replay_owner; }; #define lk_new_open_seqid v.new.open_seqid #define lk_new_open_stateid v.new.open_stateid -- cgit v1.2.3 From 5590ff0d5528b60153c0b4e7b771472b5a95e297 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 18 Jan 2006 17:43:53 -0800 Subject: [PATCH] vfs: *at functions: core Here is a series of patches which introduce in total 13 new system calls which take a file descriptor/filename pair instead of a single file name. These functions, openat etc, have been discussed on numerous occasions. They are needed to implement race-free filesystem traversal, they are necessary to implement a virtual per-thread current working directory (think multi-threaded backup software), etc. We have in glibc today implementations of the interfaces which use the /proc/self/fd magic. But this code is rather expensive. Here are some results (similar to what Jim Meyering posted before). The test creates a deep directory hierarchy on a tmpfs filesystem. Then rm -fr is used to remove all directories. Without syscall support I get this: real 0m31.921s user 0m0.688s sys 0m31.234s With syscall support the results are much better: real 0m20.699s user 0m0.536s sys 0m20.149s The interfaces are for obvious reasons currently not much used. But they'll be used. coreutils (and Jeff's posixutils) are already using them. Furthermore, code like ftw/fts in libc (maybe even glob) will also start using them. I expect a patch to make follow soon. Every program which is walking the filesystem tree will benefit. Signed-off-by: Ulrich Drepper Signed-off-by: Alexey Dobriyan Cc: Christoph Hellwig Cc: Al Viro Acked-by: Ingo Molnar Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fcntl.h | 7 +++++++ include/linux/fs.h | 7 +++++-- include/linux/namei.h | 7 ++++--- include/linux/time.h | 2 +- 4 files changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 8a7c82151de9..c52a63755fdd 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -23,6 +23,13 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value used to indicate + openat should use the current + working directory. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ +#define AT_REMOVEDIR 0x200 /* Remove directory instead of + unlinking file. */ + #ifdef __KERNEL__ #ifndef force_o_largefile diff --git a/include/linux/fs.h b/include/linux/fs.h index b77f2608eef9..84bb449b9b01 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1340,7 +1340,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode) extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); -extern long do_sys_open(const char __user *filename, int flags, int mode); +extern long do_sys_open(int fdf, const char __user *filename, int flags, + int mode); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); extern int filp_close(struct file *, fl_owner_t id); @@ -1479,7 +1480,7 @@ static inline void allow_write_access(struct file *file) } extern int do_pipe(int *); -extern int open_namei(const char *, int, int, struct nameidata *); +extern int open_namei(int dfd, const char *, int, int, struct nameidata *); extern int may_open(struct nameidata *, int, int); extern int kernel_read(struct file *, unsigned long, char *, unsigned long); @@ -1677,6 +1678,8 @@ extern int vfs_readdir(struct file *, filldir_t, void *); extern int vfs_stat(char __user *, struct kstat *); extern int vfs_lstat(char __user *, struct kstat *); +extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); +extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long); diff --git a/include/linux/namei.h b/include/linux/namei.h index b699e427c00c..e6698013e4d0 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -56,10 +56,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_ACCESS (0x0400) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); +extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *)); #define user_path_walk(name,nd) \ - __user_walk(name, LOOKUP_FOLLOW, nd) + __user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ - __user_walk(name, 0, nd) + __user_walk_fd(AT_FDCWD, name, 0, nd) extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); extern int FASTCALL(path_walk(const char *, struct nameidata *)); extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); @@ -67,7 +68,7 @@ extern void path_release(struct nameidata *); extern void path_release_on_umount(struct nameidata *); extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags); -extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags); +extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags); extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); diff --git a/include/linux/time.h b/include/linux/time.h index f2aca7ec6325..614dd8465839 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -74,7 +74,7 @@ extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); #define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) -extern long do_utimes(char __user *filename, struct timeval *times); +extern long do_utimes(int dfd, char __user *filename, struct timeval *times); struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); -- cgit v1.2.3 From 4f085507231e8003c66ed12e38c73b76e938ee95 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 18 Jan 2006 17:43:55 -0800 Subject: [PATCH] vfs: *at functions: i386 Wire up the x86 syscalls Signed-off-by: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/unistd.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 481c3c0ea720..e3028aaf6d83 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -300,8 +300,21 @@ #define __NR_inotify_add_watch 292 #define __NR_inotify_rm_watch 293 #define __NR_migrate_pages 294 +#define __NR_openat 295 +#define __NR_mkdirat 296 +#define __NR_mknodat 297 +#define __NR_fchownat 298 +#define __NR_futimesat 299 +#define __NR_newfstatat 300 +#define __NR_unlinkat 301 +#define __NR_renameat 302 +#define __NR_linkat 303 +#define __NR_symlinkat 304 +#define __NR_readlinkat 305 +#define __NR_fchmodat 306 +#define __NR_faccessat 307 -#define NR_syscalls 295 +#define NR_syscalls 308 /* * user-visible error numbers are in the range -1 - -128: see -- cgit v1.2.3 From a60fc5190a31d98508ea6a76f74217f4104e74b7 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 18 Jan 2006 17:43:56 -0800 Subject: [PATCH] vfs: *at functions: x86_64 Wire up the x86_64 syscalls. Signed-off-by: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-x86_64/ia32_unistd.h | 15 ++++++++++++++- include/asm-x86_64/unistd.h | 29 ++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h index e8843362a6cc..e87cd83a0e86 100644 --- a/include/asm-x86_64/ia32_unistd.h +++ b/include/asm-x86_64/ia32_unistd.h @@ -300,7 +300,20 @@ #define __NR_ia32_inotify_add_watch 292 #define __NR_ia32_inotify_rm_watch 293 #define __NR_ia32_migrate_pages 294 +#define __NR_ia32_opanat 295 +#define __NR_ia32_mkdirat 296 +#define __NR_ia32_mknodat 297 +#define __NR_ia32_fchownat 298 +#define __NR_ia32_futimesat 299 +#define __NR_ia32_newfstatat 300 +#define __NR_ia32_unlinkat 301 +#define __NR_ia32_renameat 302 +#define __NR_ia32_linkat 303 +#define __NR_ia32_symlinkat 304 +#define __NR_ia32_readlinkat 305 +#define __NR_ia32_fchmodat 306 +#define __NR_ia32_faccessat 307 -#define IA32_NR_syscalls 295 /* must be > than biggest syscall! */ +#define IA32_NR_syscalls 308 /* must be > than biggest syscall! */ #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index e6f896161c11..436d099b5b6b 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -573,8 +573,35 @@ __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) #define __NR_migrate_pages 256 __SYSCALL(__NR_migrate_pages, sys_migrate_pages) +#define __NR_openat 257 +__SYSCALL(__NR_openat, sys_openat) +#define __NR_mkdirat 258 +__SYSCALL(__NR_mkdirat, sys_mkdirat) +#define __NR_mknodat 259 +__SYSCALL(__NR_mknodat, sys_mknodat) +#define __NR_fchownat 260 +__SYSCALL(__NR_fchownat, sys_fchownat) +#define __NR_futimesat 261 +__SYSCALL(__NR_futimesat, sys_futimesat) +#define __NR_newfstatat 262 +__SYSCALL(__NR_newfstatat, sys_newfstatat) +#define __NR_unlinkat 263 +__SYSCALL(__NR_unlinkat, sys_unlinkat) +#define __NR_renameat 264 +__SYSCALL(__NR_renameat, sys_renameat) +#define __NR_linkat 265 +__SYSCALL(__NR_linkat, sys_linkat) +#define __NR_symlinkat 266 +__SYSCALL(__NR_symlinkat, sys_symlinkat) +#define __NR_readlinkat 267 +__SYSCALL(__NR_readlinkat, sys_readlinkat) +#define __NR_fchmodat 268 +__SYSCALL(__NR_fchmodat, sys_fchmodat) +#define __NR_faccessat 269 +__SYSCALL(__NR_faccessat, sys_faccessat) + +#define __NR_syscall_max __NR_faccessat -#define __NR_syscall_max __NR_migrate_pages #ifndef __NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ -- cgit v1.2.3 From 150256d8aadb3a337c31efa9e175cbd25bf06b06 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 18 Jan 2006 17:43:57 -0800 Subject: [PATCH] Generic sys_rt_sigsuspend() The TIF_RESTORE_SIGMASK flag allows us to have a generic implementation of sys_rt_sigsuspend() instead of duplicating it for each architecture. This provides such an implementation and makes arch/powerpc use it. It also tidies up the ppc32 sys_sigsuspend() to use TIF_RESTORE_SIGMASK. Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-powerpc/unistd.h | 2 ++ include/linux/sched.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index 19eaac3fbbf9..76daec496062 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -444,11 +444,13 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6 #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND #ifdef CONFIG_PPC32 #define __ARCH_WANT_OLD_STAT #endif #ifdef CONFIG_PPC64 #define __ARCH_WANT_COMPAT_SYS_TIME +#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND #endif /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 2df1a1a2fee5..0cfcd1c7865e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -809,6 +809,7 @@ struct task_struct { struct sighand_struct *sighand; sigset_t blocked, real_blocked; + sigset_t saved_sigmask; /* To be restored with TIF_RESTORE_SIGMASK */ struct sigpending pending; unsigned long sas_ss_sp; -- cgit v1.2.3 From a411aee96ea7fe6fe065df65bf29ea755bcdb554 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Jan 2006 17:43:59 -0800 Subject: [PATCH] Handle TIF_RESTORE_SIGMASK for FRV Handle TIF_RESTORE_SIGMASK as added by David Woodhouse's patch entitled: [PATCH] 2/3 Add TIF_RESTORE_SIGMASK support for arch/powerpc [PATCH] 3/3 Generic sys_rt_sigsuspend It does the following: (1) Declares TIF_RESTORE_SIGMASK for FRV. (2) Invokes it over to do_signal() when TIF_RESTORE_SIGMASK is set. (3) Makes do_signal() support TIF_RESTORE_SIGMASK, using the signal mask saved in current->saved_sigmask. (4) Discards sys_rt_sigsuspend() from the arch, using the generic one instead. (5) Makes sys_sigsuspend() save the signal mask and set TIF_RESTORE_SIGMASK rather than attempting to fudge the return registers. (6) Makes sys_sigsuspend() return -ERESTARTNOHAND rather than looping intrinsically. (7) Makes setup_frame(), setup_rt_frame() and handle_signal() return 0 or -EFAULT rather than true/false to be consistent with the rest of the kernel. Due to the fact do_signal() is then only called from one place: (8) Make do_signal() no longer have a return value is it was just being ignored; force_sig() takes care of this. (9) Discards the old sigmask argument to do_signal() as it's no longer necessary. This patch depends on the FRV signalling patches as well as the sys_rt_sigsuspend patch. Signed-off-by: David Howells Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-frv/thread_info.h | 2 ++ include/asm-frv/unistd.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h index a5576e02dd1d..ea426abf01d3 100644 --- a/include/asm-frv/thread_info.h +++ b/include/asm-frv/thread_info.h @@ -129,6 +129,7 @@ register struct thread_info *__current_thread_info asm("gr15"); #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ #define TIF_IRET 5 /* return with iret */ +#define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 /* OOM killer killed process */ @@ -138,6 +139,7 @@ register struct thread_info *__current_thread_info asm("gr15"); #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_IRET (1 << TIF_IRET) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h index cde376a7a857..4d994d2e99e3 100644 --- a/include/asm-frv/unistd.h +++ b/include/asm-frv/unistd.h @@ -486,6 +486,7 @@ static inline pid_t wait(int * wait_stat) /* #define __ARCH_WANT_SYS_SIGPENDING */ #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND #endif /* -- cgit v1.2.3 From 283828f3c19ceb3a64a8544d42cc189003e8b0fe Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Jan 2006 17:44:00 -0800 Subject: [PATCH] Handle TIF_RESTORE_SIGMASK for i386 Handle TIF_RESTORE_SIGMASK as added by David Woodhouse's patch entitled: [PATCH] 2/3 Add TIF_RESTORE_SIGMASK support for arch/powerpc [PATCH] 3/3 Generic sys_rt_sigsuspend It does the following: (1) Declares TIF_RESTORE_SIGMASK for i386. (2) Invokes it over to do_signal() when TIF_RESTORE_SIGMASK is set. (3) Makes do_signal() support TIF_RESTORE_SIGMASK, using the signal mask saved in current->saved_sigmask. (4) Discards sys_rt_sigsuspend() from the arch, using the generic one instead. (5) Makes sys_sigsuspend() save the signal mask and set TIF_RESTORE_SIGMASK rather than attempting to fudge the return registers. (6) Makes sys_sigsuspend() return -ERESTARTNOHAND rather than looping intrinsically. (7) Makes setup_frame(), setup_rt_frame() and handle_signal() return 0 or -EFAULT rather than true/false to be consistent with the rest of the kernel. Due to the fact do_signal() is then only called from one place: (8) Makes do_signal() no longer have a return value is it was just being ignored; force_sig() takes care of this. (9) Discards the old sigmask argument to do_signal() as it's no longer necessary. (10) Makes do_signal() static. (11) Marks the second argument to do_notify_resume() as unused. The unused argument should remain in the middle as the arguments are passed in as registers, and the ordering is specific in entry.S Given the way do_signal() is now no longer called from sys_{,rt_}sigsuspend(), they no longer need access to the exception frame, and so can just take arguments normally. This patch depends on sys_rt_sigsuspend patch. Signed-off-by: David Howells Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/signal.h | 1 - include/asm-i386/thread_info.h | 2 ++ include/asm-i386/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/signal.h b/include/asm-i386/signal.h index 76524b4052ac..026fd231488d 100644 --- a/include/asm-i386/signal.h +++ b/include/asm-i386/signal.h @@ -218,7 +218,6 @@ static __inline__ int sigfindinword(unsigned long word) } struct pt_regs; -extern int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); #define ptrace_signal_deliver(regs, cookie) \ do { \ diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index 2493e77e8c30..e20e99551d71 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -140,6 +140,7 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__; #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 @@ -152,6 +153,7 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__; #define _TIF_SYSCALL_EMU (1< Date: Wed, 18 Jan 2006 17:44:01 -0800 Subject: [PATCH] TIF_RESTORE_SIGMASK support for arch/powerpc Implement the TIF_RESTORE_SIGMASK flag in the new arch/powerpc kernel, for both 32-bit and 64-bit system call paths. Signed-off-by: David Woodhouse Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-powerpc/thread_info.h | 5 ++++- include/asm-powerpc/unistd.h | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h index 7e09d7cda933..67cdaf3ae9fc 100644 --- a/include/asm-powerpc/thread_info.h +++ b/include/asm-powerpc/thread_info.h @@ -122,6 +122,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTOREALL 12 /* Restore all regs (implies NOERROR) */ #define TIF_SAVE_NVGPRS 13 /* Save r14-r31 in signal frame */ #define TIF_NOERROR 14 /* Force successful syscall return */ +#define TIF_RESTORE_SIGMASK 15 /* Restore signal mask in do_signal */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1< Date: Wed, 18 Jan 2006 17:44:02 -0800 Subject: [PATCH] uml: add TIF_RESTORE_SIGMASK support Add support for TIF_RESTORE_SIGMASK. I copy the i386 handling of the flag. sys_sigsuspend is also changed to follow i386. Also a bit of cleanup - turn an if into a switch get rid of a couple more emacs formatting comments Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-um/thread_info.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h index 705c71972c32..17b6b07c4332 100644 --- a/include/asm-um/thread_info.h +++ b/include/asm-um/thread_info.h @@ -69,6 +69,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTART_BLOCK 4 #define TIF_MEMDIE 5 #define TIF_SYSCALL_AUDIT 6 +#define TIF_RESTORE_SIGMASK 7 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -76,16 +77,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ -- cgit v1.2.3 From 36a7878a224c18aa4a5e098dc93d19cf5601462b Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Wed, 18 Jan 2006 17:44:03 -0800 Subject: [PATCH] uml: use generic sys_rt_sigsuspend Use the generic sys_rt_sigsuspend. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-um/unistd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-um/unistd.h b/include/asm-um/unistd.h index 6fdde45cc053..afccfcaa9ea9 100644 --- a/include/asm-um/unistd.h +++ b/include/asm-um/unistd.h @@ -34,6 +34,7 @@ extern int um_execve(const char *file, char *const argv[], char *const env[]); #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND #endif #ifdef __KERNEL_SYSCALLS__ -- cgit v1.2.3 From 9f72949f679df06021c9e43886c9191494fdb007 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 18 Jan 2006 17:44:05 -0800 Subject: [PATCH] Add pselect/ppoll system call implementation The following implementation of ppoll() and pselect() system calls depends on the architecture providing a TIF_RESTORE_SIGMASK flag in the thread_info. These system calls have to change the signal mask during their operation, and signal handlers must be invoked using the new, temporary signal mask. The old signal mask must be restored either upon successful exit from the system call, or upon returning from the invoked signal handler if the system call is interrupted. We can't simply restore the original signal mask and return to userspace, since the restored signal mask may actually block the signal which interrupted the system call. The TIF_RESTORE_SIGMASK flag deals with this by causing the syscall exit path to trap into do_signal() just as TIF_SIGPENDING does, and by causing do_signal() to use the saved signal mask instead of the current signal mask when setting up the stack frame for the signal handler -- or by causing do_signal() to simply restore the saved signal mask in the case where there is no handler to be invoked. The first patch implements the sys_pselect() and sys_ppoll() system calls, which are present only if TIF_RESTORE_SIGMASK is defined. That #ifdef should go away in time when all architectures have implemented it. The second patch implements TIF_RESTORE_SIGMASK for the PowerPC kernel (in the -mm tree), and the third patch then removes the arch-specific implementations of sys_rt_sigsuspend() and replaces them with generic versions using the same trick. The fourth and fifth patches, provided by David Howells, implement TIF_RESTORE_SIGMASK for FR-V and i386 respectively, and the sixth patch adds the syscalls to the i386 syscall table. This patch: Add the pselect() and ppoll() system calls, providing core routines usable by the original select() and poll() system calls and also the new calls (with their semantics w.r.t timeouts). Signed-off-by: David Woodhouse Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/poll.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/poll.h b/include/linux/poll.h index f6da702088f4..8e8f6098508a 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -92,7 +92,11 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset) memset(fdset, 0, FDS_BYTES(nr)); } -extern int do_select(int n, fd_set_bits *fds, long *timeout); +#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1) + +extern int do_select(int n, fd_set_bits *fds, s64 *timeout); +extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, + s64 *timeout); #endif /* KERNEL */ -- cgit v1.2.3 From 3213e913b0d6baeb28aa1affbdd4bfa7efedc35f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 18 Jan 2006 17:44:06 -0800 Subject: [PATCH] Add pselect/ppoll system calls on i386 Add the sys_pselect6() and sys_poll() calls to the i386 syscall table. Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 3400441b3f91..597496ed2aee 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -313,8 +313,10 @@ #define __NR_readlinkat 305 #define __NR_fchmodat 306 #define __NR_faccessat 307 +#define __NR_pselect6 308 +#define __NR_ppoll 309 -#define NR_syscalls 308 +#define NR_syscalls 310 /* * user-visible error numbers are in the range -1 - -128: see -- cgit v1.2.3 From 715b49ef2de6fcead0776d9349071670282faf65 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 18 Jan 2006 17:44:07 -0800 Subject: [PATCH] EDAC: atomic scrub operations EDAC requires a way to scrub memory if an ECC error is found and the chipset does not do the work automatically. That means rewriting memory locations atomically with respect to all CPUs _and_ bus masters. That means we can't use atomic_add(foo, 0) as it gets optimised for non-SMP This adds a function to include/asm-foo/atomic.h for the platforms currently supported which implements a scrub of a mapped block. It also adjusts a few other files include order where atomic.h is included before types.h as this now causes an error as atomic_scrub uses u32. Signed-off-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/atomic.h | 12 ++++++++++++ include/asm-x86_64/atomic.h | 12 ++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h index de649d3aa2d4..e2c00c95a5e1 100644 --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -255,5 +255,17 @@ __asm__ __volatile__(LOCK "orl %0,%1" \ #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +/* ECC atomic, DMA, SMP and interrupt safe scrub function */ + +static __inline__ void atomic_scrub(unsigned long *virt_addr, u32 size) +{ + u32 i; + for (i = 0; i < size / 4; i++, virt_addr++) + /* Very carefully read and write to memory atomically + * so we are interrupt, DMA and SMP safe. + */ + __asm__ __volatile__("lock; addl $0, %0"::"m"(*virt_addr)); +} + #include #endif diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h index 4b5cd553e772..4048508c4f40 100644 --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -426,5 +426,17 @@ __asm__ __volatile__(LOCK "orl %0,%1" \ #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +/* ECC atomic, DMA, SMP and interrupt safe scrub function */ + +static __inline__ void atomic_scrub(u32 *virt_addr, u32 size) +{ + u32 i; + for (i = 0; i < size / 4; i++, virt_addr++) + /* Very carefully read and write to memory atomically + * so we are interrupt, DMA and SMP safe. + */ + __asm__ __volatile__("lock; addl $0, %0"::"m"(*virt_addr)); +} + #include #endif -- cgit v1.2.3 From da9bb1d27b21cb24cbb6a2efb5d3c464d357a01e Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 18 Jan 2006 17:44:13 -0800 Subject: [PATCH] EDAC: core EDAC support code This is a subset of the bluesmoke project core code, stripped of the NMI work which isn't ready to merge and some of the "interesting" proc functionality that needs reworking or just has no place in kernel. It requires no core kernel changes except the added scrub functions already posted. The goal is to merge further functionality only after the core code is accepted and proven in the base kernel, and only at the point the upstream extras are really ready to merge. From: doug thompson This converts EDAC to sysfs and is the final chunk neccessary before EDAC has a stable user space API and can be considered for submission into the base kernel. Signed-off-by: Alan Cox Signed-off-by: Adrian Bunk Signed-off-by: Jesper Juhl Signed-off-by: doug thompson Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/atomic.h | 12 ------------ include/asm-i386/edac.h | 18 ++++++++++++++++++ include/asm-x86_64/atomic.h | 12 ------------ include/asm-x86_64/edac.h | 18 ++++++++++++++++++ 4 files changed, 36 insertions(+), 24 deletions(-) create mode 100644 include/asm-i386/edac.h create mode 100644 include/asm-x86_64/edac.h (limited to 'include') diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h index e2c00c95a5e1..de649d3aa2d4 100644 --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -255,17 +255,5 @@ __asm__ __volatile__(LOCK "orl %0,%1" \ #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -/* ECC atomic, DMA, SMP and interrupt safe scrub function */ - -static __inline__ void atomic_scrub(unsigned long *virt_addr, u32 size) -{ - u32 i; - for (i = 0; i < size / 4; i++, virt_addr++) - /* Very carefully read and write to memory atomically - * so we are interrupt, DMA and SMP safe. - */ - __asm__ __volatile__("lock; addl $0, %0"::"m"(*virt_addr)); -} - #include #endif diff --git a/include/asm-i386/edac.h b/include/asm-i386/edac.h new file mode 100644 index 000000000000..3e7dd0ab68ce --- /dev/null +++ b/include/asm-i386/edac.h @@ -0,0 +1,18 @@ +#ifndef ASM_EDAC_H +#define ASM_EDAC_H + +/* ECC atomic, DMA, SMP and interrupt safe scrub function */ + +static __inline__ void atomic_scrub(void *va, u32 size) +{ + unsigned long *virt_addr = va; + u32 i; + + for (i = 0; i < size / 4; i++, virt_addr++) + /* Very carefully read and write to memory atomically + * so we are interrupt, DMA and SMP safe. + */ + __asm__ __volatile__("lock; addl $0, %0"::"m"(*virt_addr)); +} + +#endif diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h index 4048508c4f40..4b5cd553e772 100644 --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -426,17 +426,5 @@ __asm__ __volatile__(LOCK "orl %0,%1" \ #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -/* ECC atomic, DMA, SMP and interrupt safe scrub function */ - -static __inline__ void atomic_scrub(u32 *virt_addr, u32 size) -{ - u32 i; - for (i = 0; i < size / 4; i++, virt_addr++) - /* Very carefully read and write to memory atomically - * so we are interrupt, DMA and SMP safe. - */ - __asm__ __volatile__("lock; addl $0, %0"::"m"(*virt_addr)); -} - #include #endif diff --git a/include/asm-x86_64/edac.h b/include/asm-x86_64/edac.h new file mode 100644 index 000000000000..cad1cd42b4ee --- /dev/null +++ b/include/asm-x86_64/edac.h @@ -0,0 +1,18 @@ +#ifndef ASM_EDAC_H +#define ASM_EDAC_H + +/* ECC atomic, DMA, SMP and interrupt safe scrub function */ + +static __inline__ void atomic_scrub(void *va, u32 size) +{ + unsigned int *virt_addr = va; + u32 i; + + for (i = 0; i < size / 4; i++, virt_addr++) + /* Very carefully read and write to memory atomically + * so we are interrupt, DMA and SMP safe. + */ + __asm__ __volatile__("lock; addl $0, %0"::"m"(*virt_addr)); +} + +#endif -- cgit v1.2.3 From f7111ceb5266750db2a1d193b98fb6a3d9b5a56a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 Jan 2006 21:57:37 -0800 Subject: [SPARC]: sparc32 needs PROMDEV_{I,O}RSC defines too. Signed-off-by: David S. Miller --- include/asm-sparc/oplib.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-sparc/oplib.h b/include/asm-sparc/oplib.h index 95944556d8b6..d0d76b30eb4c 100644 --- a/include/asm-sparc/oplib.h +++ b/include/asm-sparc/oplib.h @@ -164,6 +164,7 @@ enum prom_input_device { PROMDEV_IKBD, /* input from keyboard */ PROMDEV_ITTYA, /* input from ttya */ PROMDEV_ITTYB, /* input from ttyb */ + PROMDEV_IRSC, /* input from rsc */ PROMDEV_I_UNK, }; @@ -175,6 +176,7 @@ enum prom_output_device { PROMDEV_OSCREEN, /* to screen */ PROMDEV_OTTYA, /* to ttya */ PROMDEV_OTTYB, /* to ttyb */ + PROMDEV_ORSC, /* to rsc */ PROMDEV_O_UNK, }; -- cgit v1.2.3 From 2d7d5f05111a9d913131a2764d8b20157f8f758d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 19 Jan 2006 02:42:49 -0800 Subject: [SPARC]: Add support for *at(), ppoll, and pselect syscalls. This also includes by necessity _TIF_RESTORE_SIGMASK support, which actually resulted in a lot of cleanups. The sparc signal handling code is quite a mess and I should clean it up some day. Signed-off-by: David S. Miller --- include/asm-sparc/thread_info.h | 5 +++-- include/asm-sparc/unistd.h | 22 +++++++++++++++++++--- include/asm-sparc64/thread_info.h | 6 +++--- include/asm-sparc64/unistd.h | 23 ++++++++++++++++++++--- 4 files changed, 45 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/asm-sparc/thread_info.h b/include/asm-sparc/thread_info.h index 65f060b040ab..91b9f5888c85 100644 --- a/include/asm-sparc/thread_info.h +++ b/include/asm-sparc/thread_info.h @@ -128,9 +128,10 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *) * thread information flag bit numbers */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */ +/* flag bit 1 is available */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ #define TIF_USEDFPU 8 /* FPU was used by this task * this quantum (SMP) */ #define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling @@ -139,9 +140,9 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *) /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1< Date: Thu, 19 Jan 2006 16:58:37 -0800 Subject: [NETFILTER] x_tables: Make XT_ALIGN align as strictly as necessary. Or else we break on ppc32 and other 32-bit platforms. Based upon a patch from Harald Welte. Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 472f04834809..59ff6c430cf6 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -19,7 +19,7 @@ struct xt_get_revision /* For standard target */ #define XT_RETURN (-NF_REPEAT - 1) -#define XT_ALIGN(s) (((s) + (__alignof__(void *)-1)) & ~(__alignof__(void *)-1)) +#define XT_ALIGN(s) (((s) + (__alignof__(u_int64_t)-1)) & ~(__alignof__(u_int64_t)-1)) /* Standard return verdict, or do jump. */ #define XT_STANDARD_TARGET "" -- cgit v1.2.3