From 9ce8e3073d9cfd6f859c22a25441db41b85cbf6e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 27 Aug 2008 15:23:18 +0200 Subject: libata: add whitelist for devices with known good pata-sata bridges libata currently imposes a UDMA5 max transfer rate and 200 sector max transfer size for SATA devices that sit behind a pata-sata bridge. Lots of devices have known good bridges that don't need this limit applied. The MTRON SSD disks are such devices. Transfer rates are increased by 20-30% with the restriction removed. So add a "blacklist" entry for the MTRON devices, with a flag indicating that the bridge is known good. Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 507f53ef8038..f5441edee55f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -372,6 +372,7 @@ enum { ATA_HORKAGE_IPM = (1 << 7), /* Link PM problems */ ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ + ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 9663f2e6a6cf3f82b06d8fb699b11b80f92553ba Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 30 Oct 2008 19:38:18 -0700 Subject: resources: add io-mapping functions to dynamically map large device apertures Impact: add new generic io_map_*() APIs Graphics devices have large PCI apertures which would consume a significant fraction of a 32-bit address space if mapped during driver initialization. Using ioremap at runtime is impractical as it is too slow. This new set of interfaces uses atomic mappings on 32-bit processors and a large static mapping on 64-bit processors to provide reasonable 32-bit performance and optimal 64-bit performance. The current implementation sits atop the io_map_atomic fixmap-based mechanism for 32-bit processors. This includes some editorial suggestions from Randy Dunlap for Documentation/io-mapping.txt Signed-off-by: Keith Packard Signed-off-by: Eric Anholt Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 118 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 include/linux/io-mapping.h (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h new file mode 100644 index 000000000000..1b566993db6e --- /dev/null +++ b/include/linux/io-mapping.h @@ -0,0 +1,118 @@ +/* + * Copyright © 2008 Keith Packard + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef _LINUX_IO_MAPPING_H +#define _LINUX_IO_MAPPING_H + +#include +#include +#include +#include + +/* + * The io_mapping mechanism provides an abstraction for mapping + * individual pages from an io device to the CPU in an efficient fashion. + * + * See Documentation/io_mapping.txt + */ + +/* this struct isn't actually defined anywhere */ +struct io_mapping; + +#ifdef CONFIG_X86_64 + +/* Create the io_mapping object*/ +static inline struct io_mapping * +io_mapping_create_wc(unsigned long base, unsigned long size) +{ + return (struct io_mapping *) ioremap_wc(base, size); +} + +static inline void +io_mapping_free(struct io_mapping *mapping) +{ + iounmap(mapping); +} + +/* Atomic map/unmap */ +static inline void * +io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap_atomic(void *vaddr) +{ +} + +/* Non-atomic map/unmap */ +static inline void * +io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) +{ + return ((char *) mapping) + offset; +} + +static inline void +io_mapping_unmap(void *vaddr) +{ +} + +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_X86_32 +static inline struct io_mapping * +io_mapping_create_wc(unsigned long base, unsigned long size) +{ + return (struct io_mapping *) base; +} + +static inline void +io_mapping_free(struct io_mapping *mapping) +{ +} + +/* Atomic map/unmap */ +static inline void * +io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) +{ + offset += (unsigned long) mapping; + return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, + __pgprot(__PAGE_KERNEL_WC)); +} + +static inline void +io_mapping_unmap_atomic(void *vaddr) +{ + iounmap_atomic(vaddr, KM_USER0); +} + +static inline void * +io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) +{ + offset += (unsigned long) mapping; + return ioremap_wc(offset, PAGE_SIZE); +} + +static inline void +io_mapping_unmap(void *vaddr) +{ + iounmap(vaddr); +} +#endif /* CONFIG_X86_32 */ + +#endif /* _LINUX_IO_MAPPING_H */ -- cgit v1.2.3 From 4ac96572f1f6abe44b5e02e80fdfb5a990129613 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 2 Nov 2008 09:51:27 -0500 Subject: linux/string.h: fix comment typo s/user/used/ Signed-off-by: Jeff Garzik Signed-off-by: Linus Torvalds --- include/linux/string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 810d80df0a1d..d18fc198aa2f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -1,7 +1,7 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ -/* We don't want strings.h stuff being user by user stuff by accident */ +/* We don't want strings.h stuff being used by user stuff by accident */ #ifndef __KERNEL__ #include -- cgit v1.2.3 From e5beae16901795223d677f15aa2fe192976278ee Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 3 Nov 2008 18:21:45 +0100 Subject: io mapping: clean up #ifdefs Impact: cleanup clean up ifdefs: change #ifdef CONFIG_X86_32/64 to CONFIG_HAVE_ATOMIC_IOMAP. flip around the #ifdef sections to clean up the structure. Signed-off-by: Keith Packard Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 1b566993db6e..82df31726a54 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -33,86 +33,93 @@ /* this struct isn't actually defined anywhere */ struct io_mapping; -#ifdef CONFIG_X86_64 +#ifdef CONFIG_HAVE_ATOMIC_IOMAP + +/* + * For small address space machines, mapping large objects + * into the kernel virtual space isn't practical. Where + * available, use fixmap support to dynamically map pages + * of the object at run time. + */ -/* Create the io_mapping object*/ static inline struct io_mapping * io_mapping_create_wc(unsigned long base, unsigned long size) { - return (struct io_mapping *) ioremap_wc(base, size); + return (struct io_mapping *) base; } static inline void io_mapping_free(struct io_mapping *mapping) { - iounmap(mapping); } /* Atomic map/unmap */ static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + offset += (unsigned long) mapping; + return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, + __pgprot(__PAGE_KERNEL_WC)); } static inline void io_mapping_unmap_atomic(void *vaddr) { + iounmap_atomic(vaddr, KM_USER0); } -/* Non-atomic map/unmap */ static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + offset += (unsigned long) mapping; + return ioremap_wc(offset, PAGE_SIZE); } static inline void io_mapping_unmap(void *vaddr) { + iounmap(vaddr); } -#endif /* CONFIG_X86_64 */ +#else -#ifdef CONFIG_X86_32 +/* Create the io_mapping object*/ static inline struct io_mapping * io_mapping_create_wc(unsigned long base, unsigned long size) { - return (struct io_mapping *) base; + return (struct io_mapping *) ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { + iounmap(mapping); } /* Atomic map/unmap */ static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, - __pgprot(__PAGE_KERNEL_WC)); + return ((char *) mapping) + offset; } static inline void io_mapping_unmap_atomic(void *vaddr) { - iounmap_atomic(vaddr, KM_USER0); } +/* Non-atomic map/unmap */ static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return ioremap_wc(offset, PAGE_SIZE); + return ((char *) mapping) + offset; } static inline void io_mapping_unmap(void *vaddr) { - iounmap(vaddr); } -#endif /* CONFIG_X86_32 */ + +#endif /* HAVE_ATOMIC_IOMAP */ #endif /* _LINUX_IO_MAPPING_H */ -- cgit v1.2.3 From 6a87e42e955ff27e07a77f65f8f077dc7c4171e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Nov 2008 19:01:09 +0900 Subject: libata: implement ATA_HORKAGE_ATAPI_MOD16_DMA and apply it libata always uses PIO for ATAPI commands when the number of bytes to transfer isn't multiple of 16 but quantum DAT72 chokes on odd bytes PIO transfers. Implement a horkage to skip the mod16 check and apply it to the quantum device. This is reported by John Clark in the following thread. http://thread.gmane.org/gmane.linux.ide/34748 Signed-off-by: Tejun Heo Cc: John Clark Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index f5441edee55f..c7665a4134c5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -373,6 +373,8 @@ enum { ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ + ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands + not multiple of 16 bytes */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 9b22ea560957de1484e6b3e8538f7eef202e3596 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 4 Nov 2008 14:49:57 -0800 Subject: net: fix packet socket delivery in rx irq handler The changes to deliver hardware accelerated VLAN packets to packet sockets (commit bc1d0411) caused a warning for non-NAPI drivers. The __vlan_hwaccel_rx() function is called directly from the drivers RX function, for non-NAPI drivers that means its still in RX IRQ context: [ 27.779463] ------------[ cut here ]------------ [ 27.779509] WARNING: at kernel/softirq.c:136 local_bh_enable+0x37/0x81() ... [ 27.782520] [] netif_nit_deliver+0x5b/0x75 [ 27.782590] [] __vlan_hwaccel_rx+0x79/0x162 [ 27.782664] [] atl1_intr+0x9a9/0xa7c [atl1] [ 27.782738] [] handle_IRQ_event+0x23/0x51 [ 27.782808] [] handle_edge_irq+0xc2/0x102 [ 27.782878] [] do_IRQ+0x4d/0x64 Split hardware accelerated VLAN reception into two parts to fix this: - __vlan_hwaccel_rx just stores the VLAN TCI and performs the VLAN device lookup, then calls netif_receive_skb()/netif_rx() - vlan_hwaccel_do_receive(), which is invoked by netif_receive_skb() in softirq context, performs the real reception and delivery to packet sockets. Reported-and-tested-by: Ramon Casellas Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 9e7b49b8062d..a5cb0c3f6dcf 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -114,6 +114,8 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling); +extern int vlan_hwaccel_do_receive(struct sk_buff *skb); + #else static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { @@ -133,6 +135,11 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, BUG(); return NET_XMIT_SUCCESS; } + +static inline int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + return 0; +} #endif /** -- cgit v1.2.3 From 467622ef2acb01986eab37ef96c3632b3ea35999 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 1 Nov 2008 04:19:11 -0700 Subject: [MTD] [NOR] Fix cfi_send_gen_cmd handling of x16 devices in x8 mode (v4) For "unlock" cycles to 16bit devices in 8bit compatibility mode we need to use the byte addresses 0xaaa and 0x555. These effectively match the word address 0x555 and 0x2aa, except the latter has its low bit set. Most chips don't care about the value of the 'A-1' pin in x8 mode, but some -- like the ST M29W320D -- do. So we need to be careful to set it where appropriate. cfi_send_gen_cmd is only ever passed addresses where the low byte is 0x00, 0x55 or 0xaa. Of those, only addresses ending 0xaa are affected by this patch, by masking in the extra low bit when the device is known to be in compatibility mode. [dwmw2: Do it only when (cmd_ofs & 0xff) == 0xaa] v4: Fix stupid typo in cfi_build_cmd_addr that failed to compile I'm writing this patch way to late at night. v3: Bring all of the work back into cfi_build_cmd_addr including calling of map_bankwidth(map) and cfi_interleave(cfi) So every caller doesn't need to. v2: Only modified the address if we our device_type is larger than our bus width. Cc: stable@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: David Woodhouse --- include/linux/mtd/cfi.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index ee5124ec319e..00e2b575021f 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -282,9 +282,25 @@ struct cfi_private { /* * Returns the command address according to the given geometry. */ -static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, int interleave, int type) +static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, + struct map_info *map, struct cfi_private *cfi) { - return (cmd_ofs * type) * interleave; + unsigned bankwidth = map_bankwidth(map); + unsigned interleave = cfi_interleave(cfi); + unsigned type = cfi->device_type; + uint32_t addr; + + addr = (cmd_ofs * type) * interleave; + + /* Modify the unlock address if we are in compatiblity mode. + * For 16bit devices on 8 bit busses + * and 32bit devices on 16 bit busses + * set the low bit of the alternating bit sequence of the address. + */ + if (((type * interleave) > bankwidth) && ((uint8_t)cmd_ofs == 0xaa)) + addr |= (type >> 1)*interleave; + + return addr; } /* @@ -430,7 +446,7 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t int type, map_word *prev_val) { map_word val; - uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type); + uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, map, cfi); val = cfi_build_cmd(cmd, map, cfi); if (prev_val) -- cgit v1.2.3 From 9fcd18c9e63e325dbd2b4c726623f760788d5aa8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 5 Nov 2008 16:52:08 +0100 Subject: sched: re-tune balancing Impact: improve wakeup affinity on NUMA systems, tweak SMP systems Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain balancing defaults on NUMA and SMP systems. Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason why we would not want to have wakeup affinity across nodes as well. (we already do this in the standard NUMA template.) lat_ctx on a NUMA box is particularly happy about this change: before: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.60 | 2 5.70 after: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.65 | 2 2.07 a 2.75x speedup. pipe-test is similarly happy about it too: | phoenix:~/sched-tests> ./pipe-test | 18.26 usecs/loop. | 14.70 usecs/loop. | 14.38 usecs/loop. | 10.55 usecs/loop. # +WAKE_AFFINE on domain0+domain1 | 8.63 usecs/loop. | 8.59 usecs/loop. | 9.03 usecs/loop. | 8.94 usecs/loop. | 8.96 usecs/loop. | 8.63 usecs/loop. Also: - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings) - enable SD_WAKE_BALANCE on SMP domains Sysbench+postgresql improves all around the board, quite significantly: .28-rc3-11474e2c .28-rc3-11474e2c-tune ------------------------------------------------- 1: 571 688 +17.08% 2: 1236 1206 -2.55% 4: 2381 2642 +9.89% 8: 4958 5164 +3.99% 16: 9580 9574 -0.07% 32: 7128 8118 +12.20% 64: 7342 8266 +11.18% 128: 7342 8064 +8.95% 256: 7519 7884 +4.62% 512: 7350 7731 +4.93% ------------------------------------------------- SUM: 55412 59341 +6.62% So it's a win both for the runup portion, the peak area and the tail. Signed-off-by: Ingo Molnar --- include/linux/topology.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 2158fc0d5a56..34a7ee0ebed2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -146,10 +146,10 @@ void arch_update_cpu_topology(void); .wake_idx = 1, \ .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ + | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ | BALANCE_FOR_PKG_POWER,\ .last_balance = jiffies, \ .balance_interval = 1, \ -- cgit v1.2.3 From f92131c3dd567fc6df18ce3f46fcf57ecbdefbe0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 29 Oct 2008 14:10:51 +0100 Subject: bio: define __BIOVEC_PHYS_MERGEABLE Define __BIOVEC_PHYS_MERGEABLE as the default implementation of BIOVEC_PHYS_MERGEABLE, so that its available for reuse within an arch-specific definition of BIOVEC_PHYS_MERGEABLE. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1c91a176b9ae..6a642098e5c3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -236,12 +236,16 @@ static inline void *bio_data(struct bio *bio) #define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) #define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) +/* Default implementation of BIOVEC_PHYS_MERGEABLE */ +#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ + ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + /* * allow arch override, for eg virtualized architectures (put in asm/io.h) */ #ifndef BIOVEC_PHYS_MERGEABLE #define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ - ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) + __BIOVEC_PHYS_MERGEABLE(vec1, vec2) #endif #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ -- cgit v1.2.3 From 9c133c469d38043d5aadaa03f2fb840d88d1cf4f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 6 Nov 2008 08:42:48 +0100 Subject: Add round_jiffies_up and related routines This patch (as1158b) adds round_jiffies_up() and friends. These routines work like the analogous round_jiffies() functions, except that they will never round down. The new routines will be useful for timeouts where we don't care exactly when the timer expires, provided it doesn't expire too soon. Signed-off-by: Alan Stern Signed-off-by: Jens Axboe --- include/linux/timer.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index d4ba79248a27..daf9685b861c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -186,4 +186,9 @@ unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); +unsigned long __round_jiffies_up(unsigned long j, int cpu); +unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); +unsigned long round_jiffies_up(unsigned long j); +unsigned long round_jiffies_up_relative(unsigned long j); + #endif -- cgit v1.2.3 From f8d570a4745835f2238a33b537218a1bb03fc671 Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 6 Nov 2008 00:37:40 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* -- cgit v1.2.3 From 9e975dae2970d22557662761c8505ce9fd165684 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:46 -0800 Subject: fat: split include/msdos_fs.h This splits __KERNEL__ stuff in include/msdos_fs.h into fs/fat/fat.h. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msdos_fs.h | 276 +---------------------------------------------- 1 file changed, 2 insertions(+), 274 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index ba63858056c7..0982fb47a90d 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -167,282 +167,10 @@ struct msdos_dir_slot { }; #ifdef __KERNEL__ - -#include -#include -#include -#include -#include - -/* - * vfat shortname flags - */ -#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ -#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ -#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ -#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ -#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ - -struct fat_mount_options { - uid_t fs_uid; - gid_t fs_gid; - unsigned short fs_fmask; - unsigned short fs_dmask; - unsigned short codepage; /* Codepage for shortname conversions */ - char *iocharset; /* Charset used for filename input/display */ - unsigned short shortname; /* flags for shortname display/create rule */ - unsigned char name_check; /* r = relaxed, n = normal, s = strict */ - unsigned short allow_utime;/* permission for setting the [am]time */ - unsigned quiet:1, /* set = fake successful chmods and chowns */ - showexec:1, /* set = only set x bit for com/exe/bat */ - sys_immutable:1, /* set = system files are immutable */ - dotsOK:1, /* set = hidden and system files are named '.filename' */ - isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ - utf8:1, /* Use of UTF-8 character set (Default) */ - unicode_xlate:1, /* create escape sequences for unhandled Unicode */ - numtail:1, /* Does first alias have a numeric '~1' type tail? */ - flush:1, /* write things quickly */ - nocase:1, /* Does this need case conversion? 0=need case conversion*/ - usefree:1, /* Use free_clusters for FAT32 */ - tz_utc:1; /* Filesystem timestamps are in UTC */ -}; - -#define FAT_HASH_BITS 8 -#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) -#define FAT_HASH_MASK (FAT_HASH_SIZE-1) - -/* - * MS-DOS file system in-core superblock data - */ -struct msdos_sb_info { - unsigned short sec_per_clus; /* sectors/cluster */ - unsigned short cluster_bits; /* log2(cluster_size) */ - unsigned int cluster_size; /* cluster size */ - unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ - unsigned short fat_start; - unsigned long fat_length; /* FAT start & length (sec.) */ - unsigned long dir_start; - unsigned short dir_entries; /* root dir start & entries */ - unsigned long data_start; /* first data sector */ - unsigned long max_cluster; /* maximum cluster number */ - unsigned long root_cluster; /* first cluster of the root directory */ - unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ - struct mutex fat_lock; - unsigned int prev_free; /* previously allocated cluster number */ - unsigned int free_clusters; /* -1 if undefined */ - unsigned int free_clus_valid; /* is free_clusters valid? */ - struct fat_mount_options options; - struct nls_table *nls_disk; /* Codepage used on disk */ - struct nls_table *nls_io; /* Charset used for input and display */ - const void *dir_ops; /* Opaque; default directory operations */ - int dir_per_block; /* dir entries per block */ - int dir_per_block_bits; /* log2(dir_per_block) */ - - int fatent_shift; - struct fatent_operations *fatent_ops; - - spinlock_t inode_hash_lock; - struct hlist_head inode_hashtable[FAT_HASH_SIZE]; -}; - -#define FAT_CACHE_VALID 0 /* special case for valid cache */ - -/* - * MS-DOS file system inode data in memory - */ -struct msdos_inode_info { - spinlock_t cache_lru_lock; - struct list_head cache_lru; - int nr_caches; - /* for avoiding the race between fat_free() and fat_get_cluster() */ - unsigned int cache_valid_id; - - loff_t mmu_private; - int i_start; /* first cluster or 0 */ - int i_logstart; /* logical first cluster */ - int i_attrs; /* unused attribute bits */ - loff_t i_pos; /* on-disk position of directory entry or 0 */ - struct hlist_node i_fat_hash; /* hash by i_location */ - struct inode vfs_inode; -}; - -struct fat_slot_info { - loff_t i_pos; /* on-disk position of directory entry */ - loff_t slot_off; /* offset for slot or de start */ - int nr_slots; /* number of slots + 1(de) in filename */ - struct msdos_dir_entry *de; - struct buffer_head *bh; -}; - -static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) -{ - return container_of(inode, struct msdos_inode_info, vfs_inode); -} - -/* Return the FAT attribute byte for this inode */ -static inline u8 fat_attr(struct inode *inode) -{ - return ((inode->i_mode & S_IWUGO) ? ATTR_NONE : ATTR_RO) | - (S_ISDIR(inode->i_mode) ? ATTR_DIR : ATTR_NONE) | - MSDOS_I(inode)->i_attrs; -} - -static inline unsigned char fat_checksum(const __u8 *name) -{ - unsigned char s = name[0]; - s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; - s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; - s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; - s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; - s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; - return s; -} - -static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) -{ - return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus - + sbi->data_start; -} - -static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - *dst++ = src[0] | (src[1] << 8); - src += 2; - } -#else - memcpy(dst, src, len * 2); -#endif -} - -static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) -{ -#ifdef __BIG_ENDIAN - while (len--) { - dst[0] = *src & 0x00FF; - dst[1] = (*src & 0xFF00) >> 8; - dst += 2; - src++; - } -#else - memcpy(dst, src, len * 2); -#endif -} - /* media of boot sector */ static inline int fat_valid_media(u8 media) { return 0xf8 <= media || media == 0xf0; } - -/* fat/cache.c */ -extern void fat_cache_inval_inode(struct inode *inode); -extern int fat_get_cluster(struct inode *inode, int cluster, - int *fclus, int *dclus); -extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks); - -/* fat/dir.c */ -extern const struct file_operations fat_dir_operations; -extern int fat_search_long(struct inode *inode, const unsigned char *name, - int name_len, struct fat_slot_info *sinfo); -extern int fat_dir_empty(struct inode *dir); -extern int fat_subdirs(struct inode *dir); -extern int fat_scan(struct inode *dir, const unsigned char *name, - struct fat_slot_info *sinfo); -extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, - struct msdos_dir_entry **de, loff_t *i_pos); -extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); -extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, - struct fat_slot_info *sinfo); -extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); - -/* fat/fatent.c */ -struct fat_entry { - int entry; - union { - u8 *ent12_p[2]; - __le16 *ent16_p; - __le32 *ent32_p; - } u; - int nr_bhs; - struct buffer_head *bhs[2]; -}; - -static inline void fatent_init(struct fat_entry *fatent) -{ - fatent->nr_bhs = 0; - fatent->entry = 0; - fatent->u.ent32_p = NULL; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -static inline void fatent_set_entry(struct fat_entry *fatent, int entry) -{ - fatent->entry = entry; - fatent->u.ent32_p = NULL; -} - -static inline void fatent_brelse(struct fat_entry *fatent) -{ - int i; - fatent->u.ent32_p = NULL; - for (i = 0; i < fatent->nr_bhs; i++) - brelse(fatent->bhs[i]); - fatent->nr_bhs = 0; - fatent->bhs[0] = fatent->bhs[1] = NULL; -} - -extern void fat_ent_access_init(struct super_block *sb); -extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, - int entry); -extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, - int new, int wait); -extern int fat_alloc_clusters(struct inode *inode, int *cluster, - int nr_cluster); -extern int fat_free_clusters(struct inode *inode, int cluster); -extern int fat_count_free_clusters(struct super_block *sb); - -/* fat/file.c */ -extern int fat_generic_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); -extern const struct file_operations fat_file_operations; -extern const struct inode_operations fat_file_inode_operations; -extern int fat_setattr(struct dentry * dentry, struct iattr * attr); -extern void fat_truncate(struct inode *inode); -extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); - -/* fat/inode.c */ -extern void fat_attach(struct inode *inode, loff_t i_pos); -extern void fat_detach(struct inode *inode); -extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); -extern struct inode *fat_build_inode(struct super_block *sb, - struct msdos_dir_entry *de, loff_t i_pos); -extern int fat_sync_inode(struct inode *inode); -extern int fat_fill_super(struct super_block *sb, void *data, int silent, - const struct inode_operations *fs_dir_inode_ops, int isvfat); - -extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, - struct inode *i2); -/* fat/misc.c */ -extern void fat_fs_panic(struct super_block *s, const char *fmt, ...); -extern void fat_clusters_flush(struct super_block *sb); -extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); -extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc); -extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, - int tz_utc); -extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); - -int fat_cache_init(void); -void fat_cache_destroy(void); - -#endif /* __KERNEL__ */ - -#endif +#endif /* !__KERNEL__ */ +#endif /* !_LINUX_MSDOS_FS_H */ -- cgit v1.2.3 From 9c0aa1b87bf541affef519eb4879ce7c5a5941ae Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 6 Nov 2008 12:53:54 -0800 Subject: fat: Cleanup FAT attribute stuff This adds three helpers: fat_make_attrs() - makes FAT attributes from inode. fat_make_mode() - makes mode_t from FAT attributes. fat_save_attrs() - saves FAT attributes to inode. Then this replaces: MSDOS_MKMODE() by fat_make_mode(), fat_attr() by fat_make_attrs(), ->i_attrs = attr & ATTR_UNUSED by fat_save_attrs(). And for root inode, those is used with ATTR_DIR instead of bogus ATTR_NONE. Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msdos_fs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index 0982fb47a90d..e0a9b207920d 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -46,11 +46,6 @@ #define DELETED_FLAG 0xe5 /* marks file as deleted when in name[0] */ #define IS_FREE(n) (!*(n) || *(n) == DELETED_FLAG) -/* valid file mode bits */ -#define MSDOS_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO) -/* Convert attribute bits and a mask to the UNIX mode. */ -#define MSDOS_MKMODE(a, m) (m & (a & ATTR_RO ? S_IRUGO|S_IXUGO : S_IRWXUGO)) - #define MSDOS_NAME 11 /* maximum name length */ #define MSDOS_LONGNAME 256 /* maximum name length */ #define MSDOS_SLOTS 21 /* max # of slots for short and long names */ -- cgit v1.2.3 From 7597bc94d6f3bdccb086ac7f2ad91292fdaee2a4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Nov 2008 17:38:47 +0000 Subject: Fix accidental implicit cast in HR-timer conversion Fix the hrtimer_add_expires_ns() function. It should take a 'u64 ns' argument, but rather takes an 'unsigned long ns' argument - which might only be 32-bits. On FRV, this results in the kernel locking up because hrtimer_forward() passes the result of a 64-bit multiplication to this function, for which the compiler discards the top 32-bits - something that didn't happen when ktime_add_ns() was called directly. Signed-off-by: David Howells Acked-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2b3645b1acf4..07e510a3b00a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -239,7 +239,7 @@ static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } -static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) +static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns) { timer->_expires = ktime_add_ns(timer->_expires, ns); timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); -- cgit v1.2.3 From 3b53fbf4314594fa04544b02b2fc6e607912da18 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 6 Nov 2008 15:45:32 -0800 Subject: net: Fix recursive descent in __scm_destroy(). __scm_destroy() walks the list of file descriptors in the scm_fp_list pointed to by the scm_cookie argument. Those, in turn, can close sockets and invoke __scm_destroy() again. There is nothing which limits how deeply this can occur. The idea for how to fix this is from Linus. Basically, we do all of the fput()s at the top level by collecting all of the scm_fp_list objects hit by an fput(). Inside of the initial __scm_destroy() we keep running the list until it is empty. Signed-off-by: David S. Miller --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..295b7c756ca6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1349,6 +1349,8 @@ struct task_struct { */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; + + struct list_head *scm_work_list; }; /* -- cgit v1.2.3