From 466b3ddfbcf4f5ce402a77397630a0fa9ea9ce6b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sun, 30 Oct 2011 16:35:08 +0100 Subject: PCI: Fix compile errors with PCI_ATS and !PCI_IOV The ats and sroiv members of 'struct pci_dev' are required for the ATS code already, even without IOV support compiled in. So depend on ATS here. This is fine with PCI_IOV too because it selects PCI_ATS. Also the prototypes for ATS need to be available for PCI_ATS. Reported-by: Randy Dunlap Signed-off-by: Joerg Roedel Signed-off-by: Jesse Barnes --- include/linux/pci-ats.h | 6 +++--- include/linux/pci.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index e3d0b3890249..7ef68724f0f0 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -12,7 +12,7 @@ struct pci_ats { unsigned int is_enabled:1; /* Enable bit is set */ }; -#ifdef CONFIG_PCI_IOV +#ifdef CONFIG_PCI_ATS extern int pci_enable_ats(struct pci_dev *dev, int ps); extern void pci_disable_ats(struct pci_dev *dev); @@ -29,7 +29,7 @@ static inline int pci_ats_enabled(struct pci_dev *dev) return dev->ats && dev->ats->is_enabled; } -#else /* CONFIG_PCI_IOV */ +#else /* CONFIG_PCI_ATS */ static inline int pci_enable_ats(struct pci_dev *dev, int ps) { @@ -50,7 +50,7 @@ static inline int pci_ats_enabled(struct pci_dev *dev) return 0; } -#endif /* CONFIG_PCI_IOV */ +#endif /* CONFIG_PCI_ATS */ #ifdef CONFIG_PCI_PRI diff --git a/include/linux/pci.h b/include/linux/pci.h index 337df0d5d5f7..7cda65b5f798 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -338,7 +338,7 @@ struct pci_dev { struct list_head msi_list; #endif struct pci_vpd *vpd; -#ifdef CONFIG_PCI_IOV +#ifdef CONFIG_PCI_ATS union { struct pci_sriov *sriov; /* SR-IOV capability related */ struct pci_dev *physfn; /* the PF this VF is associated with */ -- cgit v1.2.3 From d65670a78cdbfae94f20a9e05ec705871d7cdf2b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 31 Oct 2011 17:06:35 -0400 Subject: clocksource: Avoid selecting mult values that might overflow when adjusted For some frequencies, the clocks_calc_mult_shift() function will unfortunately select mult values very close to 0xffffffff. This has the potential to overflow when NTP adjusts the clock, adding to the mult value. This patch adds a clocksource.maxadj value, which provides an approximation of an 11% adjustment(NTP limits adjustments to 500ppm and the tick adjustment is limited to 10%), which could be made to the clocksource.mult value. This is then used to both check that the current mult value won't overflow/underflow, as well as warning us if the timekeeping_adjust() code pushes over that 11% boundary. v2: Fix max_adjustment calculation, and improve WARN_ONCE messages. v3: Don't warn before maxadj has actually been set CC: Yong Zhang CC: David Daney CC: Thomas Gleixner CC: Chen Jie CC: zhangfx CC: stable@kernel.org Reported-by: Chen Jie Reported-by: zhangfx Tested-by: Yong Zhang Signed-off-by: John Stultz --- include/linux/clocksource.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 139c4db55f17..c86c940d1de3 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -156,6 +156,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @max_idle_ns: max idle time permitted by the clocksource (nsecs) + * @maxadj maximum adjustment value to mult (~11%) * @flags: flags describing special properties * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary @@ -172,7 +173,7 @@ struct clocksource { u32 mult; u32 shift; u64 max_idle_ns; - + u32 maxadj; #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA struct arch_clocksource_data archdata; #endif -- cgit v1.2.3 From f1c6f1a7eed963ed233ba4c8b6fa8addb86c6ddc Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Wed, 26 Oct 2011 23:14:16 +0200 Subject: sched: Set the command name of the idle tasks in SMP kernels In UP systems, the idle task is initialized using the init_task structure from which the command name is taken (currently "swapper"). In SMP systems, one idle task per CPU is forked by the worker thread from which the task structure is copied. The command name is, therefore, "kworker/0:0" or "kworker/0:1", if not updated. Since such update was lacking, all idle tasks in SMP systems were incorrectly named. This longtime bug was not discovered immediately, because there is no /proc/0 entry - the bug only becomes apparent when tracing is enabled. This patch sets the command name of the idle tasks in SMP systems to the name that is used in the INIT_TASK structure suffixed by a slash and the number of the CPU. Signed-off-by: Carsten Emde Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111026211708.768925506@osadl.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 08ffab01e76c..b6e5b8b000e0 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -126,6 +126,8 @@ extern struct cred init_cred; # define INIT_PERF_EVENTS(tsk) #endif +#define INIT_TASK_COMM "swapper" + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -162,7 +164,7 @@ extern struct cred init_cred; .group_leader = &tsk, \ RCU_INIT_POINTER(.real_cred, &init_cred), \ RCU_INIT_POINTER(.cred, &init_cred), \ - .comm = "swapper", \ + .comm = INIT_TASK_COMM, \ .thread = INIT_THREAD, \ .fs = &init_fs, \ .files = &init_files, \ -- cgit v1.2.3 From f6f8285132907757ef84ef8dae0a1244b8cde6ac Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2011 12:58:07 -0800 Subject: pstore: pass allocated memory region back to caller The buf_lock cannot be held while populating the inodes, so make the backend pass forward an allocated and filled buffer instead. This solves the following backtrace. The effect is that "buf" is only ever used to notify the backends that something was written to it, and shouldn't be used in the read path. To replace the buf_lock during the read path, isolate the open/read/close loop with a separate mutex to maintain serialized access to the backend. Note that is is up to the pstore backend to cope if the (*write)() path is called in the middle of the read path. [ 59.691019] BUG: sleeping function called from invalid context at .../mm/slub.c:847 [ 59.691019] in_atomic(): 0, irqs_disabled(): 1, pid: 1819, name: mount [ 59.691019] Pid: 1819, comm: mount Not tainted 3.0.8 #1 [ 59.691019] Call Trace: [ 59.691019] [<810252d5>] __might_sleep+0xc3/0xca [ 59.691019] [<810a26e6>] kmem_cache_alloc+0x32/0xf3 [ 59.691019] [<810b53ac>] ? __d_lookup_rcu+0x6f/0xf4 [ 59.691019] [<810b68b1>] alloc_inode+0x2a/0x64 [ 59.691019] [<810b6903>] new_inode+0x18/0x43 [ 59.691019] [<81142447>] pstore_get_inode.isra.1+0x11/0x98 [ 59.691019] [<81142623>] pstore_mkfile+0xae/0x26f [ 59.691019] [<810a2a66>] ? kmem_cache_free+0x19/0xb1 [ 59.691019] [<8116c821>] ? ida_get_new_above+0x140/0x158 [ 59.691019] [<811708ea>] ? __init_rwsem+0x1e/0x2c [ 59.691019] [<810b67e8>] ? inode_init_always+0x111/0x1b0 [ 59.691019] [<8102127e>] ? should_resched+0xd/0x27 [ 59.691019] [<8137977f>] ? _cond_resched+0xd/0x21 [ 59.691019] [<81142abf>] pstore_get_records+0x52/0xa7 [ 59.691019] [<8114254b>] pstore_fill_super+0x7d/0x91 [ 59.691019] [<810a7ff5>] mount_single+0x46/0x82 [ 59.691019] [<8114231a>] pstore_mount+0x15/0x17 [ 59.691019] [<811424ce>] ? pstore_get_inode.isra.1+0x98/0x98 [ 59.691019] [<810a8199>] mount_fs+0x5a/0x12d [ 59.691019] [<810b9174>] ? alloc_vfsmnt+0xa4/0x14a [ 59.691019] [<810b9474>] vfs_kern_mount+0x4f/0x7d [ 59.691019] [<810b9d7e>] do_kern_mount+0x34/0xb2 [ 59.691019] [<810bb15f>] do_mount+0x5fc/0x64a [ 59.691019] [<810912fb>] ? strndup_user+0x2e/0x3f [ 59.691019] [<810bb3cb>] sys_mount+0x66/0x99 [ 59.691019] [<8137b537>] sysenter_do_call+0x12/0x26 Signed-off-by: Kees Cook Signed-off-by: Tony Luck --- include/linux/pstore.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index ea567321ae3c..2ca8cde5459d 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -35,10 +35,12 @@ struct pstore_info { spinlock_t buf_lock; /* serialize access to 'buf' */ char *buf; size_t bufsize; + struct mutex read_mutex; /* serialize open/read/close */ int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, - struct timespec *time, struct pstore_info *psi); + struct timespec *time, char **buf, + struct pstore_info *psi); int (*write)(enum pstore_type_id type, u64 *id, unsigned int part, size_t size, struct pstore_info *psi); int (*erase)(enum pstore_type_id type, u64 id, -- cgit v1.2.3 From 5eccdf5e06eb67779716ae26142402a1ae9b012c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 21 Nov 2011 06:53:46 +0000 Subject: tc: comment spelling fixes Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index c5336705921f..7281d5acf2f9 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -30,7 +30,7 @@ */ struct tc_stats { - __u64 bytes; /* NUmber of enqueues bytes */ + __u64 bytes; /* Number of enqueued bytes */ __u32 packets; /* Number of enqueued packets */ __u32 drops; /* Packets dropped because of lack of resources */ __u32 overlimits; /* Number of throttle events when this @@ -297,7 +297,7 @@ struct tc_htb_glob { __u32 debug; /* debug flags */ /* stats */ - __u32 direct_pkts; /* count of non shapped packets */ + __u32 direct_pkts; /* count of non shaped packets */ }; enum { TCA_HTB_UNSPEC, @@ -503,7 +503,7 @@ enum { }; #define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1) -/* State transition probablities for 4 state model */ +/* State transition probabilities for 4 state model */ struct tc_netem_gimodel { __u32 p13; __u32 p31; -- cgit v1.2.3 From 5151412dd4338b273afdb107c3772528e9e67d92 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 23 Nov 2011 10:59:13 +0100 Subject: block: initialize request_queue's numa node during struct request_queue is allocated with __GFP_ZERO so its "node" field is zero before initialization. This causes an oops if node 0 is offline in the page allocator because its zonelists are not initialized. From Dave Young's dmesg: SRAT: Node 1 PXM 2 0-d0000000 SRAT: Node 1 PXM 2 100000000-330000000 SRAT: Node 0 PXM 1 330000000-630000000 Initmem setup node 1 0000000000000000-000000000affb000 ... Built 1 zonelists in Node order, mobility grouping on. ... BUG: unable to handle kernel paging request at 0000000000001c08 IP: [] __alloc_pages_nodemask+0xb5/0x870 and __alloc_pages_nodemask+0xb5 translates to a NULL pointer on zonelist->_zonerefs. The fix is to initialize q->node at the time of allocation so the correct node is passed to the slab allocator later. Since blk_init_allocated_queue_node() is no longer needed, merge it with blk_init_allocated_queue(). [rientjes@google.com: changelog, initializing q->node] Cc: stable@vger.kernel.org [2.6.37+] Reported-by: Dave Young Signed-off-by: Mike Snitzer Signed-off-by: David Rientjes Tested-by: Dave Young Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c7a6d3b5bc7b..94acd8172b5b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -805,9 +805,6 @@ extern void blk_unprep_request(struct request *); */ extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id); -extern struct request_queue *blk_init_allocated_queue_node(struct request_queue *, - request_fn_proc *, - spinlock_t *, int node_id); extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); extern struct request_queue *blk_init_allocated_queue(struct request_queue *, request_fn_proc *, spinlock_t *); -- cgit v1.2.3 From fe1a7fe2c4456679b3402f04268bdfafca7b127a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 15 Nov 2011 16:17:18 +0200 Subject: virtio-mmio: Correct the name of the guest features selector Guest features selector spelling mistake. Cc: Pawel Moll Cc: Rusty Russell Cc: virtualization@lists.linux-foundation.org Signed-off-by: Sasha Levin Signed-off-by: Rusty Russell --- include/linux/virtio_mmio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_mmio.h b/include/linux/virtio_mmio.h index 27c7edefbc86..5c7b6f0daef8 100644 --- a/include/linux/virtio_mmio.h +++ b/include/linux/virtio_mmio.h @@ -63,7 +63,7 @@ #define VIRTIO_MMIO_GUEST_FEATURES 0x020 /* Activated features set selector - Write Only */ -#define VIRTIO_MMIO_GUEST_FEATURES_SET 0x024 +#define VIRTIO_MMIO_GUEST_FEATURES_SEL 0x024 /* Guest's memory page size in bytes - Write Only */ #define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 -- cgit v1.2.3 From e6af578c5305be693a1bc7f4dc7b51dd82d41425 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 17 Nov 2011 17:41:15 +0200 Subject: virtio-pci: make reset operation safer virtio pci device reset actually just does an I/O write, which in PCI is really posted, that is it can complete on CPU before the device has received it. Further, interrupts might have been pending on another CPU, so device callback might get invoked after reset. This conflicts with how drivers use reset, which is typically: reset unregister a callback running after reset completed can race with unregister, potentially leading to use after free bugs. Fix by flushing out the write, and flushing pending interrupts. This assumes that device is never reset from its vq/config callbacks, or in parallel with being added/removed, document this assumption. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell --- include/linux/virtio_config.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index add4790b21fe..e9e72bda1b72 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -85,6 +85,8 @@ * @reset: reset the device * vdev: the virtio device * After this, status and feature negotiation must be done again + * Device must not be reset from its vq/config callbacks, or in + * parallel with being added/removed. * @find_vqs: find virtqueues and instantiate them. * vdev: the virtio_device * nvqs: the number of virtqueues to find -- cgit v1.2.3 From f7bc83d87d242917ca0ee041ed509f57f361dd56 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Nov 2011 21:20:32 +0100 Subject: PM: Update comments describing device power management callbacks The comments describing device power management callbacks in include/pm.h are outdated and somewhat confusing, so make them reflect the reality more accurately. Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 229 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 134 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 5c4c8b18c8b7..3f3ed83a9aa5 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -54,118 +54,145 @@ typedef struct pm_message { /** * struct dev_pm_ops - device PM callbacks * - * Several driver power state transitions are externally visible, affecting + * Several device power state transitions are externally visible, affecting * the state of pending I/O queues and (for drivers that touch hardware) * interrupts, wakeups, DMA, and other hardware state. There may also be - * internal transitions to various low power modes, which are transparent + * internal transitions to various low-power modes which are transparent * to the rest of the driver stack (such as a driver that's ON gating off * clocks which are not in active use). * - * The externally visible transitions are handled with the help of the following - * callbacks included in this structure: - * - * @prepare: Prepare the device for the upcoming transition, but do NOT change - * its hardware state. Prevent new children of the device from being - * registered after @prepare() returns (the driver's subsystem and - * generally the rest of the kernel is supposed to prevent new calls to the - * probe method from being made too once @prepare() has succeeded). If - * @prepare() detects a situation it cannot handle (e.g. registration of a - * child already in progress), it may return -EAGAIN, so that the PM core - * can execute it once again (e.g. after the new child has been registered) - * to recover from the race condition. This method is executed for all - * kinds of suspend transitions and is followed by one of the suspend - * callbacks: @suspend(), @freeze(), or @poweroff(). - * The PM core executes @prepare() for all devices before starting to - * execute suspend callbacks for any of them, so drivers may assume all of - * the other devices to be present and functional while @prepare() is being - * executed. In particular, it is safe to make GFP_KERNEL memory - * allocations from within @prepare(). However, drivers may NOT assume - * anything about the availability of the user space at that time and it - * is not correct to request firmware from within @prepare() (it's too - * late to do that). [To work around this limitation, drivers may - * register suspend and hibernation notifiers that are executed before the - * freezing of tasks.] + * The externally visible transitions are handled with the help of callbacks + * included in this structure in such a way that two levels of callbacks are + * involved. First, the PM core executes callbacks provided by PM domains, + * device types, classes and bus types. They are the subsystem-level callbacks + * supposed to execute callbacks provided by device drivers, although they may + * choose not to do that. If the driver callbacks are executed, they have to + * collaborate with the subsystem-level callbacks to achieve the goals + * appropriate for the given system transition, given transition phase and the + * subsystem the device belongs to. + * + * @prepare: The principal role of this callback is to prevent new children of + * the device from being registered after it has returned (the driver's + * subsystem and generally the rest of the kernel is supposed to prevent + * new calls to the probe method from being made too once @prepare() has + * succeeded). If @prepare() detects a situation it cannot handle (e.g. + * registration of a child already in progress), it may return -EAGAIN, so + * that the PM core can execute it once again (e.g. after a new child has + * been registered) to recover from the race condition. + * This method is executed for all kinds of suspend transitions and is + * followed by one of the suspend callbacks: @suspend(), @freeze(), or + * @poweroff(). The PM core executes subsystem-level @prepare() for all + * devices before starting to invoke suspend callbacks for any of them, so + * generally devices may be assumed to be functional or to respond to + * runtime resume requests while @prepare() is being executed. However, + * device drivers may NOT assume anything about the availability of user + * space at that time and it is NOT valid to request firmware from within + * @prepare() (it's too late to do that). It also is NOT valid to allocate + * substantial amounts of memory from @prepare() in the GFP_KERNEL mode. + * [To work around these limitations, drivers may register suspend and + * hibernation notifiers to be executed before the freezing of tasks.] * * @complete: Undo the changes made by @prepare(). This method is executed for * all kinds of resume transitions, following one of the resume callbacks: * @resume(), @thaw(), @restore(). Also called if the state transition - * fails before the driver's suspend callback (@suspend(), @freeze(), - * @poweroff()) can be executed (e.g. if the suspend callback fails for one + * fails before the driver's suspend callback: @suspend(), @freeze() or + * @poweroff(), can be executed (e.g. if the suspend callback fails for one * of the other devices that the PM core has unsuccessfully attempted to * suspend earlier). - * The PM core executes @complete() after it has executed the appropriate - * resume callback for all devices. + * The PM core executes subsystem-level @complete() after it has executed + * the appropriate resume callbacks for all devices. * * @suspend: Executed before putting the system into a sleep state in which the - * contents of main memory are preserved. Quiesce the device, put it into - * a low power state appropriate for the upcoming system state (such as - * PCI_D3hot), and enable wakeup events as appropriate. + * contents of main memory are preserved. The exact action to perform + * depends on the device's subsystem (PM domain, device type, class or bus + * type), but generally the device must be quiescent after subsystem-level + * @suspend() has returned, so that it doesn't do any I/O or DMA. + * Subsystem-level @suspend() is executed for all devices after invoking + * subsystem-level @prepare() for all of them. * * @resume: Executed after waking the system up from a sleep state in which the - * contents of main memory were preserved. Put the device into the - * appropriate state, according to the information saved in memory by the - * preceding @suspend(). The driver starts working again, responding to - * hardware events and software requests. The hardware may have gone - * through a power-off reset, or it may have maintained state from the - * previous suspend() which the driver may rely on while resuming. On most - * platforms, there are no restrictions on availability of resources like - * clocks during @resume(). + * contents of main memory were preserved. The exact action to perform + * depends on the device's subsystem, but generally the driver is expected + * to start working again, responding to hardware events and software + * requests (the device itself may be left in a low-power state, waiting + * for a runtime resume to occur). The state of the device at the time its + * driver's @resume() callback is run depends on the platform and subsystem + * the device belongs to. On most platforms, there are no restrictions on + * availability of resources like clocks during @resume(). + * Subsystem-level @resume() is executed for all devices after invoking + * subsystem-level @resume_noirq() for all of them. * * @freeze: Hibernation-specific, executed before creating a hibernation image. - * Quiesce operations so that a consistent image can be created, but do NOT - * otherwise put the device into a low power device state and do NOT emit - * system wakeup events. Save in main memory the device settings to be - * used by @restore() during the subsequent resume from hibernation or by - * the subsequent @thaw(), if the creation of the image or the restoration - * of main memory contents from it fails. + * Analogous to @suspend(), but it should not enable the device to signal + * wakeup events or change its power state. The majority of subsystems + * (with the notable exception of the PCI bus type) expect the driver-level + * @freeze() to save the device settings in memory to be used by @restore() + * during the subsequent resume from hibernation. + * Subsystem-level @freeze() is executed for all devices after invoking + * subsystem-level @prepare() for all of them. * * @thaw: Hibernation-specific, executed after creating a hibernation image OR - * if the creation of the image fails. Also executed after a failing + * if the creation of an image has failed. Also executed after a failing * attempt to restore the contents of main memory from such an image. * Undo the changes made by the preceding @freeze(), so the device can be * operated in the same way as immediately before the call to @freeze(). + * Subsystem-level @thaw() is executed for all devices after invoking + * subsystem-level @thaw_noirq() for all of them. It also may be executed + * directly after @freeze() in case of a transition error. * * @poweroff: Hibernation-specific, executed after saving a hibernation image. - * Quiesce the device, put it into a low power state appropriate for the - * upcoming system state (such as PCI_D3hot), and enable wakeup events as - * appropriate. + * Analogous to @suspend(), but it need not save the device's settings in + * memory. + * Subsystem-level @poweroff() is executed for all devices after invoking + * subsystem-level @prepare() for all of them. * * @restore: Hibernation-specific, executed after restoring the contents of main - * memory from a hibernation image. Driver starts working again, - * responding to hardware events and software requests. Drivers may NOT - * make ANY assumptions about the hardware state right prior to @restore(). - * On most platforms, there are no restrictions on availability of - * resources like clocks during @restore(). - * - * @suspend_noirq: Complete the operations of ->suspend() by carrying out any - * actions required for suspending the device that need interrupts to be - * disabled - * - * @resume_noirq: Prepare for the execution of ->resume() by carrying out any - * actions required for resuming the device that need interrupts to be - * disabled - * - * @freeze_noirq: Complete the operations of ->freeze() by carrying out any - * actions required for freezing the device that need interrupts to be - * disabled - * - * @thaw_noirq: Prepare for the execution of ->thaw() by carrying out any - * actions required for thawing the device that need interrupts to be - * disabled - * - * @poweroff_noirq: Complete the operations of ->poweroff() by carrying out any - * actions required for handling the device that need interrupts to be - * disabled - * - * @restore_noirq: Prepare for the execution of ->restore() by carrying out any - * actions required for restoring the operations of the device that need - * interrupts to be disabled + * memory from a hibernation image, analogous to @resume(). + * + * @suspend_noirq: Complete the actions started by @suspend(). Carry out any + * additional operations required for suspending the device that might be + * racing with its driver's interrupt handler, which is guaranteed not to + * run while @suspend_noirq() is being executed. + * It generally is expected that the device will be in a low-power state + * (appropriate for the target system sleep state) after subsystem-level + * @suspend_noirq() has returned successfully. If the device can generate + * system wakeup signals and is enabled to wake up the system, it should be + * configured to do so at that time. However, depending on the platform + * and device's subsystem, @suspend() may be allowed to put the device into + * the low-power state and configure it to generate wakeup signals, in + * which case it generally is not necessary to define @suspend_noirq(). + * + * @resume_noirq: Prepare for the execution of @resume() by carrying out any + * operations required for resuming the device that might be racing with + * its driver's interrupt handler, which is guaranteed not to run while + * @resume_noirq() is being executed. + * + * @freeze_noirq: Complete the actions started by @freeze(). Carry out any + * additional operations required for freezing the device that might be + * racing with its driver's interrupt handler, which is guaranteed not to + * run while @freeze_noirq() is being executed. + * The power state of the device should not be changed by either @freeze() + * or @freeze_noirq() and it should not be configured to signal system + * wakeup by any of these callbacks. + * + * @thaw_noirq: Prepare for the execution of @thaw() by carrying out any + * operations required for thawing the device that might be racing with its + * driver's interrupt handler, which is guaranteed not to run while + * @thaw_noirq() is being executed. + * + * @poweroff_noirq: Complete the actions started by @poweroff(). Analogous to + * @suspend_noirq(), but it need not save the device's settings in memory. + * + * @restore_noirq: Prepare for the execution of @restore() by carrying out any + * operations required for thawing the device that might be racing with its + * driver's interrupt handler, which is guaranteed not to run while + * @restore_noirq() is being executed. Analogous to @resume_noirq(). * * All of the above callbacks, except for @complete(), return error codes. * However, the error codes returned by the resume operations, @resume(), - * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq() do + * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq(), do * not cause the PM core to abort the resume transition during which they are - * returned. The error codes returned in that cases are only printed by the PM + * returned. The error codes returned in those cases are only printed by the PM * core to the system logs for debugging purposes. Still, it is recommended * that drivers only return error codes from their resume methods in case of an * unrecoverable failure (i.e. when the device being handled refuses to resume @@ -174,31 +201,43 @@ typedef struct pm_message { * their children. * * It is allowed to unregister devices while the above callbacks are being - * executed. However, it is not allowed to unregister a device from within any - * of its own callbacks. + * executed. However, a callback routine must NOT try to unregister the device + * it was called for, although it may unregister children of that device (for + * example, if it detects that a child was unplugged while the system was + * asleep). + * + * Refer to Documentation/power/devices.txt for more information about the role + * of the above callbacks in the system suspend process. * - * There also are the following callbacks related to run-time power management - * of devices: + * There also are callbacks related to runtime power management of devices. + * Again, these callbacks are executed by the PM core only for subsystems + * (PM domains, device types, classes and bus types) and the subsystem-level + * callbacks are supposed to invoke the driver callbacks. Moreover, the exact + * actions to be performed by a device driver's callbacks generally depend on + * the platform and subsystem the device belongs to. * * @runtime_suspend: Prepare the device for a condition in which it won't be * able to communicate with the CPU(s) and RAM due to power management. - * This need not mean that the device should be put into a low power state. + * This need not mean that the device should be put into a low-power state. * For example, if the device is behind a link which is about to be turned * off, the device may remain at full power. If the device does go to low - * power and is capable of generating run-time wake-up events, remote - * wake-up (i.e., a hardware mechanism allowing the device to request a - * change of its power state via a wake-up event, such as PCI PME) should - * be enabled for it. + * power and is capable of generating runtime wakeup events, remote wakeup + * (i.e., a hardware mechanism allowing the device to request a change of + * its power state via an interrupt) should be enabled for it. * * @runtime_resume: Put the device into the fully active state in response to a - * wake-up event generated by hardware or at the request of software. If - * necessary, put the device into the full power state and restore its + * wakeup event generated by hardware or at the request of software. If + * necessary, put the device into the full-power state and restore its * registers, so that it is fully operational. * - * @runtime_idle: Device appears to be inactive and it might be put into a low - * power state if all of the necessary conditions are satisfied. Check + * @runtime_idle: Device appears to be inactive and it might be put into a + * low-power state if all of the necessary conditions are satisfied. Check * these conditions and handle the device as appropriate, possibly queueing * a suspend request for it. The return value is ignored by the PM core. + * + * Refer to Documentation/power/runtime_pm.txt for more information about the + * role of the above callbacks in device runtime power management. + * */ struct dev_pm_ops { -- cgit v1.2.3 From 5cac98dd06bc43a7baab3523184f70fd359e9f35 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 27 Nov 2011 21:14:46 +0000 Subject: net: Fix corruption in /proc/*/net/dev_mcast I just hit this during my testing. Isn't there another bug lurking? BUG kmalloc-8: Redzone overwritten INFO: 0xc0000000de9dec48-0xc0000000de9dec4b. First byte 0x0 instead of 0xcc INFO: Allocated in .__seq_open_private+0x30/0xa0 age=0 cpu=5 pid=3896 .__kmalloc+0x1e0/0x2d0 .__seq_open_private+0x30/0xa0 .seq_open_net+0x60/0xe0 .dev_mc_seq_open+0x4c/0x70 .proc_reg_open+0xd8/0x260 .__dentry_open.clone.11+0x2b8/0x400 .do_last+0xf4/0x950 .path_openat+0xf8/0x480 .do_filp_open+0x48/0xc0 .do_sys_open+0x140/0x250 syscall_exit+0x0/0x40 dev_mc_seq_ops uses dev_seq_start/next/stop but only allocates sizeof(struct seq_net_private) of private data, whereas it expects sizeof(struct dev_iter_state): struct dev_iter_state { struct seq_net_private p; unsigned int pos; /* bucket << BUCKET_SPACE + offset */ }; Create dev_seq_open_ops and use it so we don't have to expose struct dev_iter_state. [ Problem added by commit f04565ddf52e4 (dev: use name hash for dev_seq_ops) -Eric ] Signed-off-by: Anton Blanchard Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cbeb5867cff7..a82ad4dd306a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2536,6 +2536,8 @@ extern void net_disable_timestamp(void); extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); extern void dev_seq_stop(struct seq_file *seq, void *v); +extern int dev_seq_open_ops(struct inode *inode, struct file *file, + const struct seq_operations *ops); #endif extern int netdev_class_create_file(struct class_attribute *class_attr); -- cgit v1.2.3 From 4f718a29fe4908c2cea782f751e9805319684e2b Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 28 Nov 2011 09:44:14 +0100 Subject: firmware: Sigma: Prevent out of bounds memory access The SigmaDSP firmware loader currently does not perform enough boundary size checks when processing the firmware. As a result it is possible that a malformed firmware can cause an out of bounds memory access. This patch adds checks which ensure that both the action header and the payload are completely inside the firmware data boundaries before processing them. Signed-off-by: Lars-Peter Clausen Acked-by: Mike Frysinger Signed-off-by: Mark Brown Cc: stable@kernel.org --- include/linux/sigma.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sigma.h b/include/linux/sigma.h index e2accb3164d8..9a138c2946bb 100644 --- a/include/linux/sigma.h +++ b/include/linux/sigma.h @@ -50,11 +50,6 @@ static inline u32 sigma_action_len(struct sigma_action *sa) return (sa->len_hi << 16) | sa->len; } -static inline size_t sigma_action_size(struct sigma_action *sa, u32 payload_len) -{ - return sizeof(*sa) + payload_len + (payload_len % 2); -} - extern int process_sigma_firmware(struct i2c_client *client, const char *name); #endif -- cgit v1.2.3 From bda63586bc5929e97288cdb371bb6456504867ed Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 28 Nov 2011 09:44:16 +0100 Subject: firmware: Sigma: Fix endianess issues Currently the SigmaDSP firmware loader only works correctly on little-endian systems. Fix this by using the proper endianess conversion functions. Signed-off-by: Lars-Peter Clausen Acked-by: Mike Frysinger Signed-off-by: Mark Brown Cc: stable@kernel.org --- include/linux/sigma.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sigma.h b/include/linux/sigma.h index 9a138c2946bb..d0de882c0d96 100644 --- a/include/linux/sigma.h +++ b/include/linux/sigma.h @@ -24,7 +24,7 @@ struct sigma_firmware { struct sigma_firmware_header { unsigned char magic[7]; u8 version; - u32 crc; + __le32 crc; }; enum { @@ -40,14 +40,14 @@ enum { struct sigma_action { u8 instr; u8 len_hi; - u16 len; - u16 addr; + __le16 len; + __be16 addr; unsigned char payload[]; }; static inline u32 sigma_action_len(struct sigma_action *sa) { - return (sa->len_hi << 16) | sa->len; + return (sa->len_hi << 16) | le16_to_cpu(sa->len); } extern int process_sigma_firmware(struct i2c_client *client, const char *name); -- cgit v1.2.3 From a67ba43d30bf8c1cfdc2615439455302d2408453 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 1 Dec 2011 12:54:31 -0500 Subject: asm-generic/unistd.h: support new process_vm_{readv,write} syscalls Also prototype the "compat" functions so they can be referenced from C code. Signed-off-by: Chris Metcalf Acked-by: Arnd Bergmann --- include/linux/compat.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 154bf5683015..66ed067fb729 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -552,5 +552,14 @@ extern ssize_t compat_rw_copy_check_uvector(int type, extern void __user *compat_alloc_user_space(unsigned long len); +asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, + const struct compat_iovec __user *lvec, + unsigned long liovcnt, const struct compat_iovec __user *rvec, + unsigned long riovcnt, unsigned long flags); +asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, + const struct compat_iovec __user *lvec, + unsigned long liovcnt, const struct compat_iovec __user *rvec, + unsigned long riovcnt, unsigned long flags); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ -- cgit v1.2.3 From 10c6db110d0eb4466b59812c49088ab56218fc2e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 26 Nov 2011 02:47:31 +0100 Subject: perf: Fix loss of notification with multi-event When you do: $ perf record -e cycles,cycles,cycles noploop 10 You expect about 10,000 samples for each event, i.e., 10s at 1000samples/sec. However, this is not what's happening. You get much fewer samples, maybe 3700 samples/event: $ perf report -D | tail -15 Aggregated stats: TOTAL events: 10998 MMAP events: 66 COMM events: 2 SAMPLE events: 10930 cycles stats: TOTAL events: 3644 SAMPLE events: 3644 cycles stats: TOTAL events: 3642 SAMPLE events: 3642 cycles stats: TOTAL events: 3644 SAMPLE events: 3644 On a Intel Nehalem or even AMD64, there are 4 counters capable of measuring cycles, so there is plenty of space to measure those events without multiplexing (even with the NMI watchdog active). And even with multiplexing, we'd expect roughly the same number of samples per event. The root of the problem was that when the event that caused the buffer to become full was not the first event passed on the cmdline, the user notification would get lost. The notification was sent to the file descriptor of the overflowed event but the perf tool was not polling on it. The perf tool aggregates all samples into a single buffer, i.e., the buffer of the first event. Consequently, it assumes notifications for any event will come via that descriptor. The seemingly straight forward solution of moving the waitq into the ringbuffer object doesn't work because of life-time issues. One could perf_event_set_output() on a fd that you're also blocking on and cause the old rb object to be freed while its waitq would still be referenced by the blocked thread -> FAIL. Therefore link all events to the ringbuffer and broadcast the wakeup from the ringbuffer object to all possible events that could be waited upon. This is rather ugly, and we're open to better solutions but it works for now. Reported-by: Stephane Eranian Finished-by: Stephane Eranian Reviewed-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20111126014731.GA7030@quad Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1e9ebe5e0091..b1f89122bf6a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -822,6 +822,7 @@ struct perf_event { int mmap_locked; struct user_struct *mmap_user; struct ring_buffer *rb; + struct list_head rb_entry; /* poll related */ wait_queue_head_t waitq; -- cgit v1.2.3 From f62ef5f3e9cff065aa845e2b7f487e1810b8e57e Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 2 Dec 2011 08:21:43 +0100 Subject: x86, amd: Fix up numa_node information for AMD CPU family 15h model 0-0fh northbridge functions I've received complaints that the numa_node attribute for family 15h model 00-0fh (e.g. Interlagos) northbridge functions shows -1 instead of the proper node ID. Correct this with attached quirks (similar to quirks for other AMD CPU families used in multi-socket systems). Signed-off-by: Andreas Herrmann Cc: Frank Arnold Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20111202072143.GA31916@alberich.amd.com Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 172ba70306d1..2aaee0ca9da8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -517,8 +517,12 @@ #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 +#define PCI_DEVICE_ID_AMD_15H_NB_F0 0x1600 +#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 +#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 #define PCI_DEVICE_ID_AMD_15H_NB_F3 0x1603 #define PCI_DEVICE_ID_AMD_15H_NB_F4 0x1604 +#define PCI_DEVICE_ID_AMD_15H_NB_F5 0x1605 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 -- cgit v1.2.3 From 27b14b56af081ec7edeefb3a38b2c9577cc5ef48 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 1 Nov 2011 09:09:35 +0800 Subject: tracing: Restore system filter behavior Though not all events have field 'prev_pid', it was allowed to do this: # echo 'prev_pid == 100' > events/sched/filter but commit 75b8e98263fdb0bfbdeba60d4db463259f1fe8a2 (tracing/filter: Swap entire filter of events) broke it without any reason. Link: http://lkml.kernel.org/r/4EAF46CF.8040408@cn.fujitsu.com Signed-off-by: Li Zefan Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 96efa6794ea5..c3da42dd22ba 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -172,6 +172,7 @@ enum { TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_RECORDED_CMD_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, + TRACE_EVENT_FL_NO_SET_FILTER_BIT, }; enum { @@ -179,6 +180,7 @@ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT), TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), + TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), }; struct ftrace_event_call { -- cgit v1.2.3 From 02125a826459a6ad142f8d91c5b6357562f96615 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 5 Dec 2011 08:43:34 -0500 Subject: fix apparmor dereferencing potentially freed dentry, sanitize __d_path() API __d_path() API is asking for trouble and in case of apparmor d_namespace_path() getting just that. The root cause is that when __d_path() misses the root it had been told to look for, it stores the location of the most remote ancestor in *root. Without grabbing references. Sure, at the moment of call it had been pinned down by what we have in *path. And if we raced with umount -l, we could have very well stopped at vfsmount/dentry that got freed as soon as prepend_path() dropped vfsmount_lock. It is safe to compare these pointers with pre-existing (and known to be still alive) vfsmount and dentry, as long as all we are asking is "is it the same address?". Dereferencing is not safe and apparmor ended up stepping into that. d_namespace_path() really wants to examine the place where we stopped, even if it's not connected to our namespace. As the result, it looked at ->d_sb->s_magic of a dentry that might've been already freed by that point. All other callers had been careful enough to avoid that, but it's really a bad interface - it invites that kind of trouble. The fix is fairly straightforward, even though it's bigger than I'd like: * prepend_path() root argument becomes const. * __d_path() is never called with NULL/NULL root. It was a kludge to start with. Instead, we have an explicit function - d_absolute_root(). Same as __d_path(), except that it doesn't get root passed and stops where it stops. apparmor and tomoyo are using it. * __d_path() returns NULL on path outside of root. The main caller is show_mountinfo() and that's precisely what we pass root for - to skip those outside chroot jail. Those who don't want that can (and do) use d_path(). * __d_path() root argument becomes const. Everyone agrees, I hope. * apparmor does *NOT* try to use __d_path() or any of its variants when it sees that path->mnt is an internal vfsmount. In that case it's definitely not mounted anywhere and dentry_path() is exactly what we want there. Handling of sysctl()-triggered weirdness is moved to that place. * if apparmor is asked to do pathname relative to chroot jail and __d_path() tells it we it's not in that jail, the sucker just calls d_absolute_path() instead. That's the other remaining caller of __d_path(), BTW. * seq_path_root() does _NOT_ return -ENAMETOOLONG (it's stupid anyway - the normal seq_file logics will take care of growing the buffer and redoing the call of ->show() just fine). However, if it gets path not reachable from root, it returns SEQ_SKIP. The only caller adjusted (i.e. stopped ignoring the return value as it used to do). Reviewed-by: John Johansen ACKed-by: John Johansen Signed-off-by: Al Viro Cc: stable@vger.kernel.org --- include/linux/dcache.h | 3 ++- include/linux/fs.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4df926199369..ed9f74f6c519 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -339,7 +339,8 @@ extern int d_validate(struct dentry *, struct dentry *); */ extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); -extern char *__d_path(const struct path *path, struct path *root, char *, int); +extern char *__d_path(const struct path *, const struct path *, char *, int); +extern char *d_absolute_path(const struct path *, char *, int); extern char *d_path(const struct path *, char *, int); extern char *d_path_with_unreachable(const struct path *, char *, int); extern char *dentry_path_raw(struct dentry *, char *, int); diff --git a/include/linux/fs.h b/include/linux/fs.h index e3130220ce3e..019dc558df1a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1942,6 +1942,7 @@ extern int fd_statfs(int, struct kstatfs *); extern int statfs_by_dentry(struct dentry *, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); +extern bool our_mnt(struct vfsmount *mnt); extern int current_umask(void); -- cgit v1.2.3 From 83aeeada7c69f35e5100b27ec354335597a7a488 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 8 Dec 2011 14:33:54 -0800 Subject: vmscan: use atomic-long for shrinker batching Use atomic-long operations instead of looping around cmpxchg(). [akpm@linux-foundation.org: massage atomic.h inclusions] Signed-off-by: Konstantin Khlebnikov Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- include/linux/mm.h | 1 + include/linux/shrinker.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 019dc558df1a..e0bc4ffb8e7f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -393,8 +393,8 @@ struct inodes_stat_t { #include #include #include -#include #include +#include #include diff --git a/include/linux/mm.h b/include/linux/mm.h index 3dc3a8c2c485..4baadd18f4ad 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index a83833a1f7a2..07ceb97d53fa 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -35,7 +35,7 @@ struct shrinker { /* These are for internal use */ struct list_head list; - long nr; /* objs pending delete */ + atomic_long_t nr_in_batch; /* objs pending delete */ }; #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */ extern void register_shrinker(struct shrinker *); -- cgit v1.2.3 From 6de5fc9cf7de334912de4cfd2d06eb2d744d2afe Mon Sep 17 00:00:00 2001 From: Stefan Nilsson XK Date: Thu, 3 Nov 2011 09:44:12 +0100 Subject: mmc: core: Add quirk for long data read time Adds a quirk that sets the data read timeout to a fixed value instead of relying on the information in the CSD. The timeout value chosen is 300ms since that has proven enough for the problematic cards found, but could be increased if other cards require this. This patch also enables this quirk for certain Micron cards known to have this problem. Signed-off-by: Stefan Nilsson XK Signed-off-by: Ulf Hansson Acked-by: Linus Walleij Cc: Signed-off-by: Chris Ball --- include/linux/mmc/card.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 415f2db414e1..c8ef9bc54d50 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -218,6 +218,7 @@ struct mmc_card { #define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ #define MMC_QUIRK_BLK_NO_CMD23 (1<<7) /* Avoid CMD23 for regular multiblock */ #define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8) /* Avoid sending 512 bytes in */ +#define MMC_QUIRK_LONG_READ_TIME (1<<9) /* Data read time > CSD says */ /* byte mode */ unsigned int poweroff_notify_state; /* eMMC4.5 notify feature */ #define MMC_NO_POWER_NOTIFICATION 0 @@ -433,6 +434,11 @@ static inline int mmc_card_broken_byte_mode_512(const struct mmc_card *c) return c->quirks & MMC_QUIRK_BROKEN_BYTE_MODE_512; } +static inline int mmc_card_long_read_time(const struct mmc_card *c) +{ + return c->quirks & MMC_QUIRK_LONG_READ_TIME; +} + #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) (dev_name(&(c)->dev)) -- cgit v1.2.3 From 13c07b0286d340275f2d97adf085cecda37ede37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 12 Dec 2011 22:06:55 -0800 Subject: linux/log2.h: Fix rounddown_pow_of_two(1) Exactly like roundup_pow_of_two(1), the rounddown version was buggy for the case of a compile-time constant '1' argument. Probably because it originated from the same code, sharing history with the roundup version from before the bugfix (for that one, see commit 1a06a52ee1b0: "Fix roundup_pow_of_two(1)"). However, unlike the roundup version, the fix for rounddown is to just remove the broken special case entirely. It's simply not needed - the generic code 1UL << ilog2(n) does the right thing for the constant '1' argment too. The only reason roundup needed that special case was because rounding up does so by subtracting one from the argument (and then adding one to the result) causing the obvious problems with "ilog2(0)". But rounddown doesn't do any of that, since ilog2() naturally truncates (ie "rounds down") to the right rounded down value. And without the ilog2(0) case, there's no reason for the special case that had the wrong value. tl;dr: rounddown_pow_of_two(1) should be 1, not 0. Acked-by: Dmitry Torokhov Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/log2.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index 25b808631cd9..fd7ff3d91e6a 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -185,7 +185,6 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define rounddown_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n == 1) ? 0 : \ (1UL << ilog2(n))) : \ __rounddown_pow_of_two(n) \ ) -- cgit v1.2.3 From 8bc1f85c02a20a59956b00b3acea12c04dce9ae8 Mon Sep 17 00:00:00 2001 From: Eugeni Dodonov Date: Wed, 23 Nov 2011 16:42:14 -0200 Subject: iommu: Export intel_iommu_enabled to signal when iommu is in use In i915 driver, we do not enable either rc6 or semaphores on SNB when dmar is enabled. The new 'intel_iommu_enabled' variable signals when the iommu code is in operation. Cc: Ted Phelps Cc: Peter Cc: Lukas Hejtmanek Cc: Andrew Lutomirski CC: Daniel Vetter Cc: Eugeni Dodonov Signed-off-by: Keith Packard --- include/linux/dma_remapping.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index ef90cbd8e173..57c9a8ae4f2d 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -31,6 +31,7 @@ extern void free_dmar_iommu(struct intel_iommu *iommu); extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; +extern int intel_iommu_enabled; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { @@ -44,6 +45,7 @@ static inline void free_dmar_iommu(struct intel_iommu *iommu) { } #define dmar_disabled (1) +#define intel_iommu_enabled (0) #endif -- cgit v1.2.3 From b1b73d095084e754562961c443aa8f6587a55f8e Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Mon, 19 Dec 2011 18:13:19 +0900 Subject: time/clocksource: Fix kernel-doc warnings Fix various KernelDoc build warnings. Signed-off-by: Kusanagi Kouichi Cc: John Stultz Link: http://lkml.kernel.org/r/20111219091320.0D5AF6FC03D@msa105.auone-net.jp Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c86c940d1de3..081147da0564 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -71,7 +71,7 @@ struct timecounter { /** * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds - * @tc: Pointer to cycle counter. + * @cc: Pointer to cycle counter. * @cycles: Cycles * * XXX - This could use some mult_lxl_ll() asm optimization. Same code @@ -114,7 +114,7 @@ extern u64 timecounter_read(struct timecounter *tc); * time base as values returned by * timecounter_read() * @tc: Pointer to time counter. - * @cycle: a value returned by tc->cc->read() + * @cycle_tstamp: a value returned by tc->cc->read() * * Cycle counts that are converted correctly as long as they * fall into the interval [-1/2 max cycle count, +1/2 max cycle count], @@ -156,11 +156,12 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @max_idle_ns: max idle time permitted by the clocksource (nsecs) - * @maxadj maximum adjustment value to mult (~11%) + * @maxadj: maximum adjustment value to mult (~11%) * @flags: flags describing special properties * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary + * @cycle_last: most recent cycle counter value seen by ::read() */ struct clocksource { /* @@ -187,6 +188,7 @@ struct clocksource { void (*suspend)(struct clocksource *cs); void (*resume)(struct clocksource *cs); + /* private: */ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ struct list_head wd_list; @@ -261,6 +263,9 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) /** * clocksource_cyc2ns - converts clocksource cycles to nanoseconds + * @cycles: cycles + * @mult: cycle to nanosecond multiplier + * @shift: cycle to nanosecond divisor (power of two) * * Converts cycles to nanoseconds, using the given mult and shift. * -- cgit v1.2.3