From 5f12a761937373d2f9b557d7519e6f1cf738b8f0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 11 Aug 2011 12:31:20 +0100
Subject: perf: provide PMU when initing events

Currently, an event's 'pmu' field is set after pmu::event_init() is
called. This means that pmu::event_init() must figure out which struct
pmu the event was initialised from. This makes it difficult to
consolidate common event initialisation code for similar PMUs, and
very difficult to implement drivers for PMUs which can have multiple
instances (e.g. a USB controller PMU, a GPU PMU, etc).

This patch sets the 'pmu' field before initialising the event, allowing
event init code to identify the struct pmu instance easily. In the
event of failure to initialise an event, the event is destroyed via
kfree() without calling perf_event::destroy(), so this shouldn't
result in bad behaviour even if the destroy field was set before
failure to initialise was noted.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1313062280-19123-1-git-send-email-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/events/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index b8785e26ee1c..68c8017de969 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5715,6 +5715,7 @@ struct pmu *perf_init_event(struct perf_event *event)
 	pmu = idr_find(&pmu_idr, event->attr.type);
 	rcu_read_unlock();
 	if (pmu) {
+		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (ret)
 			pmu = ERR_PTR(ret);
@@ -5722,6 +5723,7 @@ struct pmu *perf_init_event(struct perf_event *event)
 	}
 
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (!ret)
 			goto unlock;
@@ -5848,8 +5850,6 @@ done:
 		return ERR_PTR(err);
 	}
 
-	event->pmu = pmu;
-
 	if (!event->parent) {
 		if (event->attach_state & PERF_ATTACH_TASK)
 			jump_label_inc(&perf_sched_events);
-- 
cgit v1.2.3


From 590e4d857153c5d4cf86052cdfd42cf9b0779841 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sun, 24 Jul 2011 16:33:13 +0000
Subject: sched: Allow SD_NODES_PER_DOMAIN to be overridden

We want to override the default value of SD_NODES_PER_DOMAIN on ppc64,
so move it into linux/topology.h.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 kernel/sched.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index ec5f472bc5b9..d258b2d9a66d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6947,8 +6947,6 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-#define SD_NODES_PER_DOMAIN 16
-
 #ifdef CONFIG_NUMA
 
 /**
-- 
cgit v1.2.3


From ab10023e0088d5075354afc7cb9e72304757dddd Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Thu, 10 Feb 2011 02:04:45 -0800
Subject: cpu_pm: Add cpu power management notifiers

During some CPU power modes entered during idle, hotplug and
suspend, peripherals located in the CPU power domain, such as
the GIC, localtimers, and VFP, may be powered down.  Add a
notifier chain that allows drivers for those peripherals to
be notified before and after they may be reset.

Notified drivers can include VFP co-processor, interrupt controller
and it's PM extensions, local CPU timers context save/restore which
shouldn't be interrupted. Hence CPU PM event APIs  must be called
with interrupts disabled.

Signed-off-by: Colin Cross <ccross@android.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Kevin Hilman <khilman@ti.com>
Tested-and-Acked-by: Shawn Guo <shawn.guo@linaro.org>
Tested-by: Kevin Hilman <khilman@ti.com>
Tested-by: Vishwanath BS <vishwanath.bs@ti.com>
---
 kernel/Makefile      |   1 +
 kernel/cpu_pm.c      | 200 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/power/Kconfig |   4 ++
 3 files changed, 205 insertions(+)
 create mode 100644 kernel/cpu_pm.c

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index eca595e2fd52..988cb3da7031 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -101,6 +101,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
+obj-$(CONFIG_CPU_PM) += cpu_pm.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
new file mode 100644
index 000000000000..4d1ff4acd04b
--- /dev/null
+++ b/kernel/cpu_pm.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * Author:
+ *	Colin Cross <ccross@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpu_pm.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+
+static DEFINE_RWLOCK(cpu_pm_notifier_lock);
+static RAW_NOTIFIER_HEAD(cpu_pm_notifier_chain);
+
+static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
+{
+	int ret;
+
+	ret = __raw_notifier_call_chain(&cpu_pm_notifier_chain, event, NULL,
+		nr_to_call, nr_calls);
+
+	return notifier_to_errno(ret);
+}
+
+/**
+ * cpu_pm_register_notifier - register a driver with cpu_pm
+ * @nb: notifier block to register
+ *
+ * Add a driver to a list of drivers that are notified about
+ * CPU and CPU cluster low power entry and exit.
+ *
+ * This function may sleep, and has the same return conditions as
+ * raw_notifier_chain_register.
+ */
+int cpu_pm_register_notifier(struct notifier_block *nb)
+{
+	unsigned long flags;
+	int ret;
+
+	write_lock_irqsave(&cpu_pm_notifier_lock, flags);
+	ret = raw_notifier_chain_register(&cpu_pm_notifier_chain, nb);
+	write_unlock_irqrestore(&cpu_pm_notifier_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
+
+/**
+ * cpu_pm_unregister_notifier - unregister a driver with cpu_pm
+ * @nb: notifier block to be unregistered
+ *
+ * Remove a driver from the CPU PM notifier list.
+ *
+ * This function may sleep, and has the same return conditions as
+ * raw_notifier_chain_unregister.
+ */
+int cpu_pm_unregister_notifier(struct notifier_block *nb)
+{
+	unsigned long flags;
+	int ret;
+
+	write_lock_irqsave(&cpu_pm_notifier_lock, flags);
+	ret = raw_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
+	write_unlock_irqrestore(&cpu_pm_notifier_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
+
+/**
+ * cpm_pm_enter - CPU low power entry notifier
+ *
+ * Notifies listeners that a single CPU is entering a low power state that may
+ * cause some blocks in the same power domain as the cpu to reset.
+ *
+ * Must be called on the affected CPU with interrupts disabled.  Platform is
+ * responsible for ensuring that cpu_pm_enter is not called twice on the same
+ * CPU before cpu_pm_exit is called. Notified drivers can include VFP
+ * co-processor, interrupt controller and it's PM extensions, local CPU
+ * timers context save/restore which shouldn't be interrupted. Hence it
+ * must be called with interrupts disabled.
+ *
+ * Return conditions are same as __raw_notifier_call_chain.
+ */
+int cpu_pm_enter(void)
+{
+	int nr_calls;
+	int ret = 0;
+
+	read_lock(&cpu_pm_notifier_lock);
+	ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls);
+	if (ret)
+		/*
+		 * Inform listeners (nr_calls - 1) about failure of CPU PM
+		 * PM entry who are notified earlier to prepare for it.
+		 */
+		cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL);
+	read_unlock(&cpu_pm_notifier_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_pm_enter);
+
+/**
+ * cpm_pm_exit - CPU low power exit notifier
+ *
+ * Notifies listeners that a single CPU is exiting a low power state that may
+ * have caused some blocks in the same power domain as the cpu to reset.
+ *
+ * Notified drivers can include VFP co-processor, interrupt controller
+ * and it's PM extensions, local CPU timers context save/restore which
+ * shouldn't be interrupted. Hence it must be called with interrupts disabled.
+ *
+ * Return conditions are same as __raw_notifier_call_chain.
+ */
+int cpu_pm_exit(void)
+{
+	int ret;
+
+	read_lock(&cpu_pm_notifier_lock);
+	ret = cpu_pm_notify(CPU_PM_EXIT, -1, NULL);
+	read_unlock(&cpu_pm_notifier_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_pm_exit);
+
+/**
+ * cpm_cluster_pm_enter - CPU cluster low power entry notifier
+ *
+ * Notifies listeners that all cpus in a power domain are entering a low power
+ * state that may cause some blocks in the same power domain to reset.
+ *
+ * Must be called after cpu_pm_enter has been called on all cpus in the power
+ * domain, and before cpu_pm_exit has been called on any cpu in the power
+ * domain. Notified drivers can include VFP co-processor, interrupt controller
+ * and it's PM extensions, local CPU timers context save/restore which
+ * shouldn't be interrupted. Hence it must be called with interrupts disabled.
+ *
+ * Must be called with interrupts disabled.
+ *
+ * Return conditions are same as __raw_notifier_call_chain.
+ */
+int cpu_cluster_pm_enter(void)
+{
+	int nr_calls;
+	int ret = 0;
+
+	read_lock(&cpu_pm_notifier_lock);
+	ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls);
+	if (ret)
+		/*
+		 * Inform listeners (nr_calls - 1) about failure of CPU cluster
+		 * PM entry who are notified earlier to prepare for it.
+		 */
+		cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL);
+	read_unlock(&cpu_pm_notifier_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
+
+/**
+ * cpm_cluster_pm_exit - CPU cluster low power exit notifier
+ *
+ * Notifies listeners that all cpus in a power domain are exiting form a
+ * low power state that may have caused some blocks in the same power domain
+ * to reset.
+ *
+ * Must be called after cpu_pm_exit has been called on all cpus in the power
+ * domain, and before cpu_pm_exit has been called on any cpu in the power
+ * domain. Notified drivers can include VFP co-processor, interrupt controller
+ * and it's PM extensions, local CPU timers context save/restore which
+ * shouldn't be interrupted. Hence it must be called with interrupts disabled.
+ *
+ * Return conditions are same as __raw_notifier_call_chain.
+ */
+int cpu_cluster_pm_exit(void)
+{
+	int ret;
+
+	read_lock(&cpu_pm_notifier_lock);
+	ret = cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL);
+	read_unlock(&cpu_pm_notifier_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 3744c594b19b..80a85971cf64 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -235,3 +235,7 @@ config PM_GENERIC_DOMAINS
 config PM_GENERIC_DOMAINS_RUNTIME
 	def_bool y
 	depends on PM_RUNTIME && PM_GENERIC_DOMAINS
+
+config CPU_PM
+	bool
+	depends on SUSPEND || CPU_IDLE
-- 
cgit v1.2.3


From 6f3eaec87b6b17bfa49cb3b5b8d07fa84be18512 Mon Sep 17 00:00:00 2001
From: Colin Cross <ccross@android.com>
Date: Fri, 22 Jul 2011 14:57:09 -0700
Subject: cpu_pm: call notifiers during suspend

Implements syscore_ops in cpu_pm to call the cpu and
cpu cluster notifiers during suspend and resume,
allowing drivers receiving the notifications to
avoid implementing syscore_ops.

Signed-off-by: Colin Cross <ccross@android.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Reviewed-by: Kevin Hilman <khilman@ti.com>
Tested-and-Acked-by: Shawn Guo <shawn.guo@linaro.org>
Tested-by: Vishwanath BS <vishwanath.bs@ti.com>
---
 kernel/cpu_pm.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'kernel')

diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 4d1ff4acd04b..249152e15308 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
+#include <linux/syscore_ops.h>
 
 static DEFINE_RWLOCK(cpu_pm_notifier_lock);
 static RAW_NOTIFIER_HEAD(cpu_pm_notifier_chain);
@@ -198,3 +199,35 @@ int cpu_cluster_pm_exit(void)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit);
+
+#ifdef CONFIG_PM
+static int cpu_pm_suspend(void)
+{
+	int ret;
+
+	ret = cpu_pm_enter();
+	if (ret)
+		return ret;
+
+	ret = cpu_cluster_pm_enter();
+	return ret;
+}
+
+static void cpu_pm_resume(void)
+{
+	cpu_cluster_pm_exit();
+	cpu_pm_exit();
+}
+
+static struct syscore_ops cpu_pm_syscore_ops = {
+	.suspend = cpu_pm_suspend,
+	.resume = cpu_pm_resume,
+};
+
+static int cpu_pm_init(void)
+{
+	register_syscore_ops(&cpu_pm_syscore_ops);
+	return 0;
+}
+core_initcall(cpu_pm_init);
+#endif
-- 
cgit v1.2.3


From 9d823e8f6b1b7b39f952d7d1795f29162143a433 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Sat, 11 Jun 2011 18:10:12 -0600
Subject: writeback: per task dirty rate limit

Add two fields to task_struct.

1) account dirtied pages in the individual tasks, for accuracy
2) per-task balance_dirty_pages() call intervals, for flexibility

The balance_dirty_pages() call interval (ie. nr_dirtied_pause) will
scale near-sqrt to the safety gap between dirty pages and threshold.

The main problem of per-task nr_dirtied is, if 1k+ tasks start dirtying
pages at exactly the same time, each task will be assigned a large
initial nr_dirtied_pause, so that the dirty threshold will be exceeded
long before each task reached its nr_dirtied_pause and hence call
balance_dirty_pages().

The solution is to watch for the number of pages dirtied on each CPU in
between the calls into balance_dirty_pages(). If it exceeds ratelimit_pages
(3% dirty threshold), force call balance_dirty_pages() for a chance to
set bdi->dirty_exceeded. In normal situations, this safeguarding
condition is not expected to trigger at all.

On the sqrt in dirty_poll_interval():

It will serve as an initial guess when dirty pages are still in the
freerun area.

When dirty pages are floating inside the dirty control scope [freerun,
limit], a followup patch will use some refined dirty poll interval to
get the desired pause time.

   thresh-dirty (MB)    sqrt
		   1      16
		   2      22
		   4      32
		   8      45
		  16      64
		  32      90
		  64     128
		 128     181
		 256     256
		 512     362
		1024     512

The above table means, given 1MB (or 1GB) gap and the dd tasks polling
balance_dirty_pages() on every 16 (or 512) pages, the dirty limit won't
be exceeded as long as there are less than 16 (or 512) concurrent dd's.

So sqrt naturally leads to less overheads and more safe concurrent tasks
for large memory servers, which have large (thresh-freerun) gaps.

peter: keep the per-CPU ratelimit for safeguarding the 1k+ tasks case

CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Andrea Righi <andrea@betterlinux.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 kernel/fork.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 8e6b6f4fb272..cc0815df99f2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1302,6 +1302,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->pdeath_signal = 0;
 	p->exit_state = 0;
 
+	p->nr_dirtied = 0;
+	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+
 	/*
 	 * Ok, make it visible to the rest of the system.
 	 * We dont wake it up yet.
-- 
cgit v1.2.3


From 825de2e9007439977aed63771db570fc2235e8cd Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Date: Mon, 17 Oct 2011 11:08:46 +0900
Subject: irq: Add EXPORT_SYMBOL_GPL to function of irq generic-chip

Some functions of irq generic-chip is undefined, because
EXPORT_SYMBOL_GPL is not set to these.

ERROR: "irq_setup_generic_chip" [drivers/gpio/gpio-pch.ko] undefined!
ERROR: "irq_alloc_generic_chip" [drivers/gpio/gpio-pch.ko] undefined!
ERROR: "irq_setup_generic_chip" [drivers/gpio/gpio-ml-ioh.ko] undefined!
ERROR: "irq_alloc_generic_chip" [drivers/gpio/gpio-ml-ioh.ko] undefined!

This is revised that EXPORT_SYMBOL_GPL can be added and referred
to in functions.

Signed-off-by: Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 kernel/irq/generic-chip.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index e38544dddb18..6cb7613e4bf4 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -211,6 +211,7 @@ irq_alloc_generic_chip(const char *name, int num_ct, unsigned int irq_base,
 	}
 	return gc;
 }
+EXPORT_SYMBOL_GPL(irq_alloc_generic_chip);
 
 /*
  * Separate lockdep class for interrupt chip which can nest irq_desc
@@ -258,6 +259,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
 	}
 	gc->irq_cnt = i - gc->irq_base;
 }
+EXPORT_SYMBOL_GPL(irq_setup_generic_chip);
 
 /**
  * irq_setup_alt_chip - Switch to alternative chip
@@ -281,6 +283,7 @@ int irq_setup_alt_chip(struct irq_data *d, unsigned int type)
 	}
 	return -EINVAL;
 }
+EXPORT_SYMBOL_GPL(irq_setup_alt_chip);
 
 /**
  * irq_remove_generic_chip - Remove a chip
@@ -311,6 +314,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
 		irq_modify_status(i, clr, set);
 	}
 }
+EXPORT_SYMBOL_GPL(irq_remove_generic_chip);
 
 #ifdef CONFIG_PM
 static int irq_gc_suspend(void)
-- 
cgit v1.2.3


From 189c3fd68c7016e37c1ffd7a00009e2c944a9d06 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Fri, 30 Sep 2011 09:10:54 -0700
Subject: stop_machine: make stop_machine safe and efficient to call early

Make stop_machine() safe to call early in boot, before stop_machine()
has been set up, by simply calling the callback function directly if
there's only one CPU online.

[ Fixes from AKPM:
   - add comment
   - local_irq_flags, not save_flags
   - also call hard_irq_disable() for systems which need it

  Tejun suggested using an explicit flag rather than just looking at
  the online cpu count. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/stop_machine.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'kernel')

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ba5070ce5765..d3f960a6ca56 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -41,6 +41,7 @@ struct cpu_stopper {
 };
 
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
+static bool stop_machine_initialized = false;
 
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
 {
@@ -386,6 +387,8 @@ static int __init cpu_stop_init(void)
 	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
 	register_cpu_notifier(&cpu_stop_cpu_notifier);
 
+	stop_machine_initialized = true;
+
 	return 0;
 }
 early_initcall(cpu_stop_init);
@@ -485,6 +488,25 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 					    .num_threads = num_online_cpus(),
 					    .active_cpus = cpus };
 
+	if (!stop_machine_initialized) {
+		/*
+		 * Handle the case where stop_machine() is called
+		 * early in boot before stop_machine() has been
+		 * initialized.
+		 */
+ 		unsigned long flags;
+		int ret;
+
+		WARN_ON_ONCE(smdata.num_threads != 1);
+
+		local_irq_save(flags);
+		hard_irq_disable();
+		ret = (*fn)(data);
+		local_irq_restore(flags);
+
+		return ret;
+	}
+
 	/* Set the initial state and stop all online cpus. */
 	set_state(&smdata, STOPMACHINE_PREPARE);
 	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
-- 
cgit v1.2.3


From 37348804e0289087d21ae8bff4c0732030a3c6ac Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Thu, 29 Sep 2011 11:10:05 -0700
Subject: jump_label: if a key has already been initialized, don't nop it out

If a key has been enabled before jump_label_init() is called, don't
nop it out.

This removes arch_jump_label_text_poke_early() (which can only nop
out a site) and uses arch_jump_label_transform() instead.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/jump_label.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

(limited to 'kernel')

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index a8ce45097f3d..059202d5b77a 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -121,13 +121,6 @@ static void __jump_label_update(struct jump_label_key *key,
 	}
 }
 
-/*
- * Not all archs need this.
- */
-void __weak arch_jump_label_text_poke_early(jump_label_t addr)
-{
-}
-
 static __init int jump_label_init(void)
 {
 	struct jump_entry *iter_start = __start___jump_table;
@@ -139,12 +132,15 @@ static __init int jump_label_init(void)
 	jump_label_sort_entries(iter_start, iter_stop);
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
-		arch_jump_label_text_poke_early(iter->code);
-		if (iter->key == (jump_label_t)(unsigned long)key)
+		struct jump_label_key *iterk;
+
+		iterk = (struct jump_label_key *)(unsigned long)iter->key;
+		arch_jump_label_transform(iter, jump_label_enabled(iterk) ?
+					  JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
+		if (iterk == key)
 			continue;
 
-		key = (struct jump_label_key *)(unsigned long)iter->key;
-		atomic_set(&key->enabled, 0);
+		key = iterk;
 		key->entries = iter;
 #ifdef CONFIG_MODULES
 		key->next = NULL;
@@ -212,7 +208,7 @@ void jump_label_apply_nops(struct module *mod)
 		return;
 
 	for (iter = iter_start; iter < iter_stop; iter++)
-		arch_jump_label_text_poke_early(iter->code);
+		arch_jump_label_transform(iter, JUMP_LABEL_DISABLE);
 }
 
 static int jump_label_add_module(struct module *mod)
-- 
cgit v1.2.3


From 20284aa77c0f6227da4783a920b72dc61d4bcc09 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 3 Oct 2011 11:01:46 -0700
Subject: jump_label: add arch_jump_label_transform_static() to optimise
 non-live code updates

When updating a newly loaded module, the code is definitely not yet
executing on any processor, so it can be updated with no need for any
heavyweight synchronization.

This patch adds arch_jump_label_static() which is implemented as
arch_jump_label_transform() by default, but architectures can override
it if it avoids, say, a call to stop_machine().

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Jason Baron <jbaron@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/jump_label.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 059202d5b77a..ff2028f35aa8 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -104,6 +104,18 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
 	return 0;
 }
 
+/* 
+ * Update code which is definitely not currently executing.
+ * Architectures which need heavyweight synchronization to modify
+ * running code can override this to make the non-live update case
+ * cheaper.
+ */
+void __weak arch_jump_label_transform_static(struct jump_entry *entry,
+					    enum jump_label_type type)
+{
+	arch_jump_label_transform(entry, type);	
+}
+
 static void __jump_label_update(struct jump_label_key *key,
 				struct jump_entry *entry,
 				struct jump_entry *stop, int enable)
@@ -135,8 +147,8 @@ static __init int jump_label_init(void)
 		struct jump_label_key *iterk;
 
 		iterk = (struct jump_label_key *)(unsigned long)iter->key;
-		arch_jump_label_transform(iter, jump_label_enabled(iterk) ?
-					  JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
+		arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ?
+						 JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE);
 		if (iterk == key)
 			continue;
 
@@ -208,7 +220,7 @@ void jump_label_apply_nops(struct module *mod)
 		return;
 
 	for (iter = iter_start; iter < iter_stop; iter++)
-		arch_jump_label_transform(iter, JUMP_LABEL_DISABLE);
+		arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
 }
 
 static int jump_label_add_module(struct module *mod)
-- 
cgit v1.2.3


From 97ce2c88f9ad42e3c60a9beb9fca87abf3639faa Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 12 Oct 2011 16:17:54 -0700
Subject: jump-label: initialize jump-label subsystem much earlier

Initialize jump_labels much, much earlier, so they're available for use
during system setup.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/jump_label.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index ff2028f35aa8..bbdfe2a462a0 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -133,7 +133,7 @@ static void __jump_label_update(struct jump_label_key *key,
 	}
 }
 
-static __init int jump_label_init(void)
+void __init jump_label_init(void)
 {
 	struct jump_entry *iter_start = __start___jump_table;
 	struct jump_entry *iter_stop = __stop___jump_table;
@@ -159,10 +159,7 @@ static __init int jump_label_init(void)
 #endif
 	}
 	jump_label_unlock();
-
-	return 0;
 }
-early_initcall(jump_label_init);
 
 #ifdef CONFIG_MODULES
 
-- 
cgit v1.2.3


From 3d214faea6e4f9b6018bf8589f4b245126349c0a Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Sun, 30 Oct 2011 15:16:36 +0100
Subject: [S390] kdump: Add KEXEC_CRASH_CONTROL_MEMORY_LIMIT

On s390 there is a different KEXEC_CONTROL_MEMORY_LIMIT for the normal and
the kdump kexec case. Therefore this patch introduces a new macro
KEXEC_CRASH_CONTROL_MEMORY_LIMIT. This is set to
KEXEC_CONTROL_MEMORY_LIMIT for all architectures that do not define
KEXEC_CRASH_CONTROL_MEMORY_LIMIT.

Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 kernel/kexec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/kexec.c b/kernel/kexec.c
index 296fbc84d659..7204fb982ed5 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -498,7 +498,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
 	while (hole_end <= crashk_res.end) {
 		unsigned long i;
 
-		if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
+		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
 			break;
 		if (hole_end > crashk_res.end)
 			break;
-- 
cgit v1.2.3


From d3bf37955d46718ee1a7f1fc69f953d2328ba7c2 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Sun, 30 Oct 2011 15:16:37 +0100
Subject: [S390] kdump: Add size to elfcorehdr kernel parameter

Currently only the address of the pre-allocated ELF header is passed with
the elfcorehdr= kernel parameter. In order to reserve memory for the header
in the 2nd kernel also the size is required. Current kdump architecture
backends use different methods to do that, e.g. x86 uses the memmap= kernel
parameter. On s390 there is no easy way to transfer this information.
Therefore the elfcorehdr kernel parameter is extended to also pass the size.
This now can also be used as standard mechanism by all future kdump
architecture backends.

The syntax of the kernel parameter is extended as follows:

elfcorehdr=[size[KMG]@]offset[KMG]

This change is backward compatible because elfcorehdr=size is still allowed.

Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 kernel/crash_dump.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'kernel')

diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
index 5f85690285d4..69ebf3380bac 100644
--- a/kernel/crash_dump.c
+++ b/kernel/crash_dump.c
@@ -19,9 +19,16 @@ unsigned long saved_max_pfn;
  */
 unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
 
+/*
+ * stores the size of elf header of crash image
+ */
+unsigned long long elfcorehdr_size;
+
 /*
  * elfcorehdr= specifies the location of elf core header stored by the crashed
  * kernel. This option will be passed by kexec loader to the capture kernel.
+ *
+ * Syntax: elfcorehdr=[size[KMG]@]offset[KMG]
  */
 static int __init setup_elfcorehdr(char *arg)
 {
@@ -29,6 +36,10 @@ static int __init setup_elfcorehdr(char *arg)
 	if (!arg)
 		return -EINVAL;
 	elfcorehdr_addr = memparse(arg, &end);
+	if (*end == '@') {
+		elfcorehdr_size = elfcorehdr_addr;
+		elfcorehdr_addr = memparse(end + 1, &end);
+	}
 	return end > arg ? 0 : -EINVAL;
 }
 early_param("elfcorehdr", setup_elfcorehdr);
-- 
cgit v1.2.3


From fa8ff292bb4844cc0b66b7b24d611eb7177f7ded Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Sun, 30 Oct 2011 15:16:41 +0100
Subject: [S390] kdump: Initialize vmcoreinfo note at startup

Currently the vmcoreinfo note is only initialized in case of kdump. On s390
it is possible to create kernel dumps with other dump mechanisms than kdump
(e.g. via hypervisor dump or stand-alone dump tools). For those dumps it
would also be desirable to include the vmcoreinfo data. To accomplish this,
with this patch the vmcoreinfo ELF note is always initialized, not only in
case of a (kdump) crash. On s390 we will add an ABI defined pointer at
a well known address to vmcoreinfo so that dump analysis tools are able to
find this information.

In particular on s390 we have a tool named zgetdump. With this tool it is
possible to convert dump formats on the fly using fuse. E.g. you can mount a
s390 stand-alone dump as ELF dump. When this is done, the tool finds the
vmcoreinfo in the stand-alone dump via the well known ABI defined address and
it creates the respective VMCOREINFO ELF note in the output ELF dump. This then
can be used e.g. by makedumpfile for dump filtering.  No more need for a
vmlinux file with debug information.

So this will look like the following:
$ zgetdump --mount standalone.dump -f elf /mnt
$ ls /mnt
  dump.elf
$ readelf -n /mnt/dump.elf
$ ...
  VMCOREINFO            0x00000474      Unknown note type: (0x00000000)
$ makedumpfile -c -d 31 /mnt/dump.elf dump.kdump

Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 kernel/kexec.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kexec.c b/kernel/kexec.c
index 7204fb982ed5..d3b8a4ceb90b 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1380,24 +1380,23 @@ int __init parse_crashkernel(char 		 *cmdline,
 }
 
 
-
-void crash_save_vmcoreinfo(void)
+static void update_vmcoreinfo_note(void)
 {
-	u32 *buf;
+	u32 *buf = vmcoreinfo_note;
 
 	if (!vmcoreinfo_size)
 		return;
-
-	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
-
-	buf = (u32 *)vmcoreinfo_note;
-
 	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
 			      vmcoreinfo_size);
-
 	final_note(buf);
 }
 
+void crash_save_vmcoreinfo(void)
+{
+	vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
+	update_vmcoreinfo_note();
+}
+
 void vmcoreinfo_append_str(const char *fmt, ...)
 {
 	va_list args;
@@ -1483,6 +1482,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_NUMBER(PG_swapcache);
 
 	arch_crash_save_vmcoreinfo();
+	update_vmcoreinfo_note();
 
 	return 0;
 }
-- 
cgit v1.2.3


From 558df7209e7997275f6b8ad37737494cf2da1512 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Sun, 30 Oct 2011 15:16:43 +0100
Subject: [S390] kdump: Add infrastructure for unmapping crashkernel memory

This patch introduces a mechanism that allows architecture backends to
remove page tables for the crashkernel memory. This can protect the loaded
kdump kernel from being overwritten by broken kernel code.  Two new
functions crash_map_reserved_pages() and crash_unmap_reserved_pages() are
added that can be implemented by architecture code.  The
crash_map_reserved_pages() function is called before and
crash_unmap_reserved_pages() after the crashkernel segments are loaded.  The
functions are also called in crash_shrink_memory() to create/remove page
tables when the crashkernel memory size is reduced.

To support architectures that have large pages this patch also introduces
a new define KEXEC_CRASH_MEM_ALIGN. The crashkernel start and size must
always be aligned with KEXEC_CRASH_MEM_ALIGN.

Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 kernel/kexec.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kexec.c b/kernel/kexec.c
index d3b8a4ceb90b..dc7bc0829286 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -999,6 +999,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 			kimage_free(xchg(&kexec_crash_image, NULL));
 			result = kimage_crash_alloc(&image, entry,
 						     nr_segments, segments);
+			crash_map_reserved_pages();
 		}
 		if (result)
 			goto out;
@@ -1015,6 +1016,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 				goto out;
 		}
 		kimage_terminate(image);
+		if (flags & KEXEC_ON_CRASH)
+			crash_unmap_reserved_pages();
 	}
 	/* Install the new kernel, and  Uninstall the old */
 	image = xchg(dest_image, image);
@@ -1026,6 +1029,18 @@ out:
 	return result;
 }
 
+/*
+ * Add and remove page tables for crashkernel memory
+ *
+ * Provide an empty default implementation here -- architecture
+ * code may override this
+ */
+void __weak crash_map_reserved_pages(void)
+{}
+
+void __weak crash_unmap_reserved_pages(void)
+{}
+
 #ifdef CONFIG_COMPAT
 asmlinkage long compat_sys_kexec_load(unsigned long entry,
 				unsigned long nr_segments,
@@ -1134,14 +1149,16 @@ int crash_shrink_memory(unsigned long new_size)
 		goto unlock;
 	}
 
-	start = roundup(start, PAGE_SIZE);
-	end = roundup(start + new_size, PAGE_SIZE);
+	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
+	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
 
+	crash_map_reserved_pages();
 	crash_free_reserved_phys_range(end, crashk_res.end);
 
 	if ((start == end) && (crashk_res.parent != NULL))
 		release_resource(&crashk_res);
 	crashk_res.end = end - 1;
+	crash_unmap_reserved_pages();
 
 unlock:
 	mutex_unlock(&kexec_mutex);
-- 
cgit v1.2.3


From 638ad34a8811119b32247b7722288ef8b90907d1 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Sun, 30 Oct 2011 15:17:13 +0100
Subject: [S390] sparse: fix sparse warnings about missing prototypes

Add prototypes and includes for functions used in different modules.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 kernel/sysctl.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2d2ecdcc8cdb..2fe2bc2a57ea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -151,14 +151,6 @@ extern int pwrsw_enabled;
 extern int unaligned_enabled;
 #endif
 
-#ifdef CONFIG_S390
-#ifdef CONFIG_MATHEMU
-extern int sysctl_ieee_emulation_warnings;
-#endif
-extern int sysctl_userprocess_debug;
-extern int spin_retry;
-#endif
-
 #ifdef CONFIG_IA64
 extern int no_unaligned_warning;
 extern int unaligned_dump_stack;
-- 
cgit v1.2.3


From 6d274309d0e64bdbdb6c50945ca2964596e8fa5a Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Fri, 30 Sep 2011 10:48:38 -0500
Subject: irq: support domains with non-zero hwirq base

Interrupt controllers can have non-zero starting value for h/w irq numbers.
Adding support in irq_domain allows the domain hwirq numbering to match
the interrupt controllers' numbering.

As this makes looping over irqs for a domain more complicated, add loop
iterators to iterate over all hwirqs and irqs for a domain.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Reviewed-by: Jamie Iles <jamie@jamieiles.com>
Tested-by: Thomas Abraham <thomas.abraham@linaro.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/irqdomain.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index b57a3776de44..200ce832c585 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -20,15 +20,15 @@ static DEFINE_MUTEX(irq_domain_mutex);
 void irq_domain_add(struct irq_domain *domain)
 {
 	struct irq_data *d;
-	int hwirq;
+	int hwirq, irq;
 
 	/*
 	 * This assumes that the irq_domain owner has already allocated
 	 * the irq_descs.  This block will be removed when support for dynamic
 	 * allocation of irq_descs is added to irq_domain.
 	 */
-	for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) {
-		d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq));
+	irq_domain_for_each_irq(domain, hwirq, irq) {
+		d = irq_get_irq_data(irq);
 		if (!d) {
 			WARN(1, "error: assigning domain to non existant irq_desc");
 			return;
@@ -54,15 +54,15 @@ void irq_domain_add(struct irq_domain *domain)
 void irq_domain_del(struct irq_domain *domain)
 {
 	struct irq_data *d;
-	int hwirq;
+	int hwirq, irq;
 
 	mutex_lock(&irq_domain_mutex);
 	list_del(&domain->list);
 	mutex_unlock(&irq_domain_mutex);
 
 	/* Clear the irq_domain assignments */
-	for (hwirq = 0; hwirq < domain->nr_irq; hwirq++) {
-		d = irq_get_irq_data(irq_domain_to_irq(domain, hwirq));
+	irq_domain_for_each_irq(domain, hwirq, irq) {
+		d = irq_get_irq_data(irq);
 		d->domain = NULL;
 	}
 }
-- 
cgit v1.2.3


From 9a418455134f5dc23f124d2818b2e8e1cea997a1 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 25 May 2011 09:52:21 -0400
Subject: range: fix bogus misuse of module.h to get printk()

This file isn't doing anything with modules and so it should
not be including <linux/module.h> just to get basic stuff
like printk() and min/max.  Revector it to <linux/kernel.h>.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/range.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/range.c b/kernel/range.c
index 37fa9b99ad58..9b8ae2d6ed68 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -1,7 +1,7 @@
 /*
  * Range add and subtract
  */
-#include <linux/module.h>
+#include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/sort.h>
 
-- 
cgit v1.2.3


From 9984de1a5a8a96275fcab818f7419af5a3c86e71 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Mon, 23 May 2011 14:51:41 -0400
Subject: kernel: Map most files to use export.h instead of module.h

The changed files were only including linux/module.h for the
EXPORT_SYMBOL infrastructure, and nothing else.  Revector them
onto the isolated export header for faster compile times.

Nothing to see here but a whole lot of instances of:

  -#include <linux/module.h>
  +#include <linux/export.h>

This commit is only changing the kernel dir; next targets
will probably be mm, fs, the arch dirs, etc.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/async.c                | 2 +-
 kernel/audit.c                | 2 +-
 kernel/auditsc.c              | 2 +-
 kernel/capability.c           | 2 +-
 kernel/cgroup_freezer.c       | 2 +-
 kernel/cpu.c                  | 2 +-
 kernel/cpuset.c               | 2 +-
 kernel/crash_dump.c           | 2 +-
 kernel/cred.c                 | 2 +-
 kernel/dma.c                  | 2 +-
 kernel/freezer.c              | 2 +-
 kernel/futex.c                | 2 +-
 kernel/groups.c               | 2 +-
 kernel/hrtimer.c              | 2 +-
 kernel/hung_task.c            | 2 +-
 kernel/irq_work.c             | 2 +-
 kernel/kfifo.c                | 2 +-
 kernel/kprobes.c              | 2 +-
 kernel/ksysfs.c               | 2 +-
 kernel/kthread.c              | 2 +-
 kernel/latencytop.c           | 2 +-
 kernel/lockdep_proc.c         | 2 +-
 kernel/module.c               | 2 +-
 kernel/mutex-debug.c          | 2 +-
 kernel/mutex.c                | 2 +-
 kernel/notifier.c             | 2 +-
 kernel/nsproxy.c              | 2 +-
 kernel/padata.c               | 2 +-
 kernel/pid.c                  | 2 +-
 kernel/posix-timers.c         | 2 +-
 kernel/profile.c              | 2 +-
 kernel/ptrace.c               | 2 +-
 kernel/rcupdate.c             | 2 +-
 kernel/rcutiny.c              | 2 +-
 kernel/rcutree.c              | 2 +-
 kernel/relay.c                | 2 +-
 kernel/resource.c             | 2 +-
 kernel/rtmutex-debug.c        | 2 +-
 kernel/rtmutex-tester.c       | 2 +-
 kernel/rtmutex.c              | 2 +-
 kernel/rwsem.c                | 2 +-
 kernel/sched_clock.c          | 2 +-
 kernel/semaphore.c            | 2 +-
 kernel/signal.c               | 2 +-
 kernel/smp.c                  | 2 +-
 kernel/softirq.c              | 2 +-
 kernel/spinlock.c             | 2 +-
 kernel/srcu.c                 | 2 +-
 kernel/stacktrace.c           | 2 +-
 kernel/stop_machine.c         | 2 +-
 kernel/sys.c                  | 2 +-
 kernel/time.c                 | 2 +-
 kernel/timer.c                | 2 +-
 kernel/up.c                   | 2 +-
 kernel/user-return-notifier.c | 2 +-
 kernel/user.c                 | 2 +-
 kernel/user_namespace.c       | 2 +-
 kernel/utsname.c              | 2 +-
 kernel/utsname_sysctl.c       | 2 +-
 kernel/wait.c                 | 2 +-
 kernel/workqueue.c            | 2 +-
 61 files changed, 61 insertions(+), 61 deletions(-)

(limited to 'kernel')

diff --git a/kernel/async.c b/kernel/async.c
index 4c2843c0043e..80b74b88fefe 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -51,7 +51,7 @@ asynchronous and synchronous parts of the kernel.
 #include <linux/async.h>
 #include <linux/atomic.h>
 #include <linux/ktime.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
diff --git a/kernel/audit.c b/kernel/audit.c
index 0a1355ca3d79..09fae2677a45 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -45,7 +45,7 @@
 #include <asm/types.h>
 #include <linux/atomic.h>
 #include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/kthread.h>
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ce4b054acee5..47b7fc1ea893 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -48,7 +48,7 @@
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
 #include <linux/socket.h>
diff --git a/kernel/capability.c b/kernel/capability.c
index 283c529f8b1c..b463871a4e69 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -10,7 +10,7 @@
 #include <linux/audit.h>
 #include <linux/capability.h>
 #include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e691818d7e45..5e828a2ca8e6 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -14,7 +14,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/cgroup.h>
 #include <linux/fs.h>
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 12b7458f23b1..6a81ca906a06 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,7 +10,7 @@
 #include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 10131fdaff70..d970fb508e34 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -37,7 +37,7 @@
 #include <linux/mempolicy.h>
 #include <linux/mm.h>
 #include <linux/memory.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
index 5f85690285d4..20e699265cef 100644
--- a/kernel/crash_dump.c
+++ b/kernel/crash_dump.c
@@ -2,7 +2,7 @@
 #include <linux/crash_dump.h>
 #include <linux/init.h>
 #include <linux/errno.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 /*
  * If we have booted due to a crash, max_pfn will be a very low value. We need
diff --git a/kernel/cred.c b/kernel/cred.c
index bb55d052d858..5791612a4045 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -8,7 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the Licence, or (at your option) any later version.
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/cred.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
diff --git a/kernel/dma.c b/kernel/dma.c
index f903189c5304..68a2306522c8 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -9,7 +9,7 @@
  *   [It also happened to remove the sizeof(char *) == sizeof(int)
  *   assumption introduced because of those /proc/dma patches. -- Hennus]
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/spinlock.h>
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 66a594e8ad2f..f24aa0005530 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -6,7 +6,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/suspend.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/syscalls.h>
 #include <linux/freezer.h>
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 1511dff0cfd6..ea87f4d2f455 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -55,7 +55,7 @@
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/magic.h>
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
diff --git a/kernel/groups.c b/kernel/groups.c
index 1cc476d52dd3..99b53d1eb7ea 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -2,7 +2,7 @@
  * Supplementary group IDs
  */
 #include <linux/cred.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index a9205e32a059..422e567eecf6 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -32,7 +32,7 @@
  */
 
 #include <linux/cpu.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/percpu.h>
 #include <linux/hrtimer.h>
 #include <linux/notifier.h>
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index ea640120ab86..8b1748d0172c 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -13,7 +13,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/lockdep.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sysctl.h>
 
 /*
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 0e2cde4f380b..3e460ea44955 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -6,7 +6,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/irq_work.h>
 #include <linux/hardirq.h>
 
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 01a0700e873f..c744b88c44e2 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -20,7 +20,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/log2.h>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2f193d0ba7f2..e5d84644823b 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -36,7 +36,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/stddef.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/moduleloader.h>
 #include <linux/kallsyms.h>
 #include <linux/freezer.h>
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 3b053c04dd86..6771de3a655d 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -11,7 +11,7 @@
 #include <linux/kobject.h>
 #include <linux/string.h>
 #include <linux/sysfs.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
 #include <linux/profile.h>
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ba7cccb4994..b6d216a92639 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -12,7 +12,7 @@
 #include <linux/cpuset.h>
 #include <linux/unistd.h>
 #include <linux/file.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/freezer.h>
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 4ac8ebfcab59..a462b317f9a0 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -53,7 +53,7 @@
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
 #include <linux/proc_fs.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/list.h>
 #include <linux/stacktrace.h>
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 71edd2f60c02..91c32a0b612c 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -11,7 +11,7 @@
  * Code for /proc/lockdep and /proc/lockdep_stats:
  *
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
diff --git a/kernel/module.c b/kernel/module.c
index 93342d992f34..84205ae1607a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -16,7 +16,7 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/moduleloader.h>
 #include <linux/ftrace_event.h>
 #include <linux/init.h>
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 73da83aff418..7e3443fe1f48 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -14,7 +14,7 @@
  */
 #include <linux/mutex.h>
 #include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/poison.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
diff --git a/kernel/mutex.c b/kernel/mutex.c
index d607ed5dd441..89096dd8786f 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -19,7 +19,7 @@
  */
 #include <linux/mutex.h>
 #include <linux/sched.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 8d7b435806c9..2d5cc4ccff7f 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -1,6 +1,6 @@
 #include <linux/kdebug.h>
 #include <linux/kprobes.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
 #include <linux/vmalloc.h>
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 9aeab4b98c64..b576f7f14bc6 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -14,7 +14,7 @@
  */
 
 #include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/nsproxy.h>
 #include <linux/init_task.h>
 #include <linux/mnt_namespace.h>
diff --git a/kernel/padata.c b/kernel/padata.c
index b91941df5e63..b45259931512 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -18,7 +18,7 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/cpumask.h>
 #include <linux/err.h>
 #include <linux/cpu.h>
diff --git a/kernel/pid.c b/kernel/pid.c
index 8cafe7e72ad2..fa5f72227e5f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -27,7 +27,7 @@
  */
 
 #include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/rculist.h>
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 4556182527f3..69185ae6b701 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -46,7 +46,7 @@
 #include <linux/syscalls.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 /*
  * Management arrays for POSIX timers.	 Timers are kept in slab memory
diff --git a/kernel/profile.c b/kernel/profile.c
index 961b389fe52f..76b8e77773ee 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -13,7 +13,7 @@
  *	to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/profile.h>
 #include <linux/bootmem.h>
 #include <linux/notifier.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a70d2a5d8c7b..24d04477b257 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -8,7 +8,7 @@
  */
 
 #include <linux/capability.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index ca0d23b6b3e8..c5b98e565aee 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -43,7 +43,7 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/hardirq.h>
 
 #define CREATE_TRACE_POINTS
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index da775c87f27f..b5e525d67fe3 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -28,7 +28,7 @@
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/types.h>
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e234eb92a177..6b76d812740c 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -38,7 +38,7 @@
 #include <linux/nmi.h>
 #include <linux/atomic.h>
 #include <linux/bitops.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/completion.h>
 #include <linux/moduleparam.h>
 #include <linux/percpu.h>
diff --git a/kernel/relay.c b/kernel/relay.c
index 859ea5a9605f..226fade4d727 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -15,7 +15,7 @@
 #include <linux/errno.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/string.h>
 #include <linux/relay.h>
 #include <linux/vmalloc.h>
diff --git a/kernel/resource.c b/kernel/resource.c
index c8dc249da5ce..7640b3a947d0 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -7,7 +7,7 @@
  * Arbitrary resource management.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index a2e7e7210f3e..8eafd1bd273e 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -18,7 +18,7 @@
  */
 #include <linux/sched.h>
 #include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/kallsyms.h>
 #include <linux/syscalls.h>
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 5c9ccd380966..3d9f31cd79e7 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -7,7 +7,7 @@
  *
  */
 #include <linux/kthread.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/sysdev.h>
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 5e8d9cce7470..f9d8482dd487 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -11,7 +11,7 @@
  *  See Documentation/rt-mutex-design.txt for details.
  */
 #include <linux/spinlock.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
 
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index 9f48f3d82e9b..b152f74f02de 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -7,7 +7,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/rwsem.h>
 
 #include <asm/system.h>
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 9d8af0b3fb64..c685e31492df 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -62,7 +62,7 @@
  */
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/percpu.h>
 #include <linux/ktime.h>
 #include <linux/sched.h>
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index d831841e55a7..60636a4e25c3 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -27,7 +27,7 @@
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/semaphore.h>
 #include <linux/spinlock.h>
diff --git a/kernel/signal.c b/kernel/signal.c
index d252be2d3de5..b3f78d09a105 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -11,7 +11,7 @@
  */
 
 #include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
diff --git a/kernel/smp.c b/kernel/smp.c
index fb67dfa8394e..db197d60489b 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -6,7 +6,7 @@
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/gfp.h>
diff --git a/kernel/softirq.c b/kernel/softirq.c
index fca82c32042b..2c71d91efff0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -10,7 +10,7 @@
  *	Remote softirq infrastructure is by Jens Axboe.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kernel_stat.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index be6517fb9c14..84c7d96918bf 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -19,7 +19,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 /*
  * If lockdep is enabled then we use the non-preemption spin-ops
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 73ce23feaea9..0febf61e1aa3 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -24,7 +24,7 @@
  *
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/preempt.h>
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index d20c6983aad9..00fe55cc5a82 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -7,7 +7,7 @@
  */
 #include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kallsyms.h>
 #include <linux/stacktrace.h>
 
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ba5070ce5765..e78db365fa83 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -12,7 +12,7 @@
 #include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/kthread.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/stop_machine.h>
diff --git a/kernel/sys.c b/kernel/sys.c
index 58459509b14c..4a0286241829 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -4,7 +4,7 @@
  *  Copyright (C) 1991, 1992  Linus Torvalds
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/utsname.h>
 #include <linux/mman.h>
diff --git a/kernel/time.c b/kernel/time.c
index d77606214529..73e416db0a1e 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -27,7 +27,7 @@
  *	with nanosecond accuracy
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/timex.h>
 #include <linux/capability.h>
 #include <linux/clocksource.h>
diff --git a/kernel/timer.c b/kernel/timer.c
index 8cff36119e4d..dbaa62422b13 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -20,7 +20,7 @@
  */
 
 #include <linux/kernel_stat.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
diff --git a/kernel/up.c b/kernel/up.c
index 1ff27a28bb7d..c54c75e9faf7 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -4,7 +4,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/smp.h>
 
 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c
index 92cb706c7fc8..1744bb80f1fb 100644
--- a/kernel/user-return-notifier.c
+++ b/kernel/user-return-notifier.c
@@ -2,7 +2,7 @@
 #include <linux/user-return-notifier.h>
 #include <linux/percpu.h>
 #include <linux/sched.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 static DEFINE_PER_CPU(struct hlist_head, return_notifier_list);
 
diff --git a/kernel/user.c b/kernel/user.c
index 9e03e9c1df8d..71dd2363ab0f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -14,7 +14,7 @@
 #include <linux/bitops.h>
 #include <linux/key.h>
 #include <linux/interrupt.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/user_namespace.h>
 
 /*
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 9da289c34f22..3b906e98b1db 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -5,7 +5,7 @@
  *  License.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
diff --git a/kernel/utsname.c b/kernel/utsname.c
index bff131b9510a..405caf91aad5 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -9,7 +9,7 @@
  *  License.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <linux/err.h>
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index a2cd77e70d4d..5a709452ec19 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -9,7 +9,7 @@
  *  License.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <linux/sysctl.h>
diff --git a/kernel/wait.c b/kernel/wait.c
index f45ea8d2a1ce..26fa7797f90f 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -4,7 +4,7 @@
  * (C) 2004 William Irwin, Oracle
  */
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/wait.h>
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1783aabc6128..42fa9ad0a810 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -23,7 +23,7 @@
  * Please read Documentation/workqueue.txt for details.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/init.h>
-- 
cgit v1.2.3


From 56d82e000cdfb51aa92241d4682302f78c35cd92 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 26 May 2011 17:53:52 -0400
Subject: kernel: Add <linux/module.h> to files using it implicitly

These files are doing things like module_put and try_module_get
so they need to call out the module.h for explicit inclusion,
rather than getting it via <linux/device.h> which we ideally want
to remove the module.h inclusion from.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/trace/ftrace.c         | 1 +
 kernel/trace/trace_syscalls.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 077d85387908..900b409543db 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,6 +22,7 @@
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
+#include <linux/module.h>
 #include <linux/ftrace.h>
 #include <linux/sysctl.h>
 #include <linux/slab.h>
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index ee7b5a0bb9f8..cb654542c1a1 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -2,6 +2,7 @@
 #include <trace/events/syscalls.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
+#include <linux/module.h>	/* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
 #include <linux/ftrace.h>
 #include <linux/perf_event.h>
 #include <asm/syscall.h>
-- 
cgit v1.2.3


From 74da1ff71350f3638c51613085f89c0865d7fe08 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 26 May 2011 12:48:41 -0400
Subject: kernel: fix several implicit usasges of kmod.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These files were implicitly relying on <linux/kmod.h> coming in via
module.h, as without it we get things like:

kernel/power/suspend.c:100: error: implicit declaration of function ‘usermodehelper_disable’
kernel/power/suspend.c:109: error: implicit declaration of function ‘usermodehelper_enable’
kernel/power/user.c:254: error: implicit declaration of function ‘usermodehelper_disable’
kernel/power/user.c:261: error: implicit declaration of function ‘usermodehelper_enable’

kernel/sys.c:317: error: implicit declaration of function ‘usermodehelper_disable’
kernel/sys.c:1816: error: implicit declaration of function ‘call_usermodehelper_setup’
kernel/sys.c:1822: error: implicit declaration of function ‘call_usermodehelper_setfns’
kernel/sys.c:1824: error: implicit declaration of function ‘call_usermodehelper_exec’

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/power/suspend.c | 1 +
 kernel/power/user.c    | 1 +
 kernel/sys.c           | 1 +
 3 files changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index fdd4263b995d..31aae3219f89 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -12,6 +12,7 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/kmod.h>
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 42ddbc6f0de6..6d8f535c2b88 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -12,6 +12,7 @@
 #include <linux/suspend.h>
 #include <linux/syscalls.h>
 #include <linux/reboot.h>
+#include <linux/kmod.h>
 #include <linux/string.h>
 #include <linux/device.h>
 #include <linux/miscdevice.h>
diff --git a/kernel/sys.c b/kernel/sys.c
index 4a0286241829..d2cb1cda823a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -12,6 +12,7 @@
 #include <linux/prctl.h>
 #include <linux/highuid.h>
 #include <linux/fs.h>
+#include <linux/kmod.h>
 #include <linux/perf_event.h>
 #include <linux/resource.h>
 #include <linux/kernel.h>
-- 
cgit v1.2.3


From 967d1f90625ed9c1ab205d3f738fedf9d852e1fd Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Mon, 18 Jul 2011 13:03:04 -0400
Subject: kernel: fix two implicit header assumptions in irq_work.c

Up until now, this file was getting percpu.h because nearly every
file was implicitly getting module.h (and all its sub-includes).
But we want to clean that up, so call out percpu.h explicitly.
Otherwise we'll get things like this on an ARM build:

kernel/irq_work.c:48: error: expected declaration specifiers or '...' before 'irq_work_list'
kernel/irq_work.c:48: warning: type defaults to 'int' in declaration of 'DEFINE_PER_CPU'

The same thing was happening for builds on ARM for asm/processor.h

kernel/irq_work.c: In function 'irq_work_sync':
kernel/irq_work.c:166: error: implicit declaration of function 'cpu_relax'

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/irq_work.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 3e460ea44955..c3c46c72046e 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -8,7 +8,9 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/irq_work.h>
+#include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <asm/processor.h>
 
 /*
  * An entry can be in one of four states:
-- 
cgit v1.2.3


From 1596425fd7545abccb7b5059376e4d56c3d6be4d Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 28 Jul 2011 14:22:29 -0400
Subject: kernel: ksysfs.c is implicitly using stat.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the module.h usage cleanup, we'll get this:

kernel/ksysfs.c:161: error: ‘S_IRUGO’ undeclared here (not in a function)
make[2]: *** [kernel/ksysfs.o] Error 1

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/ksysfs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 6771de3a655d..4e316e1acf58 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/kexec.h>
 #include <linux/profile.h>
+#include <linux/stat.h>
 #include <linux/sched.h>
 #include <linux/capability.h>
 
-- 
cgit v1.2.3


From 72a59aaada499d9bbf19f2fb68daa37502e4a9bb Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 26 May 2011 21:07:10 -0400
Subject: kernel: params.c needs module.h not moduleparam.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Through various other implicit include paths, some files were
getting the full module.h file, and hence living the illusion
that they really only needed moduleparam.h -- but the reality
is that once you remove the module.h presence, these show up:

kernel/params.c:583: warning: ‘struct module_kobject’ declared inside parameter list

Such files really require module.h so simply make it so.  As the
file module.h grabs moduleparam.h on the fly, all will be well.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/params.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/params.c b/kernel/params.c
index 821788947e40..65aae11eb93f 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -15,7 +15,7 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
-#include <linux/moduleparam.h>
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/errno.h>
-- 
cgit v1.2.3


From bdfa97bf7263657b83bc5b68567a3a60dde84c5b Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Tue, 25 Oct 2011 13:13:57 -0400
Subject: kernel: fix up module header handling in rcutiny files

The file rcutiny.c does not need moduleparam.h header, as
there are no modparams in this file.

However rcutiny_plugin.h does define a module_init() and
a module_exit() and it uses the various MODULE_ macros, so
it really does need module.h included.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/rcutiny.c        | 1 -
 kernel/rcutiny_plugin.h | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index b5e525d67fe3..636af6d9c6e5 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -22,7 +22,6 @@
  * For detailed explanation of Read-Copy Update mechanism see -
  *		Documentation/RCU
  */
-#include <linux/moduleparam.h>
 #include <linux/completion.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 02aa7139861c..2b0484a5dc28 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -23,6 +23,7 @@
  */
 
 #include <linux/kthread.h>
+#include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
-- 
cgit v1.2.3


From 6e5fdeedca610df600aabc393c4b1f44b128fe49 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Thu, 26 May 2011 16:00:52 -0400
Subject: kernel: Fix files explicitly needing EXPORT_SYMBOL infrastructure

These files were getting <linux/module.h> via an implicit non-obvious
path, but we want to crush those out of existence since they cost
time during compiles of processing thousands of lines of headers
for no reason.  Give them the lightweight header that just contains
the EXPORT_SYMBOL infrastructure.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/compat.c                 | 1 +
 kernel/debug/kdb/kdb_debugger.c | 1 +
 kernel/events/core.c            | 1 +
 kernel/irq/generic-chip.c       | 1 +
 kernel/power/hibernate.c        | 1 +
 kernel/power/main.c             | 1 +
 kernel/power/qos.c              | 1 +
 kernel/power/suspend.c          | 1 +
 kernel/time/posix-clock.c       | 1 +
 kernel/trace/blktrace.c         | 1 +
 10 files changed, 10 insertions(+)

(limited to 'kernel')

diff --git a/kernel/compat.c b/kernel/compat.c
index e2435ee9993a..f346cedfe24d 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -21,6 +21,7 @@
 #include <linux/unistd.h>
 #include <linux/security.h>
 #include <linux/timex.h>
+#include <linux/export.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
 #include <linux/times.h>
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index d9ca9aa481ec..8b68ce78ff17 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -11,6 +11,7 @@
 #include <linux/kgdb.h>
 #include <linux/kdb.h>
 #include <linux/kdebug.h>
+#include <linux/export.h>
 #include "kdb_private.h"
 #include "../debug_core.h"
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d1a1bee35228..92b8811f2234 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -25,6 +25,7 @@
 #include <linux/reboot.h>
 #include <linux/vmstat.h>
 #include <linux/device.h>
+#include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 6cb7613e4bf4..c89295a8f668 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -6,6 +6,7 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/slab.h>
+#include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/syscore_ops.h>
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1c53f7fad5f7..b4511b6d3ef9 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -9,6 +9,7 @@
  * This file is released under the GPLv2.
  */
 
+#include <linux/export.h>
 #include <linux/suspend.h>
 #include <linux/syscalls.h>
 #include <linux/reboot.h>
diff --git a/kernel/power/main.c b/kernel/power/main.c
index a52e88425a31..71f49fe4377e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -8,6 +8,7 @@
  *
  */
 
+#include <linux/export.h>
 #include <linux/kobject.h>
 #include <linux/string.h>
 #include <linux/resume-trace.h>
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 1c1797dd1d1d..2c0a65e27626 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -43,6 +43,7 @@
 #include <linux/kernel.h>
 
 #include <linux/uaccess.h>
+#include <linux/export.h>
 
 /*
  * locking rule: all changes to constraints or notifiers lists
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 31aae3219f89..4953dc054c53 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -22,6 +22,7 @@
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/export.h>
 #include <linux/suspend.h>
 #include <linux/syscore_ops.h>
 #include <trace/events/power.h>
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index c340ca658f37..ce033c7aa2e8 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -18,6 +18,7 @@
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #include <linux/device.h>
+#include <linux/export.h>
 #include <linux/file.h>
 #include <linux/posix-clock.h>
 #include <linux/slab.h>
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7c910a5593a6..16fc34a0806f 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
+#include <linux/export.h>
 #include <linux/time.h>
 #include <linux/uaccess.h>
 
-- 
cgit v1.2.3


From ec53cf23c0ddb0c29950b9a4ac46964c4c6c6c2f Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Mon, 19 Sep 2011 20:33:19 -0400
Subject: irq: don't put module.h into irq.h for tracking irqgen modules.

Recent commit "irq: Track the  owner of irq descriptor" in
commit ID b6873807a7143b7 placed module.h into linux/irq.h
but we are trying to limit module.h inclusion to just C files
that really need it, due to its size and number of children
includes.  This targets just reversing that include.

Add in the basic "struct module" since that is all we really need
to ensure things compile.  In theory, b687380 should have added the
module.h include to the irqdesc.h header as well, but the implicit
module.h everywhere presence masked this from showing up.  So give
it the "struct module" as well.

As for the C files, irqdesc.c is only using THIS_MODULE, so it
does not need module.h - give it export.h instead.  The C file
irq/manage.c is now (as of b687380) using try_module_get and
module_put and so it needs module.h (which it already has).

Also convert the irq_alloc_descs variants to macros, since all
they really do is is call the __irq_alloc_descs primitive.
This avoids including export.h and no debug info is lost.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 kernel/irq/irqdesc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 1550e8447a16..d86e254b95eb 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -9,7 +9,7 @@
  */
 #include <linux/irq.h>
 #include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/radix-tree.h>
-- 
cgit v1.2.3


From fcf634098c00dd9cd247447368495f0b79be12d1 Mon Sep 17 00:00:00 2001
From: Christopher Yeoh <cyeoh@au1.ibm.com>
Date: Mon, 31 Oct 2011 17:06:39 -0700
Subject: Cross Memory Attach

The basic idea behind cross memory attach is to allow MPI programs doing
intra-node communication to do a single copy of the message rather than a
double copy of the message via shared memory.

The following patch attempts to achieve this by allowing a destination
process, given an address and size from a source process, to copy memory
directly from the source process into its own address space via a system
call.  There is also a symmetrical ability to copy from the current
process's address space into a destination process's address space.

- Use of /proc/pid/mem has been considered, but there are issues with
  using it:
  - Does not allow for specifying iovecs for both src and dest, assuming
    preadv or pwritev was implemented either the area read from or
  written to would need to be contiguous.
  - Currently mem_read allows only processes who are currently
  ptrace'ing the target and are still able to ptrace the target to read
  from the target. This check could possibly be moved to the open call,
  but its not clear exactly what race this restriction is stopping
  (reason  appears to have been lost)
  - Having to send the fd of /proc/self/mem via SCM_RIGHTS on unix
  domain socket is a bit ugly from a userspace point of view,
  especially when you may have hundreds if not (eventually) thousands
  of processes  that all need to do this with each other
  - Doesn't allow for some future use of the interface we would like to
  consider adding in the future (see below)
  - Interestingly reading from /proc/pid/mem currently actually
  involves two copies! (But this could be fixed pretty easily)

As mentioned previously use of vmsplice instead was considered, but has
problems.  Since you need the reader and writer working co-operatively if
the pipe is not drained then you block.  Which requires some wrapping to
do non blocking on the send side or polling on the receive.  In all to all
communication it requires ordering otherwise you can deadlock.  And in the
example of many MPI tasks writing to one MPI task vmsplice serialises the
copying.

There are some cases of MPI collectives where even a single copy interface
does not get us the performance gain we could.  For example in an
MPI_Reduce rather than copy the data from the source we would like to
instead use it directly in a mathops (say the reduce is doing a sum) as
this would save us doing a copy.  We don't need to keep a copy of the data
from the source.  I haven't implemented this, but I think this interface
could in the future do all this through the use of the flags - eg could
specify the math operation and type and the kernel rather than just
copying the data would apply the specified operation between the source
and destination and store it in the destination.

Although we don't have a "second user" of the interface (though I've had
some nibbles from people who may be interested in using it for intra
process messaging which is not MPI).  This interface is something which
hardware vendors are already doing for their custom drivers to implement
fast local communication.  And so in addition to this being useful for
OpenMPI it would mean the driver maintainers don't have to fix things up
when the mm changes.

There was some discussion about how much faster a true zero copy would
go. Here's a link back to the email with some testing I did on that:

http://marc.info/?l=linux-mm&m=130105930902915&w=2

There is a basic man page for the proposed interface here:

http://ozlabs.org/~cyeoh/cma/process_vm_readv.txt

This has been implemented for x86 and powerpc, other architecture should
mainly (I think) just need to add syscall numbers for the process_vm_readv
and process_vm_writev. There are 32 bit compatibility versions for
64-bit kernels.

For arch maintainers there are some simple tests to be able to quickly
verify that the syscalls are working correctly here:

http://ozlabs.org/~cyeoh/cma/cma-test-20110718.tgz

Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: <linux-man@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys_ni.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a9a5de07c4f1..47bfa16430d7 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -145,6 +145,10 @@ cond_syscall(sys_io_submit);
 cond_syscall(sys_io_cancel);
 cond_syscall(sys_io_getevents);
 cond_syscall(sys_syslog);
+cond_syscall(sys_process_vm_readv);
+cond_syscall(sys_process_vm_writev);
+cond_syscall(compat_sys_process_vm_readv);
+cond_syscall(compat_sys_process_vm_writev);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);
-- 
cgit v1.2.3


From c9f01245b6a7d77d17deaa71af10f6aca14fa24e Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 31 Oct 2011 17:07:15 -0700
Subject: oom: remove oom_disable_count

This removes mm->oom_disable_count entirely since it's unnecessary and
currently buggy.  The counter was intended to be per-process but it's
currently decremented in the exit path for each thread that exits, causing
it to underflow.

The count was originally intended to prevent oom killing threads that
share memory with threads that cannot be killed since it doesn't lead to
future memory freeing.  The counter could be fixed to represent all
threads sharing the same mm, but it's better to remove the count since:

 - it is possible that the OOM_DISABLE thread sharing memory with the
   victim is waiting on that thread to exit and will actually cause
   future memory freeing, and

 - there is no guarantee that a thread is disabled from oom killing just
   because another thread sharing its mm is oom disabled.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Ying Han <yinghan@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/exit.c |  2 --
 kernel/fork.c | 10 +---------
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 2913b3509d42..d0b7d988f873 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -681,8 +681,6 @@ static void exit_mm(struct task_struct * tsk)
 	enter_lazy_tlb(mm, current);
 	/* We don't want this task to be frozen prematurely */
 	clear_freeze_flag(tsk);
-	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-		atomic_dec(&mm->oom_disable_count);
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
diff --git a/kernel/fork.c b/kernel/fork.c
index 8e6b6f4fb272..70d76191afb9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -501,7 +501,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	mm->cached_hole_size = ~0UL;
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
-	atomic_set(&mm->oom_disable_count, 0);
 
 	if (likely(!mm_alloc_pgd(mm))) {
 		mm->def_flags = 0;
@@ -816,8 +815,6 @@ good_mm:
 	/* Initializing for Swap token stuff */
 	mm->token_priority = 0;
 	mm->last_interval = 0;
-	if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-		atomic_inc(&mm->oom_disable_count);
 
 	tsk->mm = mm;
 	tsk->active_mm = mm;
@@ -1391,13 +1388,8 @@ bad_fork_cleanup_io:
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_mm:
-	if (p->mm) {
-		task_lock(p);
-		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
-			atomic_dec(&p->mm->oom_disable_count);
-		task_unlock(p);
+	if (p->mm)
 		mmput(p->mm);
-	}
 bad_fork_cleanup_signal:
 	if (!(clone_flags & CLONE_THREAD))
 		free_signal_struct(p->signal);
-- 
cgit v1.2.3


From bc3e53f682d93df677dbd5006a404722b3adfe18 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Mon, 31 Oct 2011 17:07:30 -0700
Subject: mm: distinguish between mlocked and pinned pages

Some kernel components pin user space memory (infiniband and perf) (by
increasing the page count) and account that memory as "mlocked".

The difference between mlocking and pinning is:

A. mlocked pages are marked with PG_mlocked and are exempt from
   swapping. Page migration may move them around though.
   They are kept on a special LRU list.

B. Pinned pages cannot be moved because something needs to
   directly access physical memory. They may not be on any
   LRU list.

I recently saw an mlockalled process where mm->locked_vm became
bigger than the virtual size of the process (!) because some
memory was accounted for twice:

Once when the page was mlocked and once when the Infiniband
layer increased the refcount because it needt to pin the RDMA
memory.

This patch introduces a separate counter for pinned pages and
accounts them seperately.

Signed-off-by: Christoph Lameter <cl@linux.com>
Cc: Mike Marciniszyn <infinipath@qlogic.com>
Cc: Roland Dreier <roland@kernel.org>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/events/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index d1a1bee35228..12a0287e0358 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3544,7 +3544,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 		struct ring_buffer *rb = event->rb;
 
 		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
-		vma->vm_mm->locked_vm -= event->mmap_locked;
+		vma->vm_mm->pinned_vm -= event->mmap_locked;
 		rcu_assign_pointer(event->rb, NULL);
 		mutex_unlock(&event->mmap_mutex);
 
@@ -3625,7 +3625,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	lock_limit = rlimit(RLIMIT_MEMLOCK);
 	lock_limit >>= PAGE_SHIFT;
-	locked = vma->vm_mm->locked_vm + extra;
+	locked = vma->vm_mm->pinned_vm + extra;
 
 	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
 		!capable(CAP_IPC_LOCK)) {
@@ -3651,7 +3651,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	atomic_long_add(user_extra, &user->locked_vm);
 	event->mmap_locked = extra;
 	event->mmap_user = get_current_user();
-	vma->vm_mm->locked_vm += event->mmap_locked;
+	vma->vm_mm->pinned_vm += event->mmap_locked;
 
 unlock:
 	if (!ret)
-- 
cgit v1.2.3


From f445027e4e692bd885118460b292d08027fd5501 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 31 Oct 2011 17:11:15 -0700
Subject: stop_machine: make stop_machine safe and efficient to call early

Make stop_machine() safe to call early in boot, before SMP has been set
up, by simply calling the callback function directly if there's only one
CPU online.

[ Fixes from AKPM:
   - add comment
   - local_irq_flags, not save_flags
   - also call hard_irq_disable() for systems which need it

  Tejun suggested using an explicit flag rather than just looking at
  the online cpu count. ]

Cc: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Tejun Heo <htejun@gmail.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/stop_machine.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'kernel')

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ba5070ce5765..5b0951aa0496 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -41,6 +41,7 @@ struct cpu_stopper {
 };
 
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
+static bool stop_machine_initialized = false;
 
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
 {
@@ -386,6 +387,8 @@ static int __init cpu_stop_init(void)
 	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
 	register_cpu_notifier(&cpu_stop_cpu_notifier);
 
+	stop_machine_initialized = true;
+
 	return 0;
 }
 early_initcall(cpu_stop_init);
@@ -485,6 +488,25 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 					    .num_threads = num_online_cpus(),
 					    .active_cpus = cpus };
 
+	if (!stop_machine_initialized) {
+		/*
+		 * Handle the case where stop_machine() is called
+		 * early in boot before stop_machine() has been
+		 * initialized.
+		 */
+		unsigned long flags;
+		int ret;
+
+		WARN_ON_ONCE(smdata.num_threads != 1);
+
+		local_irq_save(flags);
+		hard_irq_disable();
+		ret = (*fn)(data);
+		local_irq_restore(flags);
+
+		return ret;
+	}
+
 	/* Set the initial state and stop all online cpus. */
 	set_state(&smdata, STOPMACHINE_PREPARE);
 	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
-- 
cgit v1.2.3


From 4ff819515b203f937cc6c8a0215a37a68d1ee71f Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@sw.ru>
Date: Mon, 31 Oct 2011 17:11:18 -0700
Subject: watchdog: move watchdog_*_all_cpus under CONFIG_SYSCTL

Fix compilation warnings for CONFIG_SYSCTL=n:

fixed compilation warnings in case of disabled CONFIG_SYSCTL
kernel/watchdog.c:483:13: warning: `watchdog_enable_all_cpus' defined but not used
kernel/watchdog.c:500:13: warning: `watchdog_disable_all_cpus' defined but not used

these functions are static and are used only in sysctl handler, so move
them inside #ifdef CONFIG_SYSCTL too

Signed-off-by: Vasily Averin <vvs@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/watchdog.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index d680381b0e9c..1d7bca7f4f52 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -481,6 +481,8 @@ static void watchdog_disable(int cpu)
 	}
 }
 
+/* sysctl functions */
+#ifdef CONFIG_SYSCTL
 static void watchdog_enable_all_cpus(void)
 {
 	int cpu;
@@ -510,8 +512,6 @@ static void watchdog_disable_all_cpus(void)
 }
 
 
-/* sysctl functions */
-#ifdef CONFIG_SYSCTL
 /*
  * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
  */
-- 
cgit v1.2.3


From 73efc0394e148d0e15583e13712637831f926720 Mon Sep 17 00:00:00 2001
From: Dan Ballard <dan@mindstab.net>
Date: Mon, 31 Oct 2011 17:11:20 -0700
Subject: kernel/sysctl.c: add cap_last_cap to /proc/sys/kernel

Userspace needs to know the highest valid capability of the running
kernel, which right now cannot reliably be retrieved from the header files
only.  The fact that this value cannot be determined properly right now
creates various problems for libraries compiled on newer header files
which are run on older kernels.  They assume capabilities are available
which actually aren't.  libcap-ng is one example.  And we ran into the
same problem with systemd too.

Now the capability is exported in /proc/sys/kernel/cap_last_cap.

[akpm@linux-foundation.org: make cap_last_cap const, per Ulrich]
Signed-off-by: Dan Ballard <dan@mindstab.net>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Ulrich Drepper <drepper@akkadia.org>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2d2ecdcc8cdb..c49d66658ec0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -57,6 +57,7 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
 #include <linux/kmod.h>
+#include <linux/capability.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -134,6 +135,7 @@ static int minolduid;
 static int min_percpu_pagelist_fract = 8;
 
 static int ngroups_max = NGROUPS_MAX;
+static const int cap_last_cap = CAP_LAST_CAP;
 
 #ifdef CONFIG_INOTIFY_USER
 #include <linux/inotify.h>
@@ -740,6 +742,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "cap_last_cap",
+		.data		= (void *)&cap_last_cap,
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
 #if defined(CONFIG_LOCKUP_DETECTOR)
 	{
 		.procname       = "watchdog",
-- 
cgit v1.2.3


From 0eca6b7c78fd997e02bd9850e608102382b7822e Mon Sep 17 00:00:00 2001
From: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Date: Mon, 31 Oct 2011 17:11:25 -0700
Subject: printk: add module parameter ignore_loglevel to control
 ignore_loglevel

We are enabling some power features on medfield.  To test suspend-2-RAM
conveniently, we need turn on/off ignore_loglevel frequently without
rebooting.

Add a module parameter, so users can change it by:
/sys/module/printk/parameters/ignore_loglevel

Signed-off-by: Yanmin Zhang <yanmin.zhang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index b7da18391c38..e62f949ec140 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -532,6 +532,9 @@ static int __init ignore_loglevel_setup(char *str)
 }
 
 early_param("ignore_loglevel", ignore_loglevel_setup);
+module_param_named(ignore_loglevel, ignore_loglevel, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting, to"
+	"print all kernel messages to the console.");
 
 /*
  * Write out chars from start to end - 1 inclusive
-- 
cgit v1.2.3


From 134620f7a865b3bc9e3d56d460603592b70ede21 Mon Sep 17 00:00:00 2001
From: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Date: Mon, 31 Oct 2011 17:11:27 -0700
Subject: printk: add console_suspend module parameter

We are enabling some power features on medfield.  To test suspend-2-RAM
conveniently, we need turn on/off console_suspend_enabled frequently.

Add a module parameter, so users could change it by:
/sys/module/printk/parameters/console_suspend

Signed-off-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index e62f949ec140..6d9dedd11450 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1111,6 +1111,10 @@ static int __init console_suspend_disable(char *str)
 	return 1;
 }
 __setup("no_console_suspend", console_suspend_disable);
+module_param_named(console_suspend, console_suspend_enabled,
+		bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(console_suspend, "suspend console during suspend"
+	" and hibernate operations");
 
 /**
  * suspend_console - suspend the console subsystem
-- 
cgit v1.2.3


From 48e41899e4a3592746e5263c14681bf5c1393563 Mon Sep 17 00:00:00 2001
From: William Douglas <william.r.douglas@gmail.com>
Date: Mon, 31 Oct 2011 17:11:29 -0700
Subject: printk: fix bounds checking for log_prefix

Currently log_prefix is testing that the first character of the log level
and facility is less than '0' and greater than '9' (which is always
false).  It should be testing to see if the character less than '0' or
greater than '9' instead.  This patch makes that change.

The code being changed worked because strtoul bombs out (endp isn't
updated) and 0 is returned anyway.

Signed-off-by: William Douglas <william.douglas@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 6d9dedd11450..286d2c7be52c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -595,7 +595,7 @@ static size_t log_prefix(const char *p, unsigned int *level, char *special)
 		/* multi digit including the level and facility number */
 		char *endp = NULL;
 
-		if (p[1] < '0' && p[1] > '9')
+		if (p[1] < '0' || p[1] > '9')
 			return 0;
 
 		lev = (simple_strtoul(&p[1], &endp, 10) & 7);
-- 
cgit v1.2.3


From ae29bc92da01a2e9d278a9a58c3b307d41cc0254 Mon Sep 17 00:00:00 2001
From: William Douglas <william.r.douglas@gmail.com>
Date: Mon, 31 Oct 2011 17:11:31 -0700
Subject: printk: remove bounds checking for log_prefix

Currently log_prefix is testing that the first character of the log level
and facility is less than '0' and greater than '9' (which is always
false).

Since the code being updated works because strtoul bombs out (endp isn't
updated) and 0 is returned anyway just remove the check and don't change
the behavior of the function.

Signed-off-by: William Douglas <william.douglas@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 286d2c7be52c..1455a0d4eedd 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -595,9 +595,6 @@ static size_t log_prefix(const char *p, unsigned int *level, char *special)
 		/* multi digit including the level and facility number */
 		char *endp = NULL;
 
-		if (p[1] < '0' || p[1] > '9')
-			return 0;
-
 		lev = (simple_strtoul(&p[1], &endp, 10) & 7);
 		if (endp == NULL || endp[0] != '>')
 			return 0;
-- 
cgit v1.2.3


From 50e1499f468fd74c6db95deb2e1e6bfee578ae70 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 31 Oct 2011 17:12:51 -0700
Subject: kgdb: follow rename pack_hex_byte() to hex_byte_pack()

There is no functional change.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Cc: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/debug/gdbstub.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 34872482315e..c22d8c28ad84 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -217,7 +217,7 @@ void gdbstub_msg_write(const char *s, int len)
 
 		/* Pack in hex chars */
 		for (i = 0; i < wcount; i++)
-			bufptr = pack_hex_byte(bufptr, s[i]);
+			bufptr = hex_byte_pack(bufptr, s[i]);
 		*bufptr = '\0';
 
 		/* Move up */
@@ -249,7 +249,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count)
 	if (err)
 		return NULL;
 	while (count > 0) {
-		buf = pack_hex_byte(buf, *tmp);
+		buf = hex_byte_pack(buf, *tmp);
 		tmp++;
 		count--;
 	}
@@ -411,14 +411,14 @@ static char *pack_threadid(char *pkt, unsigned char *id)
 	limit = id + (BUF_THREAD_ID_SIZE / 2);
 	while (id < limit) {
 		if (!lzero || *id != 0) {
-			pkt = pack_hex_byte(pkt, *id);
+			pkt = hex_byte_pack(pkt, *id);
 			lzero = 0;
 		}
 		id++;
 	}
 
 	if (lzero)
-		pkt = pack_hex_byte(pkt, 0);
+		pkt = hex_byte_pack(pkt, 0);
 
 	return pkt;
 }
@@ -486,7 +486,7 @@ static void gdb_cmd_status(struct kgdb_state *ks)
 	dbg_remove_all_break();
 
 	remcom_out_buffer[0] = 'S';
-	pack_hex_byte(&remcom_out_buffer[1], ks->signo);
+	hex_byte_pack(&remcom_out_buffer[1], ks->signo);
 }
 
 static void gdb_get_regs_helper(struct kgdb_state *ks)
@@ -954,7 +954,7 @@ int gdb_serial_stub(struct kgdb_state *ks)
 		/* Reply to host that an exception has occurred */
 		ptr = remcom_out_buffer;
 		*ptr++ = 'T';
-		ptr = pack_hex_byte(ptr, ks->signo);
+		ptr = hex_byte_pack(ptr, ks->signo);
 		ptr += strlen(strcpy(ptr, "thread:"));
 		int_to_threadref(thref, shadow_pid(current->pid));
 		ptr = pack_threadid(ptr, thref);
-- 
cgit v1.2.3


From 33ef6b6984403a688189317ef46bb3caab3b70e0 Mon Sep 17 00:00:00 2001
From: Ben Blum <bblum@andrew.cmu.edu>
Date: Wed, 2 Nov 2011 13:38:05 -0700
Subject: cgroups: more safe tasklist locking in cgroup_attach_proc

Fix unstable tasklist locking in cgroup_attach_proc.

According to this thread - https://lkml.org/lkml/2011/7/27/243 - RCU is
not sufficient to guarantee the tasklist is stable w.r.t.  de_thread and
exit.  Taking tasklist_lock for reading, instead of rcu_read_lock, ensures
proper exclusion.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Acked-by: Paul Menage <paul@paulmenage.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 453100a4159d..64b0e73402df 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2027,7 +2027,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 		goto out_free_group_list;
 
 	/* prevent changes to the threadgroup list while we take a snapshot. */
-	rcu_read_lock();
+	read_lock(&tasklist_lock);
 	if (!thread_group_leader(leader)) {
 		/*
 		 * a race with de_thread from another thread's exec() may strip
@@ -2036,7 +2036,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 		 * throw this task away and try again (from cgroup_procs_write);
 		 * this is "double-double-toil-and-trouble-check locking".
 		 */
-		rcu_read_unlock();
+		read_unlock(&tasklist_lock);
 		retval = -EAGAIN;
 		goto out_free_group_list;
 	}
@@ -2057,7 +2057,7 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	} while_each_thread(leader, tsk);
 	/* remember the number of threads in the array for later. */
 	group_size = i;
-	rcu_read_unlock();
+	read_unlock(&tasklist_lock);
 
 	/*
 	 * step 1: check that we can legitimately attach to the cgroup.
-- 
cgit v1.2.3


From 77ceab8ea590d7dc6c8f055ce43dfebd74428107 Mon Sep 17 00:00:00 2001
From: Ben Blum <bblum@andrew.cmu.edu>
Date: Wed, 2 Nov 2011 13:38:07 -0700
Subject: cgroups: don't attach task to subsystem if migration failed

If a task has exited to the point it has called cgroup_exit() already,
then we can't migrate it to another cgroup anymore.

This can happen when we are attaching a task to a new cgroup between the
call to ->can_attach_task() on subsystems and the migration that is
eventually tried in cgroup_task_migrate().

In this case cgroup_task_migrate() returns -ESRCH and we don't want to
attach the task to the subsystems because the attachment to the new cgroup
itself failed.

Fix this by only calling ->attach_task() on the subsystems if the cgroup
migration succeeded.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Acked-by: Paul Menage <paul@paulmenage.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 64b0e73402df..8386b21224ef 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2135,14 +2135,17 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 		oldcgrp = task_cgroup_from_root(tsk, root);
 		if (cgrp == oldcgrp)
 			continue;
-		/* attach each task to each subsystem */
-		for_each_subsys(root, ss) {
-			if (ss->attach_task)
-				ss->attach_task(cgrp, tsk);
-		}
 		/* if the thread is PF_EXITING, it can just get skipped. */
 		retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
-		BUG_ON(retval != 0 && retval != -ESRCH);
+		if (retval == 0) {
+			/* attach each task to each subsystem */
+			for_each_subsys(root, ss) {
+				if (ss->attach_task)
+					ss->attach_task(cgrp, tsk);
+			}
+		} else {
+			BUG_ON(retval != -ESRCH);
+		}
 	}
 	/* nothing is sensitive to fork() after this point. */
 
-- 
cgit v1.2.3


From 89e8a244b97e48f1f30e898b6f32acca477f2a13 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 2 Nov 2011 13:38:39 -0700
Subject: cpusets: avoid looping when storing to mems_allowed if one node
 remains set

{get,put}_mems_allowed() exist so that general kernel code may locklessly
access a task's set of allowable nodes without having the chance that a
concurrent write will cause the nodemask to be empty on configurations
where MAX_NUMNODES > BITS_PER_LONG.

This could incur a significant delay, however, especially in low memory
conditions because the page allocator is blocking and reclaim requires
get_mems_allowed() itself.  It is not atypical to see writes to
cpuset.mems take over 2 seconds to complete, for example.  In low memory
conditions, this is problematic because it's one of the most imporant
times to change cpuset.mems in the first place!

The only way a task's set of allowable nodes may change is through cpusets
by writing to cpuset.mems and when attaching a task to a generic code is
not reading the nodemask with get_mems_allowed() at the same time, and
then clearing all the old nodes.  This prevents the possibility that a
reader will see an empty nodemask at the same time the writer is storing a
new nodemask.

If at least one node remains unchanged, though, it's possible to simply
set all new nodes and then clear all the old nodes.  Changing a task's
nodemask is protected by cgroup_mutex so it's guaranteed that two threads
are not changing the same task's nodemask at the same time, so the
nodemask is guaranteed to be stored before another thread changes it and
determines whether a node remains set or not.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Paul Menage <paul@paulmenage.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpuset.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 10131fdaff70..ed0ff443f036 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -949,6 +949,8 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 static void cpuset_change_task_nodemask(struct task_struct *tsk,
 					nodemask_t *newmems)
 {
+	bool masks_disjoint = !nodes_intersects(*newmems, tsk->mems_allowed);
+
 repeat:
 	/*
 	 * Allow tasks that have access to memory reserves because they have
@@ -963,7 +965,6 @@ repeat:
 	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
 	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
 
-
 	/*
 	 * ensure checking ->mems_allowed_change_disable after setting all new
 	 * allowed nodes.
@@ -980,9 +981,11 @@ repeat:
 
 	/*
 	 * Allocation of memory is very fast, we needn't sleep when waiting
-	 * for the read-side.
+	 * for the read-side.  No wait is necessary, however, if at least one
+	 * node remains unchanged.
 	 */
-	while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
+	while (masks_disjoint &&
+			ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
 		task_unlock(tsk);
 		if (!task_curr(tsk))
 			yield();
-- 
cgit v1.2.3


From f1ecf06854a66ee663f4d4cf029c78cd62a15e04 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Date: Wed, 2 Nov 2011 13:39:22 -0700
Subject: sysctl: add support for poll()

Adding support for poll() in sysctl fs allows userspace to receive
notifications of changes in sysctl entries.  This adds a infrastructure to
allow files in sysctl fs to be pollable and implements it for hostname and
domainname.

[akpm@linux-foundation.org: s/declare/define/ for definitions]
Signed-off-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Cc: Greg KH <gregkh@suse.de>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys.c            |  2 ++
 kernel/utsname_sysctl.c | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index 58459509b14c..d06c091e0345 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1286,6 +1286,7 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
 		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
 		errno = 0;
 	}
+	uts_proc_notify(UTS_PROC_HOSTNAME);
 	up_write(&uts_sem);
 	return errno;
 }
@@ -1336,6 +1337,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
 		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
 		errno = 0;
 	}
+	uts_proc_notify(UTS_PROC_DOMAINNAME);
 	up_write(&uts_sem);
 	return errno;
 }
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index a2cd77e70d4d..3b0d48ebf81d 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -13,6 +13,7 @@
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <linux/sysctl.h>
+#include <linux/wait.h>
 
 static void *get_uts(ctl_table *table, int write)
 {
@@ -51,12 +52,19 @@ static int proc_do_uts_string(ctl_table *table, int write,
 	uts_table.data = get_uts(table, write);
 	r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
 	put_uts(table, write, uts_table.data);
+
+	if (write)
+		proc_sys_poll_notify(table->poll);
+
 	return r;
 }
 #else
 #define proc_do_uts_string NULL
 #endif
 
+static DEFINE_CTL_TABLE_POLL(hostname_poll);
+static DEFINE_CTL_TABLE_POLL(domainname_poll);
+
 static struct ctl_table uts_kern_table[] = {
 	{
 		.procname	= "ostype",
@@ -85,6 +93,7 @@ static struct ctl_table uts_kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.nodename),
 		.mode		= 0644,
 		.proc_handler	= proc_do_uts_string,
+		.poll		= &hostname_poll,
 	},
 	{
 		.procname	= "domainname",
@@ -92,6 +101,7 @@ static struct ctl_table uts_kern_table[] = {
 		.maxlen		= sizeof(init_uts_ns.name.domainname),
 		.mode		= 0644,
 		.proc_handler	= proc_do_uts_string,
+		.poll		= &domainname_poll,
 	},
 	{}
 };
@@ -105,6 +115,19 @@ static struct ctl_table uts_root_table[] = {
 	{}
 };
 
+#ifdef CONFIG_PROC_SYSCTL
+/*
+ * Notify userspace about a change in a certain entry of uts_kern_table,
+ * identified by the parameter proc.
+ */
+void uts_proc_notify(enum uts_proc proc)
+{
+	struct ctl_table *table = &uts_kern_table[proc];
+
+	proc_sys_poll_notify(table->poll);
+}
+#endif
+
 static int __init utsname_sysctl_init(void)
 {
 	register_sysctl_table(uts_root_table);
-- 
cgit v1.2.3


From c1e2ee2dc436574880758b3836fc96935b774c32 Mon Sep 17 00:00:00 2001
From: Andrew Bresticker <abrestic@google.com>
Date: Wed, 2 Nov 2011 13:40:29 -0700
Subject: memcg: replace ss->id_lock with a rwlock

While back-porting Johannes Weiner's patch "mm: memcg-aware global
reclaim" for an internal effort, we noticed a significant performance
regression during page-reclaim heavy workloads due to high contention of
the ss->id_lock.  This lock protects idr map, and serializes calls to
idr_get_next() in css_get_next() (which is used during the memcg hierarchy
walk).

Since idr_get_next() is just doing a look up, we need only serialize it
with respect to idr_remove()/idr_get_new().  By making the ss->id_lock a
rwlock, contention is greatly reduced and performance improves.

Tested: cat a 256m file from a ramdisk in a 128m container 50 times on
each core (one file + container per core) in parallel on a NUMA machine.
Result is the time for the test to complete in 1 of the containers.
Both kernels included Johannes' memcg-aware global reclaim patches.

Before rwlock patch: 1710.778s
After rwlock patch: 152.227s

Signed-off-by: Andrew Bresticker <abrestic@google.com>
Cc: Paul Menage <menage@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cgroup.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8386b21224ef..d9d5648f3cdc 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4883,9 +4883,9 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
 
 	rcu_assign_pointer(id->css, NULL);
 	rcu_assign_pointer(css->id, NULL);
-	spin_lock(&ss->id_lock);
+	write_lock(&ss->id_lock);
 	idr_remove(&ss->idr, id->id);
-	spin_unlock(&ss->id_lock);
+	write_unlock(&ss->id_lock);
 	kfree_rcu(id, rcu_head);
 }
 EXPORT_SYMBOL_GPL(free_css_id);
@@ -4911,10 +4911,10 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
 		error = -ENOMEM;
 		goto err_out;
 	}
-	spin_lock(&ss->id_lock);
+	write_lock(&ss->id_lock);
 	/* Don't use 0. allocates an ID of 1-65535 */
 	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
-	spin_unlock(&ss->id_lock);
+	write_unlock(&ss->id_lock);
 
 	/* Returns error when there are no free spaces for new ID.*/
 	if (error) {
@@ -4929,9 +4929,9 @@ static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
 	return newid;
 remove_idr:
 	error = -ENOSPC;
-	spin_lock(&ss->id_lock);
+	write_lock(&ss->id_lock);
 	idr_remove(&ss->idr, myid);
-	spin_unlock(&ss->id_lock);
+	write_unlock(&ss->id_lock);
 err_out:
 	kfree(newid);
 	return ERR_PTR(error);
@@ -4943,7 +4943,7 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
 {
 	struct css_id *newid;
 
-	spin_lock_init(&ss->id_lock);
+	rwlock_init(&ss->id_lock);
 	idr_init(&ss->idr);
 
 	newid = get_new_cssid(ss, 0);
@@ -5038,9 +5038,9 @@ css_get_next(struct cgroup_subsys *ss, int id,
 		 * scan next entry from bitmap(tree), tmpid is updated after
 		 * idr_get_next().
 		 */
-		spin_lock(&ss->id_lock);
+		read_lock(&ss->id_lock);
 		tmp = idr_get_next(&ss->idr, &tmpid);
-		spin_unlock(&ss->id_lock);
+		read_unlock(&ss->id_lock);
 
 		if (!tmp)
 			break;
-- 
cgit v1.2.3


From 4536e4d1d21c8172402a2217b0fa1880665ace36 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 3 Nov 2011 07:44:04 -0700
Subject: Revert "perf: Add PM notifiers to fix CPU hotplug races"

This reverts commit 144060fee07e9c22e179d00819c83c86fbcbf82c.

It causes a resume regression for Andi on his Acer Aspire 1830T post
3.1.  The screen just stays black after wakeup.

Also, it really looks like the wrong way to suspend and resume perf
events: I think they should be done as part of the CPU suspend and
resume, rather than as a notifier that does smp_call_function().

Reported-by: Andi Kleen <andi@firstfloor.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/events/core.c | 97 ++--------------------------------------------------
 1 file changed, 2 insertions(+), 95 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 12a0287e0358..e1253faa34dd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -29,7 +29,6 @@
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
 #include <linux/uaccess.h>
-#include <linux/suspend.h>
 #include <linux/syscalls.h>
 #include <linux/anon_inodes.h>
 #include <linux/kernel_stat.h>
@@ -6853,7 +6852,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
 	mutex_lock(&swhash->hlist_mutex);
-	if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) {
+	if (swhash->hlist_refcount > 0) {
 		struct swevent_hlist *hlist;
 
 		hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -6942,14 +6941,7 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
 
-	/*
-	 * Ignore suspend/resume action, the perf_pm_notifier will
-	 * take care of that.
-	 */
-	if (action & CPU_TASKS_FROZEN)
-		return NOTIFY_OK;
-
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 
 	case CPU_UP_PREPARE:
 	case CPU_DOWN_FAILED:
@@ -6968,90 +6960,6 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
-static void perf_pm_resume_cpu(void *unused)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
-	struct pmu *pmu;
-	int idx;
-
-	idx = srcu_read_lock(&pmus_srcu);
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-		ctx = cpuctx->task_ctx;
-
-		perf_ctx_lock(cpuctx, ctx);
-		perf_pmu_disable(cpuctx->ctx.pmu);
-
-		cpu_ctx_sched_out(cpuctx, EVENT_ALL);
-		if (ctx)
-			ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-
-		perf_pmu_enable(cpuctx->ctx.pmu);
-		perf_ctx_unlock(cpuctx, ctx);
-	}
-	srcu_read_unlock(&pmus_srcu, idx);
-}
-
-static void perf_pm_suspend_cpu(void *unused)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
-	struct pmu *pmu;
-	int idx;
-
-	idx = srcu_read_lock(&pmus_srcu);
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-		ctx = cpuctx->task_ctx;
-
-		perf_ctx_lock(cpuctx, ctx);
-		perf_pmu_disable(cpuctx->ctx.pmu);
-
-		perf_event_sched_in(cpuctx, ctx, current);
-
-		perf_pmu_enable(cpuctx->ctx.pmu);
-		perf_ctx_unlock(cpuctx, ctx);
-	}
-	srcu_read_unlock(&pmus_srcu, idx);
-}
-
-static int perf_resume(void)
-{
-	get_online_cpus();
-	smp_call_function(perf_pm_resume_cpu, NULL, 1);
-	put_online_cpus();
-
-	return NOTIFY_OK;
-}
-
-static int perf_suspend(void)
-{
-	get_online_cpus();
-	smp_call_function(perf_pm_suspend_cpu, NULL, 1);
-	put_online_cpus();
-
-	return NOTIFY_OK;
-}
-
-static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr)
-{
-	switch (action) {
-	case PM_POST_HIBERNATION:
-	case PM_POST_SUSPEND:
-		return perf_resume();
-	case PM_HIBERNATION_PREPARE:
-	case PM_SUSPEND_PREPARE:
-		return perf_suspend();
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static struct notifier_block perf_pm_notifier = {
-	.notifier_call = perf_pm,
-};
-
 void __init perf_event_init(void)
 {
 	int ret;
@@ -7066,7 +6974,6 @@ void __init perf_event_init(void)
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
 	register_reboot_notifier(&perf_reboot_notifier);
-	register_pm_notifier(&perf_pm_notifier);
 
 	ret = init_hw_breakpoint();
 	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
-- 
cgit v1.2.3


From 79cfbdfa87e84992d509e6c1648a18e1d7e68c20 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 3 Nov 2011 00:59:25 +0100
Subject: PM / Sleep: Fix race between CPU hotplug and freezer

The CPU hotplug notifications sent out by the _cpu_up() and _cpu_down()
functions depend on the value of the 'tasks_frozen' argument passed to them
(which indicates whether tasks have been frozen or not).
(Examples for such CPU hotplug notifications: CPU_ONLINE, CPU_ONLINE_FROZEN,
CPU_DEAD, CPU_DEAD_FROZEN).

Thus, it is essential that while the callbacks for those notifications are
running, the state of the system with respect to the tasks being frozen or
not remains unchanged, *throughout that duration*. Hence there is a need for
synchronizing the CPU hotplug code with the freezer subsystem.

Since the freezer is involved only in the Suspend/Hibernate call paths, this
patch hooks the CPU hotplug code to the suspend/hibernate notifiers
PM_[SUSPEND|HIBERNATE]_PREPARE and PM_POST_[SUSPEND|HIBERNATE] to prevent
the race between CPU hotplug and freezer, thus ensuring that CPU hotplug
notifications will always be run with the state of the system really being
what the notifications indicate, _throughout_ their execution time.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/cpu.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

(limited to 'kernel')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 12b7458f23b1..aa39dd7a3846 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -15,6 +15,7 @@
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
 #include <linux/gfp.h>
+#include <linux/suspend.h>
 
 #ifdef CONFIG_SMP
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
@@ -476,6 +477,79 @@ static int alloc_frozen_cpus(void)
 	return 0;
 }
 core_initcall(alloc_frozen_cpus);
+
+/*
+ * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU
+ * hotplug when tasks are about to be frozen. Also, don't allow the freezer
+ * to continue until any currently running CPU hotplug operation gets
+ * completed.
+ * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the
+ * 'cpu_add_remove_lock'. And this same lock is also taken by the regular
+ * CPU hotplug path and released only after it is complete. Thus, we
+ * (and hence the freezer) will block here until any currently running CPU
+ * hotplug operation gets completed.
+ */
+void cpu_hotplug_disable_before_freeze(void)
+{
+	cpu_maps_update_begin();
+	cpu_hotplug_disabled = 1;
+	cpu_maps_update_done();
+}
+
+
+/*
+ * When tasks have been thawed, re-enable regular CPU hotplug (which had been
+ * disabled while beginning to freeze tasks).
+ */
+void cpu_hotplug_enable_after_thaw(void)
+{
+	cpu_maps_update_begin();
+	cpu_hotplug_disabled = 0;
+	cpu_maps_update_done();
+}
+
+/*
+ * When callbacks for CPU hotplug notifications are being executed, we must
+ * ensure that the state of the system with respect to the tasks being frozen
+ * or not, as reported by the notification, remains unchanged *throughout the
+ * duration* of the execution of the callbacks.
+ * Hence we need to prevent the freezer from racing with regular CPU hotplug.
+ *
+ * This synchronization is implemented by mutually excluding regular CPU
+ * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
+ * Hibernate notifications.
+ */
+static int
+cpu_hotplug_pm_callback(struct notifier_block *nb,
+			unsigned long action, void *ptr)
+{
+	switch (action) {
+
+	case PM_SUSPEND_PREPARE:
+	case PM_HIBERNATION_PREPARE:
+		cpu_hotplug_disable_before_freeze();
+		break;
+
+	case PM_POST_SUSPEND:
+	case PM_POST_HIBERNATION:
+		cpu_hotplug_enable_after_thaw();
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+
+int cpu_hotplug_pm_sync_init(void)
+{
+	pm_notifier(cpu_hotplug_pm_callback, 0);
+	return 0;
+}
+core_initcall(cpu_hotplug_pm_sync_init);
+
 #endif /* CONFIG_PM_SLEEP_SMP */
 
 /**
-- 
cgit v1.2.3


From 6513fd6972f725291ee8ce62c7a39fb8a6c7391e Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Thu, 3 Nov 2011 10:12:36 +0100
Subject: PM / QoS: Remove redundant check

Remove an "if" check, that repeats an equivalent one 6 lines above.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/qos.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 1c1797dd1d1d..5167d996cd02 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -386,8 +386,7 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
 		pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE);
 		filp->private_data = req;
 
-		if (filp->private_data)
-			return 0;
+		return 0;
 	}
 	return -EPERM;
 }
-- 
cgit v1.2.3


From d6cc76856d353a3a9c43bead33210b9216dce332 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 4 Nov 2011 01:04:52 +0100
Subject: PM / Freezer: Revert 27920651fe "PM / Freezer: Make
 fake_signal_wake_up() wake TASK_KILLABLE tasks too"

Commit 27920651fe "PM / Freezer: Make fake_signal_wake_up() wake
TASK_KILLABLE tasks too" updated fake_signal_wake_up() used by freezer
to wake up KILLABLE tasks.  Sending unsolicited wakeups to tasks in
killable sleep is dangerous as there are code paths which depend on
tasks not waking up spuriously from KILLABLE sleep.

For example. sys_read() or page can sleep in TASK_KILLABLE assuming
that wait/down/whatever _killable can only fail if we can not return
to the usermode.  TASK_TRACED is another obvious example.

The previous patch updated wait_event_freezekillable() such that it
doesn't depend on the spurious wakeup.  This patch reverts the
offending commit.

Note that the spurious KILLABLE wakeup had other implicit effects in
KILLABLE sleeps in nfs and cifs and those will need further updates to
regain freezekillable behavior.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/freezer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/freezer.c b/kernel/freezer.c
index 66a594e8ad2f..7b01de98bb6a 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -67,7 +67,7 @@ static void fake_signal_wake_up(struct task_struct *p)
 	unsigned long flags;
 
 	spin_lock_irqsave(&p->sighand->siglock, flags);
-	signal_wake_up(p, 1);
+	signal_wake_up(p, 0);
 	spin_unlock_irqrestore(&p->sighand->siglock, flags);
 }
 
-- 
cgit v1.2.3


From 1cd0d6c3021c8d76641b37203f504634b87fbabc Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Tue, 1 Nov 2011 03:59:33 +0000
Subject: module: Enable dynamic debugging regardless of taint

Dynamic debugging is currently disabled for tainted modules, except
for TAINT_CRAP.  This prevents use of dynamic debugging for
out-of-tree modules once the next patch is applied.

This condition was apparently intended to avoid a crash if a force-
loaded module has an incompatible definition of dynamic debug
structures.  However, a administrator that forces us to load a module
is claiming that it *is* compatible even though it fails our version
checks.  If they are mistaken, there are any number of ways the module
could crash the system.

As a side-effect, proprietary and other tainted modules can now use
dynamic_debug.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 kernel/module.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index 93342d992f34..3c5509642847 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2878,8 +2878,7 @@ static struct module *load_module(void __user *umod,
 	}
 
 	/* This has to be done once we're sure module name is unique. */
-	if (!mod->taints || mod->taints == (1U<<TAINT_CRAP))
-		dynamic_debug_setup(info.debug, info.num_debug);
+	dynamic_debug_setup(info.debug, info.num_debug);
 
 	/* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -2915,8 +2914,7 @@ static struct module *load_module(void __user *umod,
 	module_bug_cleanup(mod);
 
  ddebug:
-	if (!mod->taints || mod->taints == (1U<<TAINT_CRAP))
-		dynamic_debug_remove(info.debug);
+	dynamic_debug_remove(info.debug);
  unlock:
 	mutex_unlock(&module_mutex);
 	synchronize_sched();
-- 
cgit v1.2.3


From 2449b8ba0745327c5fa49a8d9acffe03b2eded69 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 24 Oct 2011 15:12:28 +0200
Subject: module,bug: Add TAINT_OOT_MODULE flag for modules not built in-tree

Use of the GPL or a compatible licence doesn't necessarily make the code
any good.  We already consider staging modules to be suspect, and this
should also be true for out-of-tree modules which may receive very
little review.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Reviewed-by: Dave Jones <davej@redhat.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (patched oops-tracing.txt)
---
 kernel/module.c | 5 +++++
 kernel/panic.c  | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index 3c5509642847..ef8cb70c6996 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2487,6 +2487,9 @@ static int check_modinfo(struct module *mod, struct load_info *info)
 		return -ENOEXEC;
 	}
 
+	if (!get_modinfo(info, "intree"))
+		add_taint_module(mod, TAINT_OOT_MODULE);
+
 	if (get_modinfo(info, "staging")) {
 		add_taint_module(mod, TAINT_CRAP);
 		printk(KERN_WARNING "%s: module is from the staging directory,"
@@ -3255,6 +3258,8 @@ static char *module_flags(struct module *mod, char *buf)
 		buf[bx++] = '(';
 		if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE))
 			buf[bx++] = 'P';
+		else if (mod->taints & (1 << TAINT_OOT_MODULE))
+			buf[bx++] = 'O';
 		if (mod->taints & (1 << TAINT_FORCED_MODULE))
 			buf[bx++] = 'F';
 		if (mod->taints & (1 << TAINT_CRAP))
diff --git a/kernel/panic.c b/kernel/panic.c
index d7bb6974efb5..b26593604214 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -177,6 +177,7 @@ static const struct tnt tnts[] = {
 	{ TAINT_WARN,			'W', ' ' },
 	{ TAINT_CRAP,			'C', ' ' },
 	{ TAINT_FIRMWARE_WORKAROUND,	'I', ' ' },
+	{ TAINT_OOT_MODULE,		'O', ' ' },
 };
 
 /**
@@ -194,6 +195,7 @@ static const struct tnt tnts[] = {
  *  'W' - Taint on warning.
  *  'C' - modules from drivers/staging are loaded.
  *  'I' - Working around severe firmware bug.
+ *  'O' - Out-of-tree module has been loaded.
  *
  *	The string is overwritten by the next call to print_tainted().
  */
-- 
cgit v1.2.3


From a6f05b97d1ba87326bd96f3da9fef994830d6994 Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Sun, 6 Nov 2011 21:54:12 +0100
Subject: PM / QoS: Set cpu_dma_pm_qos->name

Since commit 4a31a334, the name of this misc device is not initialized,
which leads to a funny device named /dev/(null) being created and
/proc/misc containing an entry with just a number but no name. The latter
leads to complaints by cryptsetup, which caused me to investigate this
matter.

Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/qos.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 56db75147186..995e3bd3417b 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -70,6 +70,7 @@ static struct pm_qos_constraints cpu_dma_constraints = {
 };
 static struct pm_qos_object cpu_dma_pm_qos = {
 	.constraints = &cpu_dma_constraints,
+	.name = "cpu_dma_latency",
 };
 
 static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
-- 
cgit v1.2.3