From f629299b544b6cc12b4e3e85fec96f4ce5809482 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Sun, 24 Jul 2011 23:15:42 +0200
Subject: trace events: Update version number reference to new 3.x scheme for
 EVENT_POWER_TRACING_DEPRECATED

What was scheduled to be 2.6.41 is now going to be 3.1 .

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1107250929370.8080@swampdragon.chaosbits.net
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2ad39e556cb4..cd3134510f3d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED
 	  power:power_frequency
 	  This is for userspace compatibility
 	  and will vanish after 5 kernel iterations,
-	  namely 2.6.41.
+	  namely 3.1.
 
 config CONTEXT_SWITCH_TRACER
 	bool
-- 
cgit v1.2.3


From 1dd75f91ae713049eb6baaa640078f3a6549e522 Mon Sep 17 00:00:00 2001
From: "jhbird.choi@samsung.com" <jhbird.choi@samsung.com>
Date: Thu, 21 Jul 2011 15:29:14 +0900
Subject: genirq: Fix wrong bit operation

(!msk & 0x01) should be !(msk & 0x01)

Signed-off-by: Jonghwan Choi <jhbird.choi@samsung.com>
Link: http://lkml.kernel.org/r/1311229754-6003-1-git-send-email-jhbird.choi@samsung.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@kernel.org
---
 kernel/irq/generic-chip.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 3a2cab407b93..e38544dddb18 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
 		gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask);
 
 	for (i = gc->irq_base; msk; msk >>= 1, i++) {
-		if (!msk & 0x01)
+		if (!(msk & 0x01))
 			continue;
 
 		if (flags & IRQ_GC_INIT_NESTED_LOCK)
@@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
 	raw_spin_unlock(&gc_lock);
 
 	for (; msk; msk >>= 1, i++) {
-		if (!msk & 0x01)
+		if (!(msk & 0x01))
 			continue;
 
 		/* Remove handler first. That will mask the irq line */
-- 
cgit v1.2.3


From f3637a5f2e2eb391ff5757bc83fb5de8f9726464 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 7 Jul 2011 22:32:17 +0200
Subject: irq: Always set IRQF_ONESHOT if no primary handler is specified

If no primary handler is specified then a default one is assigned
which always returns IRQ_WAKE_THREAD. This handler requires the
IRQF_ONESHOT flag on LEVEL / EIO typed irqs because the source of
interrupt is not disabled. Since it is required for those users and
there is no difference for others it makes sense to add this flag
unconditionally.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: http://lkml.kernel.org/r/1310070737-18514-1-git-send-email-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/manage.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a7840aeb0fb..3f9cd4799da7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1322,6 +1322,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 		if (!thread_fn)
 			return -EINVAL;
 		handler = irq_default_primary_handler;
+		irqflags |= IRQF_ONESHOT;
 	}
 
 	action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
-- 
cgit v1.2.3


From b6873807a7143b7d6d8b06809295e559d07d7deb Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Mon, 11 Jul 2011 12:17:31 +0200
Subject: irq: Track the owner of irq descriptor

Interrupt descriptors can be allocated from modules. The interrupts
are used by other modules, but we have no refcount on the module which
provides the interrupts and there is no way to establish one on the
device level as the interrupt using module is agnostic to the fact
that the interrupt is provided by a module rather than by some builtin
interrupt controller.

To prevent removal of the interrupt providing module, we can track the
owner of the interrupt descriptor, which also provides the relevant
irq chip functions in the irq descriptor.

request/setup_irq() can now acquire a refcount on the owner module to
prevent unloading. free_irq() drops the refcount.

Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Link: http://lkml.kernel.org/r/20110711101731.GA13804@Chamillionaire.breakpoint.cc
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/irqdesc.c | 36 ++++++++++++++++++++++++------------
 kernel/irq/manage.c  | 17 +++++++++++++----
 2 files changed, 37 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4c60a50e66b2..cb65d0360e31 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { }
 static inline int desc_node(struct irq_desc *desc) { return 0; }
 #endif
 
-static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
+static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
+		struct module *owner)
 {
 	int cpu;
 
@@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
 	desc->name = NULL;
+	desc->owner = owner;
 	for_each_possible_cpu(cpu)
 		*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
 	desc_smp_init(desc, node);
@@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc)
 static inline void free_masks(struct irq_desc *desc) { }
 #endif
 
-static struct irq_desc *alloc_desc(int irq, int node)
+static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
 {
 	struct irq_desc *desc;
 	gfp_t gfp = GFP_KERNEL;
@@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
 	raw_spin_lock_init(&desc->lock);
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 
-	desc_set_defaults(irq, desc, node);
+	desc_set_defaults(irq, desc, node, owner);
 
 	return desc;
 
@@ -173,13 +175,14 @@ static void free_desc(unsigned int irq)
 	kfree(desc);
 }
 
-static int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static int alloc_descs(unsigned int start, unsigned int cnt, int node,
+		       struct module *owner)
 {
 	struct irq_desc *desc;
 	int i;
 
 	for (i = 0; i < cnt; i++) {
-		desc = alloc_desc(start + i, node);
+		desc = alloc_desc(start + i, node, owner);
 		if (!desc)
 			goto err;
 		mutex_lock(&sparse_irq_lock);
@@ -227,7 +230,7 @@ int __init early_irq_init(void)
 		nr_irqs = initcnt;
 
 	for (i = 0; i < initcnt; i++) {
-		desc = alloc_desc(i, node);
+		desc = alloc_desc(i, node, NULL);
 		set_bit(i, allocated_irqs);
 		irq_insert_desc(i, desc);
 	}
@@ -261,7 +264,7 @@ int __init early_irq_init(void)
 		alloc_masks(&desc[i], GFP_KERNEL, node);
 		raw_spin_lock_init(&desc[i].lock);
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
-		desc_set_defaults(i, &desc[i], node);
+		desc_set_defaults(i, &desc[i], node, NULL);
 	}
 	return arch_early_irq_init();
 }
@@ -276,8 +279,16 @@ static void free_desc(unsigned int irq)
 	dynamic_irq_cleanup(irq);
 }
 
-static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
+			      struct module *owner)
 {
+	u32 i;
+
+	for (i = 0; i < cnt; i++) {
+		struct irq_desc *desc = irq_to_desc(start + i);
+
+		desc->owner = owner;
+	}
 	return start;
 }
 
@@ -337,7 +348,8 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
  * Returns the first irq number or error code
  */
 int __ref
-irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
+__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+		  struct module *owner)
 {
 	int start, ret;
 
@@ -366,13 +378,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
 
 	bitmap_set(allocated_irqs, start, cnt);
 	mutex_unlock(&sparse_irq_lock);
-	return alloc_descs(start, cnt, node);
+	return alloc_descs(start, cnt, node, owner);
 
 err:
 	mutex_unlock(&sparse_irq_lock);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(irq_alloc_descs);
+EXPORT_SYMBOL_GPL(__irq_alloc_descs);
 
 /**
  * irq_reserve_irqs - mark irqs allocated
@@ -440,7 +452,7 @@ void dynamic_irq_cleanup(unsigned int irq)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
-	desc_set_defaults(irq, desc, desc_node(desc));
+	desc_set_defaults(irq, desc, desc_node(desc), NULL);
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 3f9cd4799da7..2e9425889fa8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 	if (desc->irq_data.chip == &no_irq_chip)
 		return -ENOSYS;
+	if (!try_module_get(desc->owner))
+		return -ENODEV;
 	/*
 	 * Some drivers like serial.c use request_irq() heavily,
 	 * so we have to be careful not to interfere with a
@@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	 */
 	nested = irq_settings_is_nested_thread(desc);
 	if (nested) {
-		if (!new->thread_fn)
-			return -EINVAL;
+		if (!new->thread_fn) {
+			ret = -EINVAL;
+			goto out_mput;
+		}
 		/*
 		 * Replace the primary handler which was provided from
 		 * the driver for non nested interrupt handling by the
@@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 		t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
 				   new->name);
-		if (IS_ERR(t))
-			return PTR_ERR(t);
+		if (IS_ERR(t)) {
+			ret = PTR_ERR(t);
+			goto out_mput;
+		}
 		/*
 		 * We keep the reference to the task struct even if
 		 * the thread dies to avoid that the interrupt code
@@ -1095,6 +1101,8 @@ out_thread:
 			kthread_stop(t);
 		put_task_struct(t);
 	}
+out_mput:
+	module_put(desc->owner);
 	return ret;
 }
 
@@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		put_task_struct(action->thread);
 	}
 
+	module_put(desc->owner);
 	return action;
 }
 
-- 
cgit v1.2.3


From b77f0f3c1f587791aa5d9bd1b0012c9a89eb9258 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Fri, 5 Aug 2011 16:40:40 -0400
Subject: jump label: Reduce the cycle count by changing the link order

In the course of testing jump labels for use with the CFS
bandwidth controller, Paul Turner, discovered that using jump
labels reduced the branch count and the instruction count, but
did not reduce the cycle count or wall time.

I noticed that having the jump_label.o included in the kernel
but not used in any way still caused this increase in cycle
count and wall time. Thus, I moved jump_label.o in the
kernel/Makefile, thus changing the link order, and presumably
moving it out of hot icache areas. This brought down the cycle
count/time as expected.

In addition to Paul's testing,  I've tested the patch using a
single 'static_branch()' in the getppid() path, and basically
running tight loops of calls to getppid(). Here are my results
for the branch disabled case:

With jump labels turned on (CONFIG_JUMP_LABEL), branch disabled:

 Performance counter stats for 'bash -c /tmp/getppid;true' (50 runs):

     3,969,510,217 instructions             #	   0.864 IPC     ( +-0.000% )
     4,592,334,954 cycles                     ( +-   0.046% )
       751,634,470 branches                   ( +-   0.000% )

        1.722635797  seconds time elapsed   ( +-   0.046% )

Jump labels turned off (CONFIG_JUMP_LABEL not set), branch
disabled:

 Performance counter stats for 'bash -c /tmp/getppid;true' (50 runs):

     4,009,611,846 instructions             #	   0.867 IPC     ( +-0.000% )
     4,622,210,580 cycles                     ( +-   0.012% )
       771,662,904 branches                   ( +-   0.000% )

        1.734341454  seconds time elapsed   ( +-   0.022% )

Signed-off-by: Jason Baron <jbaron@redhat.com>
Cc: rth@redhat.com
Cc: a.p.zijlstra@chello.nl
Cc: rostedt@goodmis.org
Link: http://lkml.kernel.org/r/20110805204040.GG2522@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Paul Turner <pjt@google.com>
---
 kernel/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index d06467fc8f7c..eca595e2fd52 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
-	    async.o range.o jump_label.o
+	    async.o range.o
 obj-y += groups.o
 
 ifdef CONFIG_FUNCTION_TRACER
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += events/
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
-- 
cgit v1.2.3


From 80e0401e35410a69bfae05b454db8a7187edd6b8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 5 Aug 2011 14:26:17 +0200
Subject: lockdep: Fix wrong assumption in match_held_lock

match_held_lock() was assuming it was being called on a lock class
that had already seen usage.

This condition was true for bug-free code using lockdep_assert_held(),
since you're in fact holding the lock when calling it. However the
assumption fails the moment you assume the assertion can fail, which
is the whole point of having the assertion in the first place.

Anyway, now that there's more lockdep_is_held() users, notably
__rcu_dereference_check(), its much easier to trigger this since we
test for a number of locks and we only need to hold any one of them to
be good.

Reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1312547787.28695.2.camel@twins
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/lockdep.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8c24294e477f..91d67ce3a8d5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3111,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
 		if (!class)
 			class = look_up_lock_class(lock, 0);
 
-		if (DEBUG_LOCKS_WARN_ON(!class))
+		/*
+		 * If look_up_lock_class() failed to find a class, we're trying
+		 * to test if we hold a lock that has never yet been acquired.
+		 * Clearly if the lock hasn't been acquired _ever_, we're not
+		 * holding it either, so report failure.
+		 */
+		if (!class)
 			return 0;
 
 		if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
-- 
cgit v1.2.3


From f2c0d0266cc5eb36a4aa44944b4096ec121490aa Mon Sep 17 00:00:00 2001
From: Jonathan Nieder <jrnieder@gmail.com>
Date: Mon, 8 Aug 2011 06:22:43 +0200
Subject: cap_syslog: don't use WARN_ONCE for CAP_SYS_ADMIN deprecation warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

syslog-ng versions before 3.3.0beta1 (2011-05-12) assume that
CAP_SYS_ADMIN is sufficient to access syslog, so ever since CAP_SYSLOG
was introduced (2010-11-25) they have triggered a warning.

Commit ee24aebffb75 ("cap_syslog: accept CAP_SYS_ADMIN for now")
improved matters a little by making syslog-ng work again, just keeping
the WARN_ONCE().  But still, this is a warning that writes a stack trace
we don't care about to syslog, sets a taint flag, and alarms sysadmins
when nothing worse has happened than use of an old userspace with a
recent kernel.

Convert the WARN_ONCE to a printk_once to avoid that while continuing to
give userspace developers a hint that this is an unwanted
backward-compatibility feature and won't be around forever.

Reported-by: Ralf Hildebrandt <ralf.hildebrandt@charite.de>
Reported-by: Niels <zorglub_olsen@hotmail.com>
Reported-by: Paweł Sikora <pluto@agmk.net>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Liked-by: Gergely Nagy <algernon@madhouse-project.org>
Acked-by: Serge Hallyn <serge@hallyn.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 37dff3429adb..836a2ae0ac31 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -318,8 +318,10 @@ static int check_syslog_permissions(int type, bool from_file)
 			return 0;
 		/* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
 		if (capable(CAP_SYS_ADMIN)) {
-			WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN "
-				 "but no CAP_SYSLOG (deprecated).\n");
+			printk_once(KERN_WARNING "%s (%d): "
+				 "Attempt to access syslog with CAP_SYS_ADMIN "
+				 "but no CAP_SYSLOG (deprecated).\n",
+				 current->comm, task_pid_nr(current));
 			return 0;
 		}
 		return -EPERM;
-- 
cgit v1.2.3


From c09c47caedc9854d59378d6e34c989e51cfdd2b4 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Thu, 11 Aug 2011 10:36:05 +0200
Subject: blktrace: add FLUSH/FUA support

Add FLUSH/FUA support to blktrace. As FLUSH precedes WRITE and/or
FUA follows WRITE, use the same 'F' flag for both cases and
distinguish them by their (relative) position. The end results
look like (other flags might be shown also):

 - WRITE:            W
 - WRITE_FLUSH:      FW
 - WRITE_FUA:        WF
 - WRITE_FLUSH_FUA:  FWF

Note that we reuse TC_BARRIER due to lack of bit space of act_mask
so that the older versions of blktrace tools will report flush
requests as barriers from now on.

Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 kernel/trace/blktrace.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6957aa298dfa..7c910a5593a6 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 	what |= MASK_TC_BIT(rw, RAHEAD);
 	what |= MASK_TC_BIT(rw, META);
 	what |= MASK_TC_BIT(rw, DISCARD);
+	what |= MASK_TC_BIT(rw, FLUSH);
+	what |= MASK_TC_BIT(rw, FUA);
 
 	pid = tsk->pid;
 	if (act_log_check(bt, what, sector, pid))
@@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
 		goto out;
 	}
 
+	if (tc & BLK_TC_FLUSH)
+		rwbs[i++] = 'F';
+
 	if (tc & BLK_TC_DISCARD)
 		rwbs[i++] = 'D';
 	else if (tc & BLK_TC_WRITE)
@@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
 	else
 		rwbs[i++] = 'N';
 
+	if (tc & BLK_TC_FUA)
+		rwbs[i++] = 'F';
 	if (tc & BLK_TC_AHEAD)
 		rwbs[i++] = 'A';
-	if (tc & BLK_TC_BARRIER)
-		rwbs[i++] = 'B';
 	if (tc & BLK_TC_SYNC)
 		rwbs[i++] = 'S';
 	if (tc & BLK_TC_META)
@@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
 
 static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
 {
-	char rwbs[6];
+	char rwbs[RWBS_LEN];
 	unsigned long long ts  = iter->ts;
 	unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
 	unsigned secs	       = (unsigned long)ts;
@@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
 
 static int blk_log_action(struct trace_iterator *iter, const char *act)
 {
-	char rwbs[6];
+	char rwbs[RWBS_LEN];
 	const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
 
 	fill_rwbs(rwbs, t);
@@ -1561,7 +1566,7 @@ static const struct {
 } mask_maps[] = {
 	{ BLK_TC_READ,		"read"		},
 	{ BLK_TC_WRITE,		"write"		},
-	{ BLK_TC_BARRIER,	"barrier"	},
+	{ BLK_TC_FLUSH,		"flush"		},
 	{ BLK_TC_SYNC,		"sync"		},
 	{ BLK_TC_QUEUE,		"queue"		},
 	{ BLK_TC_REQUEUE,	"requeue"	},
@@ -1573,6 +1578,7 @@ static const struct {
 	{ BLK_TC_META,		"meta"		},
 	{ BLK_TC_DISCARD,	"discard"	},
 	{ BLK_TC_DRV_DATA,	"drv_data"	},
+	{ BLK_TC_FUA,		"fua"		},
 };
 
 static int blk_trace_str2mask(const char *str)
@@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
 {
 	int i = 0;
 
+	if (rw & REQ_FLUSH)
+		rwbs[i++] = 'F';
+
 	if (rw & WRITE)
 		rwbs[i++] = 'W';
 	else if (rw & REQ_DISCARD)
@@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
 	else
 		rwbs[i++] = 'N';
 
+	if (rw & REQ_FUA)
+		rwbs[i++] = 'F';
 	if (rw & REQ_RAHEAD)
 		rwbs[i++] = 'A';
 	if (rw & REQ_SYNC)
-- 
cgit v1.2.3


From 72fa59970f8698023045ab0713d66f3f4f96945c Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segoon@openwall.com>
Date: Mon, 8 Aug 2011 19:02:04 +0400
Subject: move RLIMIT_NPROC check from set_user() to do_execve_common()

The patch http://lkml.org/lkml/2003/7/13/226 introduced an RLIMIT_NPROC
check in set_user() to check for NPROC exceeding via setuid() and
similar functions.

Before the check there was a possibility to greatly exceed the allowed
number of processes by an unprivileged user if the program relied on
rlimit only.  But the check created new security threat: many poorly
written programs simply don't check setuid() return code and believe it
cannot fail if executed with root privileges.  So, the check is removed
in this patch because of too often privilege escalations related to
buggy programs.

The NPROC can still be enforced in the common code flow of daemons
spawning user processes.  Most of daemons do fork()+setuid()+execve().
The check introduced in execve() (1) enforces the same limit as in
setuid() and (2) doesn't create similar security issues.

Neil Brown suggested to track what specific process has exceeded the
limit by setting PF_NPROC_EXCEEDED process flag.  With the change only
this process would fail on execve(), and other processes' execve()
behaviour is not changed.

Solar Designer suggested to re-check whether NPROC limit is still
exceeded at the moment of execve().  If the process was sleeping for
days between set*uid() and execve(), and the NPROC counter step down
under the limit, the defered execve() failure because NPROC limit was
exceeded days ago would be unexpected.  If the limit is not exceeded
anymore, we clear the flag on successful calls to execve() and fork().

The flag is also cleared on successful calls to set_user() as the limit
was exceeded for the previous user, not the current one.

Similar check was introduced in -ow patches (without the process flag).

v3 - clear PF_NPROC_EXCEEDED on successful calls to set_user().

Reviewed-by: James Morris <jmorris@namei.org>
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cred.c |  6 ++----
 kernel/fork.c |  1 +
 kernel/sys.c  | 15 +++++++++++----
 3 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cred.c b/kernel/cred.c
index 174fa84eca30..8ef31f53c44c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -508,10 +508,8 @@ int commit_creds(struct cred *new)
 		key_fsgid_changed(task);
 
 	/* do it
-	 * - What if a process setreuid()'s and this brings the
-	 *   new uid over his NPROC rlimit?  We can check this now
-	 *   cheaply with the new uid cache, so if it matters
-	 *   we should be checking for it.  -DaveM
+	 * RLIMIT_NPROC limits on user->processes have already been checked
+	 * in set_user().
 	 */
 	alter_cred_subscribers(new, 2);
 	if (new->user != old->user)
diff --git a/kernel/fork.c b/kernel/fork.c
index e7ceaca89609..8e6b6f4fb272 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1111,6 +1111,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		    p->real_cred->user != INIT_USER)
 			goto bad_fork_free;
 	}
+	current->flags &= ~PF_NPROC_EXCEEDED;
 
 	retval = copy_creds(p, clone_flags);
 	if (retval < 0)
diff --git a/kernel/sys.c b/kernel/sys.c
index a101ba36c444..dd948a1fca4c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -621,11 +621,18 @@ static int set_user(struct cred *new)
 	if (!new_user)
 		return -EAGAIN;
 
+	/*
+	 * We don't fail in case of NPROC limit excess here because too many
+	 * poorly written programs don't check set*uid() return code, assuming
+	 * it never fails if called by root.  We may still enforce NPROC limit
+	 * for programs doing set*uid()+execve() by harmlessly deferring the
+	 * failure to the execve() stage.
+	 */
 	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
-			new_user != INIT_USER) {
-		free_uid(new_user);
-		return -EAGAIN;
-	}
+			new_user != INIT_USER)
+		current->flags |= PF_NPROC_EXCEEDED;
+	else
+		current->flags &= ~PF_NPROC_EXCEEDED;
 
 	free_uid(new->user);
 	new->user = new_user;
-- 
cgit v1.2.3


From c59d87c460767bc35dafd490139d3cfe78fb8da4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Fri, 12 Aug 2011 16:21:35 -0500
Subject: xfs: remove subdirectories

Use the move from Linux 2.6 to Linux 3.x as an excuse to kill the
annoying subdirectories in the XFS source code.  Besides the large
amount of file rename the only changes are to the Makefile, a few
files including headers with the subdirectory prefix, and the binary
sysctl compat code that includes a header under fs/xfs/ from
kernel/.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 kernel/sysctl_binary.c | 2 +-
 kernel/sysctl_check.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 3b8e028b9601..e8bffbe2ba4b 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1,6 +1,6 @@
 #include <linux/stat.h>
 #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/string.h>
 #include <net/ip_vs.h>
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 4e4932a7b360..362da653813d 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1,6 +1,6 @@
 #include <linux/stat.h>
 #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
 #include <linux/sunrpc/debug.h>
 #include <linux/string.h>
 #include <net/ip_vs.h>
-- 
cgit v1.2.3


From 17f2ae7f677f023997e02fd2ebabd90ea2a0390d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 14 Aug 2011 13:34:31 +0200
Subject: PM / Domains: Fix build for CONFIG_PM_RUNTIME unset

Function genpd_queue_power_off_work() is not defined for
CONFIG_PM_RUNTIME, so pm_genpd_poweroff_unused() causes a build
error to happen in that case.  Fix the problem by making
pm_genpd_poweroff_unused() depend on CONFIG_PM_RUNTIME too.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 kernel/power/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index b1914cb9095c..3744c594b19b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -231,3 +231,7 @@ config PM_CLK
 config PM_GENERIC_DOMAINS
 	bool
 	depends on PM
+
+config PM_GENERIC_DOMAINS_RUNTIME
+	def_bool y
+	depends on PM_RUNTIME && PM_GENERIC_DOMAINS
-- 
cgit v1.2.3


From d522a0d17963e9c2e556db2cbd60c96d40505b6c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Thu, 18 Aug 2011 12:19:27 -0700
Subject: irqdesc: fix new kernel-doc warning

Fix kernel-doc warning in irqdesc.c:

  Warning(kernel/irq/irqdesc.c:353): No description found for parameter 'owner'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/irq/irqdesc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index cb65d0360e31..039b889ea053 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -344,6 +344,7 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
  * @from:	Start the search from this irq number
  * @cnt:	Number of consecutive irqs to allocate.
  * @node:	Preferred node on which the irq descriptor should be allocated
+ * @owner:	Owning module (can be NULL)
  *
  * Returns the first irq number or error code
  */
-- 
cgit v1.2.3


From 69dd3d8e29e294caaf63eb5e8a72d250279f9e5f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 23 Aug 2011 10:36:51 -0700
Subject: Revert "irq: Always set IRQF_ONESHOT if no primary handler is
 specified"

This reverts commit f3637a5f2e2eb391ff5757bc83fb5de8f9726464.

It turns out that this breaks several drivers, one example being OMAP
boards which use the on-board OMAP UARTs and the omap-serial driver that
will not boot to userspace after the commit.

Paul Walmsley reports that enabling CONFIG_DEBUG_SHIRQ reveals 'IRQ
handler type mismatch' errors:

  IRQ handler type mismatch for IRQ 74
  current handler: serial idle
  ...

and the reason is that setting IRQF_ONESHOT will now result in those
interrupt handlers having different IRQF flags, and thus being
unsharable.  So the commit log in the reverted commit:

                            "Since it is required for those users and
    there is no difference for others it makes sense to add this flag
    unconditionally."

is simply not true: there may not be any difference from a "actions at
irq time", but there is a *big* difference wrt this flag testing irq
management (see __setup_irq() in kernel/irq/manage.c).

One solution may be to stop verifying IRQF_ONESHOT in __setup_irq(), but
right now the safe course of action is to revert the change.  Let's
revisit this in a later merge window.

Reported-by: Paul Walmsley <paul@pwsan.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Requested-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/irq/manage.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 2e9425889fa8..9b956fa20308 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1331,7 +1331,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 		if (!thread_fn)
 			return -EINVAL;
 		handler = irq_default_primary_handler;
-		irqflags |= IRQF_ONESHOT;
 	}
 
 	action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
-- 
cgit v1.2.3


From be27425dcc516fd08245b047ea57f83b8f6f0903 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 19 Aug 2011 16:15:10 -0700
Subject: Add a personality to report 2.6.x version numbers

I ran into a couple of programs which broke with the new Linux 3.0
version.  Some of those were binary only.  I tried to use LD_PRELOAD to
work around it, but it was quite difficult and in one case impossible
because of a mix of 32bit and 64bit executables.

For example, all kind of management software from HP doesnt work, unless
we pretend to run a 2.6 kernel.

  $ uname -a
  Linux svivoipvnx001 3.0.0-08107-g97cd98f #1062 SMP Fri Aug 12 18:11:45 CEST 2011 i686 i686 i386 GNU/Linux

  $ hpacucli ctrl all show

  Error: No controllers detected.

  $ rpm -qf /usr/sbin/hpacucli
  hpacucli-8.75-12.0

Another notable case is that Python now reports "linux3" from
sys.platform(); which in turn can break things that were checking
sys.platform() == "linux2":

  https://bugzilla.mozilla.org/show_bug.cgi?id=664564

It seems pretty clear to me though it's a bug in the apps that are using
'==' instead of .startswith(), but this allows us to unbreak broken
programs.

This patch adds a UNAME26 personality that makes the kernel report a
2.6.40+x version number instead.  The x is the x in 3.x.

I know this is somewhat ugly, but I didn't find a better workaround, and
compatibility to existing programs is important.

Some programs also read /proc/sys/kernel/osrelease.  This can be worked
around in user space with mount --bind (and a mount namespace)

To use:

  wget ftp://ftp.kernel.org/pub/linux/kernel/people/ak/uname26/uname26.c
  gcc -o uname26 uname26.c
  ./uname26 program

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index dd948a1fca4c..18ee1d2f6474 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -37,6 +37,8 @@
 #include <linux/fs_struct.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
+#include <linux/version.h>
+#include <linux/ctype.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -44,6 +46,8 @@
 #include <linux/user_namespace.h>
 
 #include <linux/kmsg_dump.h>
+/* Move somewhere else to avoid recompiling? */
+#include <generated/utsrelease.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1161,6 +1165,34 @@ DECLARE_RWSEM(uts_sem);
 #define override_architecture(name)	0
 #endif
 
+/*
+ * Work around broken programs that cannot handle "Linux 3.0".
+ * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
+ */
+static int override_release(char __user *release, int len)
+{
+	int ret = 0;
+	char buf[len];
+
+	if (current->personality & UNAME26) {
+		char *rest = UTS_RELEASE;
+		int ndots = 0;
+		unsigned v;
+
+		while (*rest) {
+			if (*rest == '.' && ++ndots >= 3)
+				break;
+			if (!isdigit(*rest) && *rest != '.')
+				break;
+			rest++;
+		}
+		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+		snprintf(buf, len, "2.6.%u%s", v, rest);
+		ret = copy_to_user(release, buf, len);
+	}
+	return ret;
+}
+
 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
 {
 	int errno = 0;
@@ -1170,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
 		errno = -EFAULT;
 	up_read(&uts_sem);
 
+	if (!errno && override_release(name->release, sizeof(name->release)))
+		errno = -EFAULT;
 	if (!errno && override_architecture(name))
 		errno = -EFAULT;
 	return errno;
@@ -1191,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
 		error = -EFAULT;
 	up_read(&uts_sem);
 
+	if (!error && override_release(name->release, sizeof(name->release)))
+		error = -EFAULT;
 	if (!error && override_architecture(name))
 		error = -EFAULT;
 	return error;
@@ -1225,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
 
 	if (!error && override_architecture(name))
 		error = -EFAULT;
+	if (!error && override_release(name->release, sizeof(name->release)))
+		error = -EFAULT;
 	return error ? -EFAULT : 0;
 }
 #endif
-- 
cgit v1.2.3


From 4c30c6f566c0989ddaee3407da44751e340a63ed Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan <nacc@us.ibm.com>
Date: Thu, 25 Aug 2011 15:59:11 -0700
Subject: kernel/printk: do not turn off bootconsole in printk_late_init() if
 keep_bootcon

It seems that 7bf693951a8e ("console: allow to retain boot console via
boot option keep_bootcon") doesn't always achieve what it aims, as when
printk_late_init() runs it unconditionally turns off all boot consoles.
With this patch, I am able to see more messages on the boot console in
KVM guests than I can without, when keep_bootcon is specified.

I think it is appropriate for the relevant -stable trees.  However, it's
more of an annoyance than a serious bug (ideally you don't need to keep
the boot console around as console handover should be working -- I was
encountering a situation where the console handover wasn't working and
not having the boot console available meant I couldn't see why).

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Greg KH <gregkh@suse.de>
Acked-by: Fabio M. Di Nitto <fdinitto@redhat.com>
Cc: <stable@kernel.org>		[2.6.39.x, 3.0.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 836a2ae0ac31..28a40d8171b8 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1604,7 +1604,7 @@ static int __init printk_late_init(void)
 	struct console *con;
 
 	for_each_console(con) {
-		if (con->flags & CON_BOOT) {
+		if (!keep_bootcon && con->flags & CON_BOOT) {
 			printk(KERN_INFO "turn off boot console %s%d\n",
 				con->name, con->index);
 			unregister_console(con);
-- 
cgit v1.2.3


From f5b940997397229975ea073679b03967932a541b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 26 Aug 2011 18:03:11 -0400
Subject: All Arch: remove linkage for sys_nfsservctl system call

The nfsservctl system call is now gone, so we should remove all
linkage for it.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys_ni.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 62cbc8877fef..a9a5de07c4f1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void)
 	return -ENOSYS;
 }
 
-cond_syscall(sys_nfsservctl);
 cond_syscall(sys_quotactl);
 cond_syscall(sys32_quotactl);
 cond_syscall(sys_acct);
-- 
cgit v1.2.3