From d34d2c97ae56961ca73fc8704aec2304bb820668 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Tue, 20 Jun 2017 12:12:49 +0100 Subject: cpufreq/sched: Use cpu max freq rather than policy max When we convert capacity into frequency, we used policy->max to get the max freq of the cpu. Since this can be changed by userspace policy or thermal events, we are potentially asking for a lower frequency than the utilization demands. Change over to using cpuinfo.max_freq, which is the max freq supported by that cpu, rather than the currently-chosen max. The frequency granted still honours the policy max. Tested by setting a userspace policy and observing the relevant vars in a trace. In this instance, we ask for around 1GHz instead of 620MHz. freq_new=1013512 unfixed_freq_new=624487 capacity=546 cpuinfo_max=1900800 policy_max=1171200 Change-Id: I8c5694db42243c6fb78bb9be9046b06ac81295e7 Signed-off-by: Chris Redpath --- kernel/sched/cpufreq_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c index 6ffb23adbcef..ec0aed7a8f96 100644 --- a/kernel/sched/cpufreq_sched.c +++ b/kernel/sched/cpufreq_sched.c @@ -202,7 +202,7 @@ static void update_fdomain_capacity_request(int cpu) } /* Convert the new maximum capacity request into a cpu frequency */ - freq_new = capacity * policy->max >> SCHED_CAPACITY_SHIFT; + freq_new = capacity * policy->cpuinfo.max_freq >> SCHED_CAPACITY_SHIFT; if (cpufreq_frequency_table_target(policy, policy->freq_table, freq_new, CPUFREQ_RELATION_L, &index_new)) -- cgit v1.2.3 From c9391ba6ee7930ccfda71748cec2d8d210510cc8 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Thu, 5 Oct 2017 17:00:57 -0400 Subject: FROMLIST: android: binder: Change binder_shrinker to static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (from https://patchwork.kernel.org/patch/9990321/) The binder_shrinker struct is not used anywhere outside of binder_alloc.c and should be static. Bug: 63926541 Change-Id: I7a13d4ddbaaf3721cddfe1d860e34c7be80dd082 Acked-by: Arve Hjønnevåg Signed-off-by: Sherry Yang --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index b95da16fd938..22cb64f8d96c 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -986,7 +986,7 @@ binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) return ret; } -struct shrinker binder_shrinker = { +static struct shrinker binder_shrinker = { .count_objects = binder_shrink_count, .scan_objects = binder_shrink_scan, .seeks = DEFAULT_SEEKS, -- cgit v1.2.3 From a7e1d33c7a1101936cf7f048a0ef7cac62e32cd0 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Thu, 5 Oct 2017 17:13:47 -0400 Subject: FROMLIST: android: binder: Fix null ptr dereference in debug msg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (from https://patchwork.kernel.org/patch/9990323/) Don't access next->data in the kernel debug message when the next buffer is null.
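For illustration, a minimal sketch of the pattern the fix applies; struct buf and log_merge are hypothetical stand-ins for the driver's binder_buffer bookkeeping, and %pK is the kernel's hashed-pointer format:

        struct buf { void *data; };     /* hypothetical stand-in */

        /* Fold the NULL check into the printed argument instead of
         * dereferencing a possibly-NULL neighbour inside the print.
         */
        static void log_merge(struct buf *buffer, struct buf *prev,
                              struct buf *next)
        {
                pr_debug("buffer %pK, prev %pK, next %pK\n",
                         buffer->data,
                         prev ? prev->data : NULL,
                         next ? next->data : NULL);
        }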
Bug: 36007193 Change-Id: Ib8240d7e9a7087a2256e88c0ae84b9df0f2d0224 Acked-by: Arve Hjønnevåg Signed-off-by: Sherry Yang --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 22cb64f8d96c..e431cfcfb59b 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -561,7 +561,7 @@ static void binder_delete_free_buffer(struct binder_alloc *alloc, binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC, "%d: merge free, buffer %pK do not share page with %pK or %pK\n", alloc->pid, buffer->data, - prev->data, next->data); + prev->data, next ? next->data : NULL); binder_update_page_range(alloc, 0, buffer_start_page(buffer), buffer_start_page(buffer) + PAGE_SIZE); } -- cgit v1.2.3 From 774481506a83d305353330e44b8e0f6ad80a34a8 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 15 Sep 2017 08:25:32 +0800 Subject: cpufreq: schedutil: clamp util to CPU maximum capacity The code accumulates the CPU util of the different scheduling classes and, when the total util value is larger than the CPU capacity, clamps it to the CPU maximum capacity. This gives a correct util value when using the PELT signal, but with the WALT signal the clamp is skipped. WALT does not accumulate utilization from the different classes, but because a boost margin is applied to the WALT signal, its CPU util value can still be larger than the CPU capacity; so always clamp util to the CPU maximum capacity. Change-Id: I05481ddbf20246bb9be15b6bd21b6ec039015ea8 Signed-off-by: Leo Yan --- kernel/sched/cpufreq_schedutil.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 28977799017b..d3765f0cb699 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -216,8 +216,9 @@ static void sugov_get_util(unsigned long *util, unsigned long *max, u64 time) *util = boosted_cpu_util(cpu); if (likely(use_pelt())) - *util = min((*util + rt), max_cap); + *util = *util + rt; + *util = min(*util, max_cap); *max = max_cap; } -- cgit v1.2.3 From 89805266af7825f6b8ccb8ff23a8e3aec4418dea Mon Sep 17 00:00:00 2001 From: Dmitry Shmidt Date: Tue, 24 Oct 2017 12:42:10 -0700 Subject: Revert "UPSTREAM: efi/libstub/arm64: Set -fpie when building the EFI stub" It breaks boot with a UEFI bootloader. This reverts commit 2f2860a504a30a7645c6a0ec06767c5c7677a4ea. --- drivers/firmware/efi/libstub/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index d1f1bde85d3f..68bdd9f23e13 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -10,7 +10,7 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ -fPIC -fno-strict-aliasing -mno-red-zone \ -mno-mmx -mno-sse -cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie +cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic -mno-single-pic-base -- cgit v1.2.3 From 4f8767d1ca307ab8c03889534a3c5a525ce7485d Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Wed, 25 Oct 2017 17:25:20 +0100 Subject: ANDROID: sched/fair: Select correct capacity state for energy_diff The util returned from group_max_util is not capped at the max util present in the group, so it can be larger than the capacity stored in the array.
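With the old loop, a util above the largest cap_states[].cap means the break is never taken, so idx leaves the loop equal to sge->nr_cap_states and is stored as the capacity index, one past the end of the array:

        /* Pre-patch logic: if util exceeds every cap_states[idx].cap,
         * the loop falls through with idx == nr_cap_states, an
         * out-of-bounds index into cap_states[].
         */
        for (idx = 0; idx < sge->nr_cap_states; idx++)
                if (sge->cap_states[idx].cap >= util)
                        break;
        eenv->cap_idx = idx;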
Ensure that when this happens, we always use the last entry in the array to fetch energy from. Tested with synthetics on Juno board. Bug: 38159576 Change-Id: I89fb52fb7e68fa3e682e308acc232596672d03f7 Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b1df3873b6fd..5cac6a77b2bc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5384,17 +5384,20 @@ long group_norm_util(struct energy_env *eenv, struct sched_group *sg) static int find_new_capacity(struct energy_env *eenv, const struct sched_group_energy * const sge) { - int idx; + int idx, max_idx = sge->nr_cap_states - 1; unsigned long util = group_max_util(eenv); + /* default is max_cap if we don't find a match */ + eenv->cap_idx = max_idx; + for (idx = 0; idx < sge->nr_cap_states; idx++) { - if (sge->cap_states[idx].cap >= util) + if (sge->cap_states[idx].cap >= util) { + eenv->cap_idx = idx; break; + } } - eenv->cap_idx = idx; - - return idx; + return eenv->cap_idx; } static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) -- cgit v1.2.3 From cc005cde424b9ae89e1b6fe47da4f2c4a827c73f Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Fri, 4 Aug 2017 10:17:00 -0700 Subject: UPSTREAM: arm64: compat: Remove leftover variable declaration (cherry picked from commit 82d24d114f249d919b918ff8eefde4117db8f088) Commit a1d5ebaf8ccd ("arm64: big-endian: don't treat code as data when copying sigret code") moved the 32-bit sigreturn trampoline code from the aarch32_sigret_code array to kuser32.S. The commit removed the array definition from signal32.c, but not its declaration in signal32.h. Remove the leftover declaration. Signed-off-by: Kevin Brodsky Signed-off-by: Mark Salyzyn Signed-off-by: Catalin Marinas Bug: 20045882 Bug: 63737556 Change-Id: Ic8a5f0e367f0ecd5c5ddd9e3885d0285f91cf89e --- arch/arm64/include/asm/signal32.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h index eeaa97559bab..81abea0b7650 100644 --- a/arch/arm64/include/asm/signal32.h +++ b/arch/arm64/include/asm/signal32.h @@ -22,8 +22,6 @@ #define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500 -extern const compat_ulong_t aarch32_sigret_code[6]; - int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, -- cgit v1.2.3 From 95317055df213596ee7c25836a6ac64bb56c3cb5 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Thu, 19 Oct 2017 15:04:46 +0200 Subject: ANDROID: binder: Add thread->process_todo flag. This flag determines whether the thread should currently process the work in the thread->todo worklist. The prime usecase for this is improving the performance of synchronous transactions: all synchronous transactions post a BR_TRANSACTION_COMPLETE to the calling thread, but there's no reason to return that command to userspace right away - userspace anyway needs to wait for the reply. Likewise, a synchronous transaction that contains a binder object can cause a BC_ACQUIRE/BC_INCREFS to be returned to userspace; since the caller must anyway hold a strong/weak ref for the duration of the call, postponing these commands until the reply comes in is not a problem. Note that this flag is not used to determine whether a thread can handle process work; a thread should never pick up process work when thread work is still pending. 
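The caller-side effect, sketched with the helpers this patch introduces (see the diff below): queue the BR_TRANSACTION_COMPLETE for the calling thread without setting process_todo, so binder_has_work() stays false and the caller sleeps through to the reply instead of waking up for the intermediate command.

        /* Synchronous call: the completion is queued on the calling
         * thread's todo list but does not mark the list processable,
         * so the thread is not woken just for BR_TRANSACTION_COMPLETE.
         */
        binder_inner_proc_lock(proc);
        binder_enqueue_thread_work_ilocked_nowake(thread, tcomplete);
        binder_inner_proc_unlock(proc);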
Before patch: ------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------ BM_sendVec_binderize/4 45959 ns 20288 ns 34351 BM_sendVec_binderize/8 45603 ns 20080 ns 34909 BM_sendVec_binderize/16 45528 ns 20113 ns 34863 BM_sendVec_binderize/32 45551 ns 20122 ns 34881 BM_sendVec_binderize/64 45701 ns 20183 ns 34864 BM_sendVec_binderize/128 45824 ns 20250 ns 34576 BM_sendVec_binderize/256 45695 ns 20171 ns 34759 BM_sendVec_binderize/512 45743 ns 20211 ns 34489 BM_sendVec_binderize/1024 46169 ns 20430 ns 34081 After patch: ------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------ BM_sendVec_binderize/4 42939 ns 17262 ns 40653 BM_sendVec_binderize/8 42823 ns 17243 ns 40671 BM_sendVec_binderize/16 42898 ns 17243 ns 40594 BM_sendVec_binderize/32 42838 ns 17267 ns 40527 BM_sendVec_binderize/64 42854 ns 17249 ns 40379 BM_sendVec_binderize/128 42881 ns 17288 ns 40427 BM_sendVec_binderize/256 42917 ns 17297 ns 40429 BM_sendVec_binderize/512 43184 ns 17395 ns 40411 BM_sendVec_binderize/1024 43119 ns 17357 ns 40432 Signed-off-by: Martijn Coenen Change-Id: Ia70287066d62aba64e98ac44ff1214e37ca75693 --- drivers/android/binder.c | 144 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 100 insertions(+), 44 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 112e61a4806b..6dfcc80f3fc6 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -601,6 +601,8 @@ enum { * (protected by @proc->inner_lock) * @todo: list of work to do for this thread * (protected by @proc->inner_lock) + * @process_todo: whether work in @todo should be processed + * (protected by @proc->inner_lock) * @return_error: transaction errors reported by this thread * (only accessed by this thread) * @reply_error: transaction errors reported by target thread @@ -627,6 +629,7 @@ struct binder_thread { bool looper_need_return; /* can be written by other thread */ struct binder_transaction *transaction_stack; struct list_head todo; + bool process_todo; struct binder_error return_error; struct binder_error reply_error; wait_queue_head_t wait; @@ -814,6 +817,16 @@ static bool binder_worklist_empty(struct binder_proc *proc, return ret; } +/** + * binder_enqueue_work_ilocked() - Add an item to the work list + * @work: struct binder_work to add to list + * @target_list: list to add work to + * + * Adds the work to the specified list. Asserts that work + * is not already on a list. + * + * Requires the proc->inner_lock to be held. + */ static void binder_enqueue_work_ilocked(struct binder_work *work, struct list_head *target_list) @@ -824,22 +837,56 @@ binder_enqueue_work_ilocked(struct binder_work *work, } /** - * binder_enqueue_work() - Add an item to the work list - * @proc: binder_proc associated with list + * binder_enqueue_thread_work_ilocked_nowake() - Add thread work + * @thread: thread to queue work to * @work: struct binder_work to add to list - * @target_list: list to add work to * - * Adds the work to the specified list. Asserts that work - * is not already on a list. + * Adds the work to the todo list of the thread. Doesn't set the process_todo + * flag, which means that (if it wasn't already set) the thread will go to + * sleep without handling this work when it calls read. + * + * Requires the proc->inner_lock to be held. 
*/ static void -binder_enqueue_work(struct binder_proc *proc, - struct binder_work *work, - struct list_head *target_list) +binder_enqueue_thread_work_ilocked_nowake(struct binder_thread *thread, + struct binder_work *work) { - binder_inner_proc_lock(proc); - binder_enqueue_work_ilocked(work, target_list); - binder_inner_proc_unlock(proc); + binder_enqueue_work_ilocked(work, &thread->todo); +} + +/** + * binder_enqueue_thread_work_ilocked() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + * + * Requires the proc->inner_lock to be held. + */ +static void +binder_enqueue_thread_work_ilocked(struct binder_thread *thread, + struct binder_work *work) +{ + binder_enqueue_work_ilocked(work, &thread->todo); + thread->process_todo = true; +} + +/** + * binder_enqueue_thread_work() - Add an item to the thread work list + * @thread: thread to queue work to + * @work: struct binder_work to add to list + * + * Adds the work to the todo list of the thread, and enables processing + * of the todo queue. + */ +static void +binder_enqueue_thread_work(struct binder_thread *thread, + struct binder_work *work) +{ + binder_inner_proc_lock(thread->proc); + binder_enqueue_thread_work_ilocked(thread, work); + binder_inner_proc_unlock(thread->proc); } static void @@ -954,7 +1001,7 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) static bool binder_has_work_ilocked(struct binder_thread *thread, bool do_proc_work) { - return !binder_worklist_empty_ilocked(&thread->todo) || + return thread->process_todo || thread->looper_need_return || (do_proc_work && !binder_worklist_empty_ilocked(&thread->proc->todo)); @@ -1371,6 +1418,17 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, node->local_strong_refs++; if (!node->has_strong_ref && target_list) { binder_dequeue_work_ilocked(&node->work); + /* + * Note: this function is the only place where we queue + * directly to a thread->todo without using the + * corresponding binder_enqueue_thread_work() helper + * functions; in this case it's ok to not set the + * process_todo flag, since we know this node work will + * always be followed by other work that starts queue + * processing: in case of synchronous transactions, a + * BR_REPLY or BR_ERROR; in case of oneway + * transactions, a BR_TRANSACTION_COMPLETE. 
+ */ binder_enqueue_work_ilocked(&node->work, target_list); } } else { @@ -1382,6 +1440,9 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, node->debug_id); return -EINVAL; } + /* + * See comment above + */ binder_enqueue_work_ilocked(&node->work, target_list); } } @@ -2071,9 +2132,9 @@ static void binder_send_failed_reply(struct binder_transaction *t, binder_pop_transaction_ilocked(target_thread, t); if (target_thread->reply_error.cmd == BR_OK) { target_thread->reply_error.cmd = error_code; - binder_enqueue_work_ilocked( - &target_thread->reply_error.work, - &target_thread->todo); + binder_enqueue_thread_work_ilocked( + target_thread, + &target_thread->reply_error.work); wake_up_interruptible(&target_thread->wait); } else { WARN(1, "Unexpected reply error: %u\n", @@ -2712,11 +2773,10 @@ static bool binder_proc_transaction(struct binder_transaction *t, struct binder_proc *proc, struct binder_thread *thread) { - struct list_head *target_list = NULL; struct binder_node *node = t->buffer->target_node; struct binder_priority node_prio; bool oneway = !!(t->flags & TF_ONE_WAY); - bool wakeup = true; + bool pending_async = false; BUG_ON(!node); binder_node_lock(node); @@ -2726,8 +2786,7 @@ static bool binder_proc_transaction(struct binder_transaction *t, if (oneway) { BUG_ON(thread); if (node->has_async_transaction) { - target_list = &node->async_todo; - wakeup = false; + pending_async = true; } else { node->has_async_transaction = 1; } @@ -2741,22 +2800,20 @@ static bool binder_proc_transaction(struct binder_transaction *t, return false; } - if (!thread && !target_list) + if (!thread && !pending_async) thread = binder_select_thread_ilocked(proc); if (thread) { - target_list = &thread->todo; binder_transaction_priority(thread->task, t, node_prio, node->inherit_rt); - } else if (!target_list) { - target_list = &proc->todo; + binder_enqueue_thread_work_ilocked(thread, &t->work); + } else if (!pending_async) { + binder_enqueue_work_ilocked(&t->work, &proc->todo); } else { - BUG_ON(target_list != &node->async_todo); + binder_enqueue_work_ilocked(&t->work, &node->async_todo); } - binder_enqueue_work_ilocked(&t->work, target_list); - - if (wakeup) + if (!pending_async) binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */); binder_inner_proc_unlock(proc); @@ -3258,10 +3315,10 @@ static void binder_transaction(struct binder_proc *proc, } } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; - binder_enqueue_work(proc, tcomplete, &thread->todo); t->work.type = BINDER_WORK_TRANSACTION; if (reply) { + binder_enqueue_thread_work(thread, tcomplete); binder_inner_proc_lock(target_proc); if (target_thread->is_dead) { binder_inner_proc_unlock(target_proc); @@ -3269,7 +3326,7 @@ static void binder_transaction(struct binder_proc *proc, } BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction_ilocked(target_thread, in_reply_to); - binder_enqueue_work_ilocked(&t->work, &target_thread->todo); + binder_enqueue_thread_work_ilocked(target_thread, &t->work); binder_inner_proc_unlock(target_proc); wake_up_interruptible_sync(&target_thread->wait); binder_restore_priority(current, in_reply_to->saved_priority); @@ -3277,6 +3334,7 @@ static void binder_transaction(struct binder_proc *proc, } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); binder_inner_proc_lock(proc); + binder_enqueue_thread_work_ilocked_nowake(thread, tcomplete); t->need_reply = 1; t->from_parent = thread->transaction_stack; thread->transaction_stack = t; @@ -3290,6 +3348,7 @@ 
static void binder_transaction(struct binder_proc *proc, } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); + binder_enqueue_thread_work(thread, tcomplete); if (!binder_proc_transaction(t, target_proc, NULL)) goto err_dead_proc_or_thread; } @@ -3369,15 +3428,11 @@ err_invalid_target_handle: if (in_reply_to) { binder_restore_priority(current, in_reply_to->saved_priority); thread->return_error.cmd = BR_TRANSACTION_COMPLETE; - binder_enqueue_work(thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work(thread, &thread->return_error.work); binder_send_failed_reply(in_reply_to, return_error); } else { thread->return_error.cmd = return_error; - binder_enqueue_work(thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work(thread, &thread->return_error.work); } } @@ -3681,10 +3736,9 @@ static int binder_thread_write(struct binder_proc *proc, WARN_ON(thread->return_error.cmd != BR_OK); thread->return_error.cmd = BR_ERROR; - binder_enqueue_work( - thread->proc, - &thread->return_error.work, - &thread->todo); + binder_enqueue_thread_work( + thread, + &thread->return_error.work); binder_debug( BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n", @@ -3764,9 +3818,9 @@ static int binder_thread_write(struct binder_proc *proc, if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) - binder_enqueue_work_ilocked( - &death->work, - &thread->todo); + binder_enqueue_thread_work_ilocked( + thread, + &death->work); else { binder_enqueue_work_ilocked( &death->work, @@ -3821,8 +3875,8 @@ static int binder_thread_write(struct binder_proc *proc, if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) - binder_enqueue_work_ilocked( - &death->work, &thread->todo); + binder_enqueue_thread_work_ilocked( + thread, &death->work); else { binder_enqueue_work_ilocked( &death->work, @@ -3996,6 +4050,8 @@ retry: break; } w = binder_dequeue_work_head_ilocked(list); + if (binder_worklist_empty_ilocked(&thread->todo)) + thread->process_todo = false; switch (w->type) { case BINDER_WORK_TRANSACTION: { -- cgit v1.2.3 From c8bc3e3a3ede641b48360ab0bd35eb316168008b Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Tue, 24 Oct 2017 16:37:39 +0200 Subject: ANDROID: binder: show high watermark of alloc->pages. Show the high watermark of the index into the alloc->pages array, to facilitate sizing the buffer on a per-process basis. 
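The bookkeeping is a simple monotonic high-water mark, updated whenever a page is installed (this is the binder_update_page_range() hunk below):

        /* index is the slot in alloc->pages being populated; remember
         * the largest slot ever used for this process.
         */
        if (index + 1 > alloc->pages_high)
                alloc->pages_high = index + 1;

The value is then reported alongside the existing per-process page counts by binder_alloc_print_pages(), e.g. as " pages high watermark: 123" in the binder debugfs stats output.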
Change-Id: I2b40cd16628e0ee45216c51dc9b3c5b0c862032e Signed-off-by: Martijn Coenen --- drivers/android/binder_alloc.c | 4 ++++ drivers/android/binder_alloc.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index e431cfcfb59b..3a4279d219f7 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -282,6 +282,9 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, goto err_vm_insert_page_failed; } + if (index + 1 > alloc->pages_high) + alloc->pages_high = index + 1; + trace_binder_alloc_page_end(alloc, index); /* vm_insert_page does not seem to increment the refcount */ } @@ -855,6 +858,7 @@ void binder_alloc_print_pages(struct seq_file *m, } mutex_unlock(&alloc->mutex); seq_printf(m, " pages: %d:%d:%d\n", active, lru, free); + seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high); } /** diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 2dd33b6df104..0b145307f1fd 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -92,6 +92,7 @@ struct binder_lru_page { * @pages: array of binder_lru_page * @buffer_size: size of address space specified via mmap * @pid: pid for associated binder_proc (invariant after init) + * @pages_high: high watermark of offset in @pages * * Bookkeeping structure for per-proc address space management for binder * buffers. It is normally initialized during binder_init() and binder_mmap() @@ -112,6 +113,7 @@ struct binder_alloc { size_t buffer_size; uint32_t buffer_free; int pid; + size_t pages_high; }; #ifdef CONFIG_ANDROID_BINDER_IPC_SELFTEST -- cgit v1.2.3 From abe43c97cae28a0b06f632e2fbadd0c20b8cca5e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 16 Oct 2017 16:21:19 +0200 Subject: USB: devio: Revert "USB: devio: Don't corrupt user memory" commit 845d584f41eac3475c21e4a7d5e88d0f6e410cf7 upstream. Taking the uurb->buffer_length userspace passes in as a maximum for the actual urbs transfer_buffer_length causes 2 serious issues: 1) It breaks isochronous support for all userspace apps using libusb, as existing libusb versions pass in 0 for uurb->buffer_length, relying on the kernel using the lengths of the usbdevfs_iso_packet_desc descriptors passed in added together as buffer length. This for example causes redirection of USB audio and webcams into virtual machines using qemu-kvm to no longer work. This is a userspace ABI break and as such must be reverted. Note that the original commit does not protect other users / the kernel's memory, it only stops the userspace process making the call from shooting itself in the foot. 2) It may cause the kernel to program host controllers to DMA over random memory. Just as the devio code used to only look at the iso_packet_desc lengths, the host drivers do the same, relying on the submitter of the urbs to make sure the entire buffer is large enough and not checking transfer_buffer_length. But the "USB: devio: Don't corrupt user memory" commit now takes the userspace provided uurb->buffer_length for the buffer-size while copying over the user-provided iso_packet_desc lengths 1:1, allowing the user to specify a small buffer size while programming the host controller to DMA a lot more data. (At least the ohci, uhci, xhci and fhci drivers do not check transfer_buffer_length for isoc transfers.) This reverts commit fa1ed74eb1c2 ("USB: devio: Don't corrupt user memory") fixing both these issues.
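For reference, a sketch of the libusb-style submission that the revert keeps working: buffer_length is left at 0 and the kernel derives the transfer length by summing the per-packet lengths. Error handling is omitted and fd is assumed to be an open usbfs device node:

        #include <linux/usbdevice_fs.h>
        #include <sys/ioctl.h>
        #include <stdlib.h>

        static int submit_iso(int fd, unsigned char ep, void *buf,
                              unsigned int pkts, unsigned int pkt_len)
        {
                struct usbdevfs_urb *urb;
                unsigned int i;

                urb = calloc(1, sizeof(*urb) +
                             pkts * sizeof(struct usbdevfs_iso_packet_desc));
                urb->type = USBDEVFS_URB_TYPE_ISO;
                urb->endpoint = ep;
                urb->buffer = buf;
                urb->buffer_length = 0; /* historical libusb behaviour */
                urb->number_of_packets = pkts;
                for (i = 0; i < pkts; i++)
                        urb->iso_frame_desc[i].length = pkt_len;
                return ioctl(fd, USBDEVFS_SUBMITURB, urb);
        }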
Cc: Dan Carpenter Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index bd9419213d06..873ba02d59e6 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1417,11 +1417,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb totlen += isopkt[u].length; } u *= sizeof(struct usb_iso_packet_descriptor); - if (totlen <= uurb->buffer_length) - uurb->buffer_length = totlen; - else - WARN_ONCE(1, "uurb->buffer_length is too short %d vs %d", - totlen, uurb->buffer_length); + uurb->buffer_length = totlen; break; default: -- cgit v1.2.3 From 9dff499d822660c6dbb2a407a7d85be26f87da07 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 18 Oct 2017 12:49:38 -0400 Subject: USB: core: fix out-of-bounds access bug in usb_get_bos_descriptor() commit 1c0edc3633b56000e18d82fc241e3995ca18a69e upstream. Andrey used the syzkaller fuzzer to find an out-of-bounds memory access in usb_get_bos_descriptor(). The code wasn't checking that the next usb_dev_cap_header structure could fit into the remaining buffer space. This patch fixes the error and also reduces the bNumDeviceCaps field in the header to match the actual number of capabilities found, in cases where there are fewer than expected. Reported-by: Andrey Konovalov Signed-off-by: Alan Stern Tested-by: Andrey Konovalov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index d9d048fc9082..5172bec612eb 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -926,10 +926,12 @@ int usb_get_bos_descriptor(struct usb_device *dev) for (i = 0; i < num; i++) { buffer += length; cap = (struct usb_dev_cap_header *)buffer; - length = cap->bLength; - if (total_len < length) + if (total_len < sizeof(*cap) || total_len < cap->bLength) { + dev->bos->desc->bNumDeviceCaps = i; break; + } + length = cap->bLength; total_len -= length; if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) { -- cgit v1.2.3 From 4512d6503a4db9c5cad41c18a2671ffaa6cb2be9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Oct 2017 14:50:46 +0200 Subject: USB: serial: metro-usb: add MS7820 device id commit 31dc3f819bac28a0990b36510197560258ab7421 upstream. Add device-id entry for (Honeywell) Metrologic MS7820 bar code scanner. The device has two interfaces (in this mode?); a vendor-specific interface with two interrupt endpoints and a second HID interface, which we do not bind to. Reported-by: Ladislav Dobrovsky Tested-by: Ladislav Dobrovsky Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/metro-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/metro-usb.c b/drivers/usb/serial/metro-usb.c index 39e683096e94..45182c65fa1f 100644 --- a/drivers/usb/serial/metro-usb.c +++ b/drivers/usb/serial/metro-usb.c @@ -45,6 +45,7 @@ struct metrousb_private { static const struct usb_device_id id_table[] = { { USB_DEVICE(FOCUS_VENDOR_ID, FOCUS_PRODUCT_ID_BI) }, { USB_DEVICE(FOCUS_VENDOR_ID, FOCUS_PRODUCT_ID_UNI) }, + { USB_DEVICE_INTERFACE_CLASS(0x0c2e, 0x0730, 0xff) }, /* MS7820 */ { }, /* Terminating entry. 
*/ }; MODULE_DEVICE_TABLE(usb, id_table); -- cgit v1.2.3 From d729f29a291f61fbb8d406eabfbc969c3f6cccc2 Mon Sep 17 00:00:00 2001 From: Maksim Salau Date: Wed, 11 Oct 2017 11:10:52 +0300 Subject: usb: cdc_acm: Add quirk for Elatec TWN3 commit 765fb2f181cad669f2beb87842a05d8071f2be85 upstream. Elatec TWN3 has the union descriptor on data interface. This results in failure to bind the device to the driver with the following log: usb 1-1.2: new full speed USB device using streamplug-ehci and address 4 usb 1-1.2: New USB device found, idVendor=09d8, idProduct=0320 usb 1-1.2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 usb 1-1.2: Product: RFID Device (COM) usb 1-1.2: Manufacturer: OEM cdc_acm 1-1.2:1.0: Zero length descriptor references cdc_acm: probe of 1-1.2:1.0 failed with error -22 Adding the NO_UNION_NORMAL quirk for the device fixes the issue. `lsusb -v` of the device: Bus 001 Device 003: ID 09d8:0320 Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 2 Communications bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 32 idVendor 0x09d8 idProduct 0x0320 bcdDevice 3.00 iManufacturer 1 OEM iProduct 2 RFID Device (COM) iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 67 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 250mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 2 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0020 1x 32 bytes bInterval 0 CDC Header: bcdCDC 1.10 CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 1 CDC ACM: bmCapabilities 0x06 sends break line coding and serial state CDC Union: bMasterInterface 0 bSlaveInterface 1 Device Status: 0x0000 (Bus Powered) Signed-off-by: Maksim Salau Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index df96f5f88c15..3f6bb3fff890 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1762,6 +1762,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0xfff0, 0x0100), /* DATECS FP-2000 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, + { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ + .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ + }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, -- cgit v1.2.3 From d012ab210f5f4ff4e6f5567fe80e1dcc82077c2d Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 3 Oct 2017 11:16:43 +0300 Subject: usb: 
quirks: add quirk for WORLDE MINI MIDI keyboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2811501e6d8f5747d08f8e25b9ecf472d0dc4c7d upstream. This keyboard doesn't implement Get String descriptors properly even though string indexes are valid. What happens is that when requesting the String descriptor, the device disconnects and reconnects. Without this quirk, this loop will continue forever. Cc: Alan Stern Reported-by: Владимир Мартьянов Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 82806e311202..a6aaf2f193a4 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -221,6 +221,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Corsair Strafe RGB */ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT }, + /* MIDI keyboard WORLDE MINI */ + { USB_DEVICE(0x1c75, 0x0204), .driver_info = + USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Acer C120 LED Projector */ { USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM }, -- cgit v1.2.3 From 67e25805e74879eb40bffe7b8e660c3966ee113a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 17 Oct 2017 16:07:33 +0300 Subject: usb: hub: Allow reset retry for USB2 devices on connect bounce commit 1ac7db63333db1eeff901bfd6bbcd502b4634fa4 upstream. If the connect status change is set during reset signaling, but the status remains connected, just retry the port reset. This solves an issue with connecting a 90W HP Thunderbolt 3 dock to a Lenovo Carbon x1 (5th generation) which causes a 30 min loop of a high speed device being re-discovered before the USB ports start working. [...] [ 389.023845] usb 3-1: new high-speed USB device number 55 using xhci_hcd [ 389.491841] usb 3-1: new high-speed USB device number 56 using xhci_hcd [ 389.959928] usb 3-1: new high-speed USB device number 57 using xhci_hcd [...] This is caused by a high speed device that doesn't successfully go to the enabled state after the second port reset. Instead the connection bounces (connected, with connect status change), bailing out completely from enumeration just to restart from scratch. Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1716332 Signed-off-by: Mathias Nyman Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 51bba58c0c3b..22e61786354a 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2656,13 +2656,16 @@ static int hub_port_wait_reset(struct usb_hub *hub, int port1, if (!(portstatus & USB_PORT_STAT_CONNECTION)) return -ENOTCONN; - /* bomb out completely if the connection bounced. A USB 3.0 - * connection may bounce if multiple warm resets were issued, + /* Retry if connect change is set but status is still connected. + * A USB 3.0 connection may bounce if multiple warm resets were issued, * but the device may have successfully re-connected. Ignore it.
*/ if (!hub_is_superspeed(hub->hdev) && - (portchange & USB_PORT_STAT_C_CONNECTION)) - return -ENOTCONN; + (portchange & USB_PORT_STAT_C_CONNECTION)) { + usb_clear_port_feature(hub->hdev, port1, + USB_PORT_FEAT_C_CONNECTION); + return -EAGAIN; + } if (!(portstatus & USB_PORT_STAT_ENABLE)) return -EBUSY; -- cgit v1.2.3 From f7d8b2e150eeb16646e398f90d7506dc84d8a32d Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Sun, 15 Oct 2017 12:41:32 +0300 Subject: ALSA: usb-audio: Add native DSD support for Pro-Ject Pre Box S2 Digital commit 9bb201a5d5acc733943e8af7151cceab9d976a69 upstream. Add native DSD support quirk for Pro-Ject Pre Box S2 Digital USB id 2772:0230. Signed-off-by: Jussi Laako Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 1cc20d138dae..9c5368e7ee23 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1305,6 +1305,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */ case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */ case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */ + case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */ if (fp->altsetting == 2) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; -- cgit v1.2.3 From d68b07a19a9ed71a60ffe28cf0a8aa5988dcdc5e Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Thu, 14 Sep 2017 18:37:14 +0200 Subject: can: gs_usb: fix busy loop if no more TX context is available commit 97819f943063b622eca44d3644067c190dc75039 upstream. If sending messages with no cable connected, it quickly happens that there is no more TX context available. Then "gs_can_start_xmit()" returns with "NETDEV_TX_BUSY" and the upper layer does retry immediately keeping the CPU busy. To fix that issue, I moved "atomic_dec(&dev->active_tx_urbs)" from "gs_usb_xmit_callback()" to the TX done handling in "gs_usb_receive_bulk_callback()". Renaming "active_tx_urbs" to "active_tx_contexts" and moving it into "gs_[alloc|free]_tx_context()" would also make sense. Signed-off-by: Wolfgang Grandegger Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/gs_usb.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index ae5709354546..27e2352fcc42 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -356,6 +356,8 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) gs_free_tx_context(txc); + atomic_dec(&dev->active_tx_urbs); + netif_wake_queue(netdev); } @@ -444,14 +446,6 @@ static void gs_usb_xmit_callback(struct urb *urb) urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); - - atomic_dec(&dev->active_tx_urbs); - - if (!netif_device_present(netdev)) - return; - - if (netif_queue_stopped(netdev)) - netif_wake_queue(netdev); } static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, struct net_device *netdev) -- cgit v1.2.3 From 583a4219841d00e96b5de55be160aa7eb7721a4d Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 9 Oct 2017 22:46:13 -0500 Subject: usb: musb: sunxi: Explicitly release USB PHY on exit commit 6ed05c68cbcae42cd52b8e53b66952bfa9c002ce upstream. This fixes a kernel oops when unloading the driver due to usb_put_phy being called after usb_phy_generic_unregister when the device is detached. 
Calling usb_phy_generic_unregister causes x->dev->driver to be NULL in usb_put_phy and results in a NULL pointer dereference. Signed-off-by: Jonathan Liu Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/sunxi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c index d9b0dc461439..2d3be66fb563 100644 --- a/drivers/usb/musb/sunxi.c +++ b/drivers/usb/musb/sunxi.c @@ -320,6 +320,8 @@ static int sunxi_musb_exit(struct musb *musb) if (test_bit(SUNXI_MUSB_FL_HAS_SRAM, &glue->flags)) sunxi_sram_release(musb->controller->parent); + devm_usb_put_phy(glue->dev, glue->xceiv); + return 0; } -- cgit v1.2.3 From aa629364c0e3c5ed3d21690a3a370601ac66eaee Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 9 Oct 2017 22:46:12 -0500 Subject: usb: musb: Check for host-mode using is_host_active() on reset interrupt commit 445ef61543da3db5b699f87fb0aa4f227165f6ed upstream. The sunxi musb has a bug where sometimes it will generate a babble error on device disconnect instead of a disconnect IRQ. When this happens the musb controller switches from host mode to device mode (it clears MUSB_DEVCTL_HM/MUSB_DEVCTL_SESSION and sets MUSB_DEVCTL_BDEVICE) and gets stuck in this state. The babble error is misdetected as a bus reset because MUSB_DEVCTL_HM was cleared. To fix this, use is_host_active() rather than (devctl & MUSB_DEVCTL_HM) to detect babble error so that sunxi musb babble recovery can handle it by restoring the mode. This information is provided by the driver logic and does not rely on register contents. Signed-off-by: Jonathan Liu Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 00eed5d66fda..06d83825923a 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -877,7 +877,7 @@ b_host: */ if (int_usb & MUSB_INTR_RESET) { handled = IRQ_HANDLED; - if (devctl & MUSB_DEVCTL_HM) { + if (is_host_active(musb)) { /* * When BABBLE happens what we can depends on which * platform MUSB is running, because some platforms @@ -887,9 +887,7 @@ b_host: * drop the session. */ dev_err(musb->controller, "Babble\n"); - - if (is_host_active(musb)) - musb_recover_from_babble(musb); + musb_recover_from_babble(musb); } else { dev_dbg(musb->controller, "BUS RESET as %s\n", usb_otg_state_string(musb->xceiv->otg->state)); -- cgit v1.2.3 From 4d56587c28d44308e5db275b34e5200fab2cc2f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20M=C3=A4tje?= Date: Wed, 18 Oct 2017 13:25:17 +0200 Subject: can: esd_usb2: Fix can_dlc value for received RTR, frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 72d92e865d1560723e1957ee3f393688c49ca5bf upstream. The dlc member of the struct rx_msg contains also the ESD_RTR flag to mark received RTR frames. Without the fix the can_dlc value for received RTR frames would always be set to 8 by get_can_dlc() instead of the received value. 
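A worked example, assuming ESD_RTR is the 0x10 flag bit in the dlc byte as defined by the driver: an RTR frame with a real dlc of 4 arrives with msg->msg.rx.dlc == 0x14.

        u8 dlc = 0x10 | 4;              /* RTR flag folded into dlc byte */
        get_can_dlc(dlc);               /* 20 clamps to 8: wrong         */
        get_can_dlc(dlc & ~ESD_RTR);    /* 4: the received value         */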
Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Signed-off-by: Stefan Mätje Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/esd_usb2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 113e64fcd73b..4c6707ecc619 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -333,7 +333,7 @@ static void esd_usb2_rx_can_msg(struct esd_usb2_net_priv *priv, } cf->can_id = id & ESD_IDMASK; - cf->can_dlc = get_can_dlc(msg->msg.rx.dlc); + cf->can_dlc = get_can_dlc(msg->msg.rx.dlc & ~ESD_RTR); if (id & ESD_EXTID) cf->can_id |= CAN_EFF_FLAG; -- cgit v1.2.3 From 195674adee572b97761501f3906e4cc0e290891f Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 1 Oct 2017 13:52:43 -0400 Subject: drm/nouveau/bsp/g92: disable by default commit 194d68dd051c2dd5ac2b522ae16100e774e8d869 upstream. G92's seem to require some additional bit of initialization before the BSP engine can work. It feels like clocks are not set up for the underlying VLD engine, which means that all commands submitted to the xtensa chip end up hanging. VP seems to work fine though. This still allows people to force-enable the bsp engine if they want to play around with it, but makes it harder for the card to hang by default. Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c index 3ef01071f073..103471ff4dc4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/bsp/g84.c @@ -40,5 +40,5 @@ int g84_bsp_new(struct nvkm_device *device, int index, struct nvkm_engine **pengine) { return nvkm_xtensa_new_(&g84_bsp, device, index, - true, 0x103000, pengine); + device->chipset != 0x92, 0x103000, pengine); } -- cgit v1.2.3 From 4516069f1b05d3e84f5e11a8490ee0ca09997731 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 25 Sep 2017 15:05:38 +1000 Subject: drm/nouveau/mmu: flush tlbs before deleting page tables commit 77913bbcb43ac9a07a6fe849c2fd3bf85fc8bdd8 upstream. Even though we've zeroed the PDE, the GPU may have cached the PD, so we need to flush when deleting them. Noticed while working on replacement MMU code, but a backport might be a good idea, so let's fix it in the current code too. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index e04a2296ecd0..5bb7f7e0f11f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -240,6 +240,8 @@ nvkm_vm_unmap_pgt(struct nvkm_vm *vm, int big, u32 fpde, u32 lpde) mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem); } + mmu->func->flush(vm); + nvkm_memory_del(&pgt); } } -- cgit v1.2.3 From f9e937124ec22a0c36c45a2d07d693492a46471e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 18 Oct 2017 00:45:49 +0100 Subject: ALSA: seq: Enable 'use' locking in all configurations commit 8009d506a1dd00cf436b0c4cca0dcec130580a21 upstream. The 'use' locking macros are no-ops if neither SMP or SND_DEBUG is enabled. 
This might once have been OK in non-preemptible configurations, but even in that case snd_seq_read() may sleep while relying on a 'use' lock. So always use the proper implementations. Signed-off-by: Ben Hutchings Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_lock.c | 4 ---- sound/core/seq/seq_lock.h | 12 ------------ 2 files changed, 16 deletions(-) diff --git a/sound/core/seq/seq_lock.c b/sound/core/seq/seq_lock.c index 12ba83367b1b..ba5752ee9af3 100644 --- a/sound/core/seq/seq_lock.c +++ b/sound/core/seq/seq_lock.c @@ -23,8 +23,6 @@ #include #include "seq_lock.h" -#if defined(CONFIG_SMP) || defined(CONFIG_SND_DEBUG) - /* wait until all locks are released */ void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line) { @@ -42,5 +40,3 @@ void snd_use_lock_sync_helper(snd_use_lock_t *lockp, const char *file, int line) } EXPORT_SYMBOL(snd_use_lock_sync_helper); - -#endif diff --git a/sound/core/seq/seq_lock.h b/sound/core/seq/seq_lock.h index 54044bc2c9ef..ac38031c370e 100644 --- a/sound/core/seq/seq_lock.h +++ b/sound/core/seq/seq_lock.h @@ -3,8 +3,6 @@ #include -#if defined(CONFIG_SMP) || defined(CONFIG_SND_DEBUG) - typedef atomic_t snd_use_lock_t; /* initialize lock */ @@ -20,14 +18,4 @@ typedef atomic_t snd_use_lock_t; void snd_use_lock_sync_helper(snd_use_lock_t *lock, const char *file, int line); #define snd_use_lock_sync(lockp) snd_use_lock_sync_helper(lockp, __BASE_FILE__, __LINE__) -#else /* SMP || CONFIG_SND_DEBUG */ - -typedef spinlock_t snd_use_lock_t; /* dummy */ -#define snd_use_lock_init(lockp) /**/ -#define snd_use_lock_use(lockp) /**/ -#define snd_use_lock_free(lockp) /**/ -#define snd_use_lock_sync(lockp) /**/ - -#endif /* SMP || CONFIG_SND_DEBUG */ - #endif /* __SND_SEQ_LOCK_H */ -- cgit v1.2.3 From 68c610776cfb1dc1f541d88b17918a4182adf392 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 17 Oct 2017 11:58:17 +0200 Subject: ALSA: hda: Remove superfluous '-' added by printk conversion commit 6bf88a343db2b3c160edf9b82a74966b31cc80bd upstream. While converting the error messages to the standard macros in the commit 4e76a8833fac ("ALSA: hda - Replace with standard printk"), a superfluous '-' slipped in the code mistakenly. Its influence is almost negligible, merely shows a dB value as negative integer instead of positive integer (or vice versa) in the rare error message. So let's kill this embarrassing byte to show more correct value. Fixes: 4e76a8833fac ("ALSA: hda - Replace with standard printk") Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 83741887faa1..3324f98c35f6 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1755,7 +1755,7 @@ static int get_kctl_0dB_offset(struct hda_codec *codec, return -1; if (*step_to_check && *step_to_check != step) { codec_err(codec, "Mismatching dB step for vmaster slave (%d!=%d)\n", -- *step_to_check, step); + *step_to_check, step); return -1; } *step_to_check = step; -- cgit v1.2.3 From efdcbffb2b16da90eb5446a681cd3082b9b027f0 Mon Sep 17 00:00:00 2001 From: Pontus Andersson Date: Mon, 2 Oct 2017 14:45:19 +0200 Subject: i2c: ismt: Separate I2C block read from SMBus block read commit c6ebcedbab7ca78984959386012a17b21183e1a3 upstream. Commit b6c159a9cb69 ("i2c: ismt: Don't duplicate the receive length for block reads") broke I2C block reads. 
It aimed to fix normal SMBus block read, but changed the correct behavior of I2C block read in the process. According to Documentation/i2c/smbus-protocol, one vital difference between normal SMBus block read and I2C block read is that there is no byte count prefixed in the data sent on the wire: SMBus Block Read: i2c_smbus_read_block_data() S Addr Wr [A] Comm [A] S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P I2C Block Read: i2c_smbus_read_i2c_block_data() S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P Therefore the two transaction types need to be processed differently in the driver by copying the dma_buffer as done previously for the I2C_SMBUS_I2C_BLOCK_DATA case. Fixes: b6c159a9cb69 ("i2c: ismt: Don't duplicate the receive length for block reads") Signed-off-by: Pontus Andersson Tested-by: Stephen Douthit Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-ismt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 639d1a9c8793..1111cb966a44 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -338,12 +338,15 @@ static int ismt_process_desc(const struct ismt_desc *desc, data->word = dma_buffer[0] | (dma_buffer[1] << 8); break; case I2C_SMBUS_BLOCK_DATA: - case I2C_SMBUS_I2C_BLOCK_DATA: if (desc->rxbytes != dma_buffer[0] + 1) return -EMSGSIZE; memcpy(data->block, dma_buffer, desc->rxbytes); break; + case I2C_SMBUS_I2C_BLOCK_DATA: + memcpy(&data->block[1], dma_buffer, desc->rxbytes); + data->block[0] = desc->rxbytes; + break; } return 0; } -- cgit v1.2.3 From 260b6739e8b7ce23ef5c73b99b0c399083e72db9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Sep 2017 23:29:12 +0200 Subject: brcmsmac: make some local variables 'static const' to reduce stack size commit c503dd38f850be28867ef7a42d9abe5ade81a9bd upstream. With KASAN and a couple of other patches applied, this driver is one of the few remaining ones that actually use more than 2048 bytes of kernel stack: broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy_gainctrl': broadcom/brcm80211/brcmsmac/phy/phy_n.c:16065:1: warning: the frame size of 3264 bytes is larger than 2048 bytes [-Wframe-larger-than=] broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy': broadcom/brcm80211/brcmsmac/phy/phy_n.c:17138:1: warning: the frame size of 2864 bytes is larger than 2048 bytes [-Wframe-larger-than=] Here, I'm reducing the stack size by marking as many local variables as 'static const' as I can without changing the actual code. This is the first of three patches to improve the stack usage in this driver. It would be good to have this backported to stable kernels to get all drivers in 'allmodconfig' below the 2048 byte limit so we can turn on the frame warning again globally, but I realize that the patch is larger than the normal limit for stable backports. The other two patches do not need to be backported.
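The mechanism in miniature: a non-static local array with an initializer is rebuilt on the stack at every call, while a static const array lives in .rodata and costs no stack at all. A sketch, where use() is a hypothetical consumer and s8 is the kernel's signed 8-bit type:

        extern void use(const s8 *gains);

        void before(void)
        {
                s8 gain_db[] = { 7, 11, 16, 23 };       /* copied to stack */
                use(gain_db);
        }

        void after(void)
        {
                static const s8 gain_db[] = { 7, 11, 16, 23 }; /* .rodata */
                use(gain_db);
        }

With dozens of such tables live in one function, the per-call stack copies are what pushed these frames past the 2048-byte limit.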
Acked-by: Arend van Spriel Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- .../net/wireless/brcm80211/brcmsmac/phy/phy_n.c | 197 ++++++++++----------- 1 file changed, 97 insertions(+), 100 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmsmac/phy/phy_n.c b/drivers/net/wireless/brcm80211/brcmsmac/phy/phy_n.c index 99dac9b8a082..c75bfd3f8cb3 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/phy/phy_n.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/phy/phy_n.c @@ -14764,8 +14764,8 @@ static void wlc_phy_ipa_restore_tx_digi_filts_nphy(struct brcms_phy *pi) } static void -wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, u8 *events, u8 *dlys, - u8 len) +wlc_phy_set_rfseq_nphy(struct brcms_phy *pi, u8 cmd, const u8 *events, + const u8 *dlys, u8 len) { u32 t1_offset, t2_offset; u8 ctr; @@ -15240,16 +15240,16 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev5(struct brcms_phy *pi) static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) { u16 currband; - s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 }; - s8 *lna1_gain_db = NULL; - s8 *lna1_gain_db_2 = NULL; - s8 *lna2_gain_db = NULL; - s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 }; - s8 *tia_gain_db; - s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 }; - s8 *tia_gainbits; - u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f }; - u16 *rfseq_init_gain; + static const s8 lna1G_gain_db_rev7[] = { 9, 14, 19, 24 }; + const s8 *lna1_gain_db = NULL; + const s8 *lna1_gain_db_2 = NULL; + const s8 *lna2_gain_db = NULL; + static const s8 tiaA_gain_db_rev7[] = { -9, -6, -3, 0, 3, 3, 3, 3, 3, 3 }; + const s8 *tia_gain_db; + static const s8 tiaA_gainbits_rev7[] = { 0, 1, 2, 3, 4, 4, 4, 4, 4, 4 }; + const s8 *tia_gainbits; + static const u16 rfseqA_init_gain_rev7[] = { 0x624f, 0x624f }; + const u16 *rfseq_init_gain; u16 init_gaincode; u16 clip1hi_gaincode; u16 clip1md_gaincode = 0; @@ -15310,10 +15310,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) if ((freq <= 5080) || (freq == 5825)) { - s8 lna1A_gain_db_rev7[] = { 11, 16, 20, 24 }; - s8 lna1A_gain_db_2_rev7[] = { - 11, 17, 22, 25}; - s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; + static const s8 lna1A_gain_db_rev7[] = { 11, 16, 20, 24 }; + static const s8 lna1A_gain_db_2_rev7[] = { 11, 17, 22, 25}; + static const s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; crsminu_th = 0x3e; lna1_gain_db = lna1A_gain_db_rev7; @@ -15321,10 +15320,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) lna2_gain_db = lna2A_gain_db_rev7; } else if ((freq >= 5500) && (freq <= 5700)) { - s8 lna1A_gain_db_rev7[] = { 11, 17, 21, 25 }; - s8 lna1A_gain_db_2_rev7[] = { - 12, 18, 22, 26}; - s8 lna2A_gain_db_rev7[] = { 1, 8, 12, 16 }; + static const s8 lna1A_gain_db_rev7[] = { 11, 17, 21, 25 }; + static const s8 lna1A_gain_db_2_rev7[] = { 12, 18, 22, 26}; + static const s8 lna2A_gain_db_rev7[] = { 1, 8, 12, 16 }; crsminu_th = 0x45; clip1md_gaincode_B = 0x14; @@ -15335,10 +15333,9 @@ static void wlc_phy_workarounds_nphy_gainctrl_2057_rev6(struct brcms_phy *pi) lna2_gain_db = lna2A_gain_db_rev7; } else { - s8 lna1A_gain_db_rev7[] = { 12, 18, 22, 26 }; - s8 lna1A_gain_db_2_rev7[] = { - 12, 18, 22, 26}; - s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; + static const s8 lna1A_gain_db_rev7[] = { 12, 18, 22, 26 }; + static const s8 lna1A_gain_db_2_rev7[] = { 12, 18, 22, 26}; + static const s8 lna2A_gain_db_rev7[] = { -1, 6, 10, 14 }; crsminu_th = 0x41; lna1_gain_db = 
lna1A_gain_db_rev7; @@ -15450,65 +15447,65 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) NPHY_RFSEQ_CMD_CLR_HIQ_DIS, NPHY_RFSEQ_CMD_SET_HPF_BW }; - u8 rfseq_updategainu_dlys[] = { 10, 30, 1 }; - s8 lna1G_gain_db[] = { 7, 11, 16, 23 }; - s8 lna1G_gain_db_rev4[] = { 8, 12, 17, 25 }; - s8 lna1G_gain_db_rev5[] = { 9, 13, 18, 26 }; - s8 lna1G_gain_db_rev6[] = { 8, 13, 18, 25 }; - s8 lna1G_gain_db_rev6_224B0[] = { 10, 14, 19, 27 }; - s8 lna1A_gain_db[] = { 7, 11, 17, 23 }; - s8 lna1A_gain_db_rev4[] = { 8, 12, 18, 23 }; - s8 lna1A_gain_db_rev5[] = { 6, 10, 16, 21 }; - s8 lna1A_gain_db_rev6[] = { 6, 10, 16, 21 }; - s8 *lna1_gain_db = NULL; - s8 lna2G_gain_db[] = { -5, 6, 10, 14 }; - s8 lna2G_gain_db_rev5[] = { -3, 7, 11, 16 }; - s8 lna2G_gain_db_rev6[] = { -5, 6, 10, 14 }; - s8 lna2G_gain_db_rev6_224B0[] = { -5, 6, 10, 15 }; - s8 lna2A_gain_db[] = { -6, 2, 6, 10 }; - s8 lna2A_gain_db_rev4[] = { -5, 2, 6, 10 }; - s8 lna2A_gain_db_rev5[] = { -7, 0, 4, 8 }; - s8 lna2A_gain_db_rev6[] = { -7, 0, 4, 8 }; - s8 *lna2_gain_db = NULL; - s8 tiaG_gain_db[] = { + static const u8 rfseq_updategainu_dlys[] = { 10, 30, 1 }; + static const s8 lna1G_gain_db[] = { 7, 11, 16, 23 }; + static const s8 lna1G_gain_db_rev4[] = { 8, 12, 17, 25 }; + static const s8 lna1G_gain_db_rev5[] = { 9, 13, 18, 26 }; + static const s8 lna1G_gain_db_rev6[] = { 8, 13, 18, 25 }; + static const s8 lna1G_gain_db_rev6_224B0[] = { 10, 14, 19, 27 }; + static const s8 lna1A_gain_db[] = { 7, 11, 17, 23 }; + static const s8 lna1A_gain_db_rev4[] = { 8, 12, 18, 23 }; + static const s8 lna1A_gain_db_rev5[] = { 6, 10, 16, 21 }; + static const s8 lna1A_gain_db_rev6[] = { 6, 10, 16, 21 }; + const s8 *lna1_gain_db = NULL; + static const s8 lna2G_gain_db[] = { -5, 6, 10, 14 }; + static const s8 lna2G_gain_db_rev5[] = { -3, 7, 11, 16 }; + static const s8 lna2G_gain_db_rev6[] = { -5, 6, 10, 14 }; + static const s8 lna2G_gain_db_rev6_224B0[] = { -5, 6, 10, 15 }; + static const s8 lna2A_gain_db[] = { -6, 2, 6, 10 }; + static const s8 lna2A_gain_db_rev4[] = { -5, 2, 6, 10 }; + static const s8 lna2A_gain_db_rev5[] = { -7, 0, 4, 8 }; + static const s8 lna2A_gain_db_rev6[] = { -7, 0, 4, 8 }; + const s8 *lna2_gain_db = NULL; + static const s8 tiaG_gain_db[] = { 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A }; - s8 tiaA_gain_db[] = { + static const s8 tiaA_gain_db[] = { 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13 }; - s8 tiaA_gain_db_rev4[] = { + static const s8 tiaA_gain_db_rev4[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 tiaA_gain_db_rev5[] = { + static const s8 tiaA_gain_db_rev5[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 tiaA_gain_db_rev6[] = { + static const s8 tiaA_gain_db_rev6[] = { 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d }; - s8 *tia_gain_db; - s8 tiaG_gainbits[] = { + const s8 *tia_gain_db; + static const s8 tiaG_gainbits[] = { 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 }; - s8 tiaA_gainbits[] = { + static const s8 tiaA_gainbits[] = { 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 }; - s8 tiaA_gainbits_rev4[] = { + static const s8 tiaA_gainbits_rev4[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 tiaA_gainbits_rev5[] = { + static const s8 tiaA_gainbits_rev5[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 tiaA_gainbits_rev6[] = { + static const s8 tiaA_gainbits_rev6[] = { 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 }; - s8 *tia_gainbits; 
- s8 lpf_gain_db[] = { 0x00, 0x06, 0x0c, 0x12, 0x12, 0x12 }; - s8 lpf_gainbits[] = { 0x00, 0x01, 0x02, 0x03, 0x03, 0x03 }; - u16 rfseqG_init_gain[] = { 0x613f, 0x613f, 0x613f, 0x613f }; - u16 rfseqG_init_gain_rev4[] = { 0x513f, 0x513f, 0x513f, 0x513f }; - u16 rfseqG_init_gain_rev5[] = { 0x413f, 0x413f, 0x413f, 0x413f }; - u16 rfseqG_init_gain_rev5_elna[] = { + const s8 *tia_gainbits; + static const s8 lpf_gain_db[] = { 0x00, 0x06, 0x0c, 0x12, 0x12, 0x12 }; + static const s8 lpf_gainbits[] = { 0x00, 0x01, 0x02, 0x03, 0x03, 0x03 }; + static const u16 rfseqG_init_gain[] = { 0x613f, 0x613f, 0x613f, 0x613f }; + static const u16 rfseqG_init_gain_rev4[] = { 0x513f, 0x513f, 0x513f, 0x513f }; + static const u16 rfseqG_init_gain_rev5[] = { 0x413f, 0x413f, 0x413f, 0x413f }; + static const u16 rfseqG_init_gain_rev5_elna[] = { 0x013f, 0x013f, 0x013f, 0x013f }; - u16 rfseqG_init_gain_rev6[] = { 0x513f, 0x513f }; - u16 rfseqG_init_gain_rev6_224B0[] = { 0x413f, 0x413f }; - u16 rfseqG_init_gain_rev6_elna[] = { 0x113f, 0x113f }; - u16 rfseqA_init_gain[] = { 0x516f, 0x516f, 0x516f, 0x516f }; - u16 rfseqA_init_gain_rev4[] = { 0x614f, 0x614f, 0x614f, 0x614f }; - u16 rfseqA_init_gain_rev4_elna[] = { + static const u16 rfseqG_init_gain_rev6[] = { 0x513f, 0x513f }; + static const u16 rfseqG_init_gain_rev6_224B0[] = { 0x413f, 0x413f }; + static const u16 rfseqG_init_gain_rev6_elna[] = { 0x113f, 0x113f }; + static const u16 rfseqA_init_gain[] = { 0x516f, 0x516f, 0x516f, 0x516f }; + static const u16 rfseqA_init_gain_rev4[] = { 0x614f, 0x614f, 0x614f, 0x614f }; + static const u16 rfseqA_init_gain_rev4_elna[] = { 0x314f, 0x314f, 0x314f, 0x314f }; - u16 rfseqA_init_gain_rev5[] = { 0x714f, 0x714f, 0x714f, 0x714f }; - u16 rfseqA_init_gain_rev6[] = { 0x714f, 0x714f }; - u16 *rfseq_init_gain; + static const u16 rfseqA_init_gain_rev5[] = { 0x714f, 0x714f, 0x714f, 0x714f }; + static const u16 rfseqA_init_gain_rev6[] = { 0x714f, 0x714f }; + const u16 *rfseq_init_gain; u16 initG_gaincode = 0x627e; u16 initG_gaincode_rev4 = 0x527e; u16 initG_gaincode_rev5 = 0x427e; @@ -15538,10 +15535,10 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) u16 clip1mdA_gaincode_rev6 = 0x2084; u16 clip1md_gaincode = 0; u16 clip1loG_gaincode = 0x0074; - u16 clip1loG_gaincode_rev5[] = { + static const u16 clip1loG_gaincode_rev5[] = { 0x0062, 0x0064, 0x006a, 0x106a, 0x106c, 0x1074, 0x107c, 0x207c }; - u16 clip1loG_gaincode_rev6[] = { + static const u16 clip1loG_gaincode_rev6[] = { 0x106a, 0x106c, 0x1074, 0x107c, 0x007e, 0x107e, 0x207e, 0x307e }; u16 clip1loG_gaincode_rev6_224B0 = 0x1074; @@ -16066,7 +16063,7 @@ static void wlc_phy_workarounds_nphy_gainctrl(struct brcms_phy *pi) static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) { - u8 rfseq_rx2tx_events[] = { + static const u8 rfseq_rx2tx_events[] = { NPHY_RFSEQ_CMD_NOP, NPHY_RFSEQ_CMD_RXG_FBW, NPHY_RFSEQ_CMD_TR_SWITCH, @@ -16076,7 +16073,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_RFSEQ_CMD_EXT_PA }; u8 rfseq_rx2tx_dlys[] = { 8, 6, 6, 2, 4, 60, 1 }; - u8 rfseq_tx2rx_events[] = { + static const u8 rfseq_tx2rx_events[] = { NPHY_RFSEQ_CMD_NOP, NPHY_RFSEQ_CMD_EXT_PA, NPHY_RFSEQ_CMD_TX_GAIN, @@ -16085,8 +16082,8 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_RFSEQ_CMD_RXG_FBW, NPHY_RFSEQ_CMD_CLR_HIQ_DIS }; - u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; - u8 rfseq_tx2rx_events_rev3[] = { + static const u8 rfseq_tx2rx_dlys[] = { 8, 6, 2, 4, 4, 6, 1 }; + static const u8 rfseq_tx2rx_events_rev3[] = { NPHY_REV3_RFSEQ_CMD_EXT_PA, 
NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_TX_GAIN, @@ -16096,7 +16093,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_CLR_HIQ_DIS, NPHY_REV3_RFSEQ_CMD_END }; - u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; + static const u8 rfseq_tx2rx_dlys_rev3[] = { 8, 4, 2, 2, 4, 4, 6, 1 }; u8 rfseq_rx2tx_events_rev3[] = { NPHY_REV3_RFSEQ_CMD_NOP, NPHY_REV3_RFSEQ_CMD_RXG_FBW, @@ -16110,7 +16107,7 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) }; u8 rfseq_rx2tx_dlys_rev3[] = { 8, 6, 6, 4, 4, 18, 42, 1, 1 }; - u8 rfseq_rx2tx_events_rev3_ipa[] = { + static const u8 rfseq_rx2tx_events_rev3_ipa[] = { NPHY_REV3_RFSEQ_CMD_NOP, NPHY_REV3_RFSEQ_CMD_RXG_FBW, NPHY_REV3_RFSEQ_CMD_TR_SWITCH, @@ -16121,15 +16118,15 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) NPHY_REV3_RFSEQ_CMD_INT_PA_PU, NPHY_REV3_RFSEQ_CMD_END }; - u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; - u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; + static const u8 rfseq_rx2tx_dlys_rev3_ipa[] = { 8, 6, 6, 4, 4, 16, 43, 1, 1 }; + static const u16 rfseq_rx2tx_dacbufpu_rev7[] = { 0x10f, 0x10f }; s16 alpha0, alpha1, alpha2; s16 beta0, beta1, beta2; u32 leg_data_weights, ht_data_weights, nss1_data_weights, stbc_data_weights; u8 chan_freq_range = 0; - u16 dac_control = 0x0002; + static const u16 dac_control = 0x0002; u16 aux_adc_vmid_rev7_core0[] = { 0x8e, 0x96, 0x96, 0x96 }; u16 aux_adc_vmid_rev7_core1[] = { 0x8f, 0x9f, 0x9f, 0x96 }; u16 aux_adc_vmid_rev4[] = { 0xa2, 0xb4, 0xb4, 0x89 }; @@ -16139,8 +16136,8 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) u16 aux_adc_gain_rev4[] = { 0x02, 0x02, 0x02, 0x00 }; u16 aux_adc_gain_rev3[] = { 0x02, 0x02, 0x02, 0x00 }; u16 *aux_adc_gain; - u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; - u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; + static const u16 sk_adc_vmid[] = { 0xb4, 0xb4, 0xb4, 0x24 }; + static const u16 sk_adc_gain[] = { 0x02, 0x02, 0x02, 0x02 }; s32 min_nvar_val = 0x18d; s32 min_nvar_offset_6mbps = 20; u8 pdetrange; @@ -16151,9 +16148,9 @@ static void wlc_phy_workarounds_nphy(struct brcms_phy *pi) u16 rfseq_rx2tx_lpf_h_hpc_rev7 = 0x77; u16 rfseq_tx2rx_lpf_h_hpc_rev7 = 0x77; u16 rfseq_pktgn_lpf_h_hpc_rev7 = 0x77; - u16 rfseq_htpktgn_lpf_hpc_rev7[] = { 0x77, 0x11, 0x11 }; - u16 rfseq_pktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; - u16 rfseq_cckpktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; + static const u16 rfseq_htpktgn_lpf_hpc_rev7[] = { 0x77, 0x11, 0x11 }; + static const u16 rfseq_pktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; + static const u16 rfseq_cckpktgn_lpf_hpc_rev7[] = { 0x11, 0x11 }; u16 ipalvlshift_3p3_war_en = 0; u16 rccal_bcap_val, rccal_scap_val; u16 rccal_tx20_11b_bcap = 0; @@ -24291,13 +24288,13 @@ static void wlc_phy_update_txcal_ladder_nphy(struct brcms_phy *pi, u16 core) u16 bbmult; u16 tblentry; - struct nphy_txiqcal_ladder ladder_lo[] = { + static const struct nphy_txiqcal_ladder ladder_lo[] = { {3, 0}, {4, 0}, {6, 0}, {9, 0}, {13, 0}, {18, 0}, {25, 0}, {25, 1}, {25, 2}, {25, 3}, {25, 4}, {25, 5}, {25, 6}, {25, 7}, {35, 7}, {50, 7}, {71, 7}, {100, 7} }; - struct nphy_txiqcal_ladder ladder_iq[] = { + static const struct nphy_txiqcal_ladder ladder_iq[] = { {3, 0}, {4, 0}, {6, 0}, {9, 0}, {13, 0}, {18, 0}, {25, 0}, {35, 0}, {50, 0}, {71, 0}, {100, 0}, {100, 1}, {100, 2}, {100, 3}, {100, 4}, {100, 5}, {100, 6}, {100, 7} @@ -25773,67 +25770,67 @@ wlc_phy_cal_txiqlo_nphy(struct brcms_phy *pi, struct nphy_txgains target_gain, u16 cal_gain[2]; struct nphy_iqcal_params cal_params[2]; u32 
tbl_len; - void *tbl_ptr; + const void *tbl_ptr; bool ladder_updated[2]; u8 mphase_cal_lastphase = 0; int bcmerror = 0; bool phyhang_avoid_state = false; - u16 tbl_tx_iqlo_cal_loft_ladder_20[] = { + static const u16 tbl_tx_iqlo_cal_loft_ladder_20[] = { 0x0300, 0x0500, 0x0700, 0x0900, 0x0d00, 0x1100, 0x1900, 0x1901, 0x1902, 0x1903, 0x1904, 0x1905, 0x1906, 0x1907, 0x2407, 0x3207, 0x4607, 0x6407 }; - u16 tbl_tx_iqlo_cal_iqimb_ladder_20[] = { + static const u16 tbl_tx_iqlo_cal_iqimb_ladder_20[] = { 0x0200, 0x0300, 0x0600, 0x0900, 0x0d00, 0x1100, 0x1900, 0x2400, 0x3200, 0x4600, 0x6400, 0x6401, 0x6402, 0x6403, 0x6404, 0x6405, 0x6406, 0x6407 }; - u16 tbl_tx_iqlo_cal_loft_ladder_40[] = { + static const u16 tbl_tx_iqlo_cal_loft_ladder_40[] = { 0x0200, 0x0300, 0x0400, 0x0700, 0x0900, 0x0c00, 0x1200, 0x1201, 0x1202, 0x1203, 0x1204, 0x1205, 0x1206, 0x1207, 0x1907, 0x2307, 0x3207, 0x4707 }; - u16 tbl_tx_iqlo_cal_iqimb_ladder_40[] = { + static const u16 tbl_tx_iqlo_cal_iqimb_ladder_40[] = { 0x0100, 0x0200, 0x0400, 0x0700, 0x0900, 0x0c00, 0x1200, 0x1900, 0x2300, 0x3200, 0x4700, 0x4701, 0x4702, 0x4703, 0x4704, 0x4705, 0x4706, 0x4707 }; - u16 tbl_tx_iqlo_cal_startcoefs[] = { + static const u16 tbl_tx_iqlo_cal_startcoefs[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - u16 tbl_tx_iqlo_cal_cmds_fullcal[] = { + static const u16 tbl_tx_iqlo_cal_cmds_fullcal[] = { 0x8123, 0x8264, 0x8086, 0x8245, 0x8056, 0x9123, 0x9264, 0x9086, 0x9245, 0x9056 }; - u16 tbl_tx_iqlo_cal_cmds_recal[] = { + static const u16 tbl_tx_iqlo_cal_cmds_recal[] = { 0x8101, 0x8253, 0x8053, 0x8234, 0x8034, 0x9101, 0x9253, 0x9053, 0x9234, 0x9034 }; - u16 tbl_tx_iqlo_cal_startcoefs_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_startcoefs_nphyrev3[] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; - u16 tbl_tx_iqlo_cal_cmds_fullcal_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_cmds_fullcal_nphyrev3[] = { 0x8434, 0x8334, 0x8084, 0x8267, 0x8056, 0x8234, 0x9434, 0x9334, 0x9084, 0x9267, 0x9056, 0x9234 }; - u16 tbl_tx_iqlo_cal_cmds_recal_nphyrev3[] = { + static const u16 tbl_tx_iqlo_cal_cmds_recal_nphyrev3[] = { 0x8423, 0x8323, 0x8073, 0x8256, 0x8045, 0x8223, 0x9423, 0x9323, 0x9073, 0x9256, 0x9045, 0x9223 }; -- cgit v1.2.3 From b178c94efdfd7e7c649277c0f570c5db14aaba4f Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Mon, 28 Aug 2017 17:25:16 +0200 Subject: bus: mbus: fix window size calculation for 4GB windows commit 2bbbd96357ce76cc45ec722c00f654aa7b189112 upstream. At least the Armada XP SoC supports 4GB on a single DRAM window. Because the size register values contain the actual size - 1, the MSB is set in that case. For example, the SDRAM window's control register's value is 0xffffffe1 for 4GB (bits 31 to 24 contain the size). The MBUS driver reads back each window's size from registers and calculates the actual size as (control_reg | ~DDR_SIZE_MASK) + 1, which overflows for 32 bit values, resulting in other miscalculations further on (a bad RAM window for the CESA crypto engine calculated by mvebu_mbus_setup_cpu_target_nooverlap() in my case). This patch changes the type in 'struct mbus_dram_window' from u32 to u64, which allows us to keep using the same register calculation code in most MBUS-using drivers (which calculate ->size - 1 again). 
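To make the overflow concrete, here is a minimal user-space sketch of the old and new calculations (assumptions: DDR_SIZE_MASK is 0xff000000 as in this driver, and the control register value is the 4GB example quoted above):

	#include <stdint.h>
	#include <stdio.h>

	#define DDR_SIZE_MASK 0xff000000u	/* size field lives in bits 31:24 */

	int main(void)
	{
		uint32_t ctrl = 0xffffffe1;	/* SDRAM window control reg for 4GB */
		/* old driver code: the u32 addition wraps around to 0 */
		uint32_t old_size = (ctrl | ~DDR_SIZE_MASK) + 1;
		/* fixed code: widen before adding, giving 0x100000000 (4GB) */
		uint64_t new_size = (uint64_t)(ctrl | ~DDR_SIZE_MASK) + 1;

		printf("u32: %#x, u64: %#llx\n", (unsigned)old_size,
		       (unsigned long long)new_size);
		return 0;
	}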
Fixes: fddddb52a6c4 ("bus: introduce an Marvell EBU MBus driver") Signed-off-by: Jan Luebbe Signed-off-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mvebu-mbus.c | 2 +- include/linux/mbus.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index c43c3d2baf73..0d628becf37f 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -720,7 +720,7 @@ mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus) if (mbus->hw_io_coherency) w->mbus_attr |= ATTR_HW_COHERENCY; w->base = base & DDR_BASE_CS_LOW_MASK; - w->size = (size | ~DDR_SIZE_MASK) + 1; + w->size = (u64)(size | ~DDR_SIZE_MASK) + 1; } } mvebu_mbus_dram_info.num_cs = cs; diff --git a/include/linux/mbus.h b/include/linux/mbus.h index 1f7bc630d225..71a5a56b0bba 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -29,8 +29,8 @@ struct mbus_dram_target_info struct mbus_dram_window { u8 cs_index; u8 mbus_attr; - u32 base; - u32 size; + u64 base; + u64 size; } cs[4]; }; -- cgit v1.2.3 From cffdaa65e72f76efc821d551b78ceec1f76196be Mon Sep 17 00:00:00 2001 From: David Kozub Date: Thu, 19 Oct 2017 22:57:02 +0200 Subject: clockevents/drivers/cs5535: Improve resilience to spurious interrupts commit eb39a7c0355393c5a8d930f342ad7a6231b552c4 upstream. The interrupt handler mfgpt_tick() is not robust against spurious interrupts which happen before the clock event device is registered and fully initialized. The reason is that the safeguard against spurious interrupts solely checks for the clockevents shutdown state, but lacks a check for the detached state. If the interrupt hits while the device is in the detached state, it passes the safeguard and dereferences the event handler callback, which is NULL. Add the missing state check. Fixes: 8f9327cbb6e8 ("clockevents/drivers/cs5535: Migrate to new 'set-state' interface") Suggested-by: Thomas Gleixner Signed-off-by: David Kozub Signed-off-by: Thomas Gleixner Cc: Daniel Lezcano Link: https://lkml.kernel.org/r/20171020093103.3317F6004D@linux.fjfi.cvut.cz Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/cs5535-clockevt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c index 9a7e37cf56b0..e1d7373e63e0 100644 --- a/drivers/clocksource/cs5535-clockevt.c +++ b/drivers/clocksource/cs5535-clockevt.c @@ -117,7 +117,8 @@ static irqreturn_t mfgpt_tick(int irq, void *dev_id) /* Turn off the clock (and clear the event) */ disable_timer(cs5535_event_clock); - if (clockevent_state_shutdown(&cs5535_clockevent)) + if (clockevent_state_detached(&cs5535_clockevent) || + clockevent_state_shutdown(&cs5535_clockevent)) return IRQ_HANDLED; /* Clear the counter */ -- cgit v1.2.3 From 51ba40fcfd6784c7576268aa9de23630c397f387 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 20 Sep 2017 16:15:05 -0500 Subject: rtlwifi: rtl8821ae: Fix connection lost problem commit b8b8b16352cd90c6083033fd4487f04fae935c18 upstream. In commit 40b368af4b75 ("rtlwifi: Fix alignment issues"), the read of REG_DBI_READ was changed from 16 to 8 bits. For unknown reasons, this change results in reduced stability for the wireless connection. This regression was located using bisection.
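For reference, a bisection like this follows the standard git workflow; the good revision below is a placeholder, not the one actually tested:

	$ git bisect start
	$ git bisect bad                 # running kernel keeps dropping the link
	$ git bisect good v4.12          # hypothetical last known-good release
	  (build and boot the kernel git checks out, test the connection, then
	   mark it "git bisect good" or "git bisect bad"; repeat until a single
	   commit remains)
	$ git bisect reset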
Fixes: 40b368af4b75 ("rtlwifi: Fix alignment issues") Reported-and-tested-by: James Cameron Signed-off-by: Larry Finger Cc: Ping-Ke Shih Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index c2103e7a8132..bbb789f8990b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1127,7 +1127,7 @@ static u8 _rtl8821ae_dbi_read(struct rtl_priv *rtlpriv, u16 addr) } if (0 == tmp) { read_addr = REG_DBI_RDATA + addr % 4; - ret = rtl_read_byte(rtlpriv, read_addr); + ret = rtl_read_word(rtlpriv, read_addr); } return ret; } -- cgit v1.2.3 From 2b7e02267d3c8049b70fc44c410573fe0de8e6dc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:37:49 -0700 Subject: KEYS: encrypted: fix dereference of NULL user_key_payload commit 13923d0865ca96312197962522e88bc0aedccd74 upstream. A key of type "encrypted" references a "master key" which is used to encrypt and decrypt the encrypted key's payload. However, when we accessed the master key's payload, we failed to handle the case where the master key has been revoked, which sets the payload pointer to NULL. Note that request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we acquire its semaphore. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. This was an issue for master keys of type "user" only. Master keys can also be of type "trusted", but those cannot be revoked. Fixes: 7e70cb497850 ("keys: add new key-type encrypted") Reviewed-by: James Morris Cc: Mimi Zohar Cc: David Safford Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- security/keys/encrypted-keys/encrypted.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 31898856682e..dbd75de136d7 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -315,6 +315,13 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k down_read(&ukey->sem); upayload = user_key_payload(ukey); + if (!upayload) { + /* key was revoked before we acquired its semaphore */ + up_read(&ukey->sem); + key_put(ukey); + ukey = ERR_PTR(-EKEYREVOKED); + goto error; + } *master_key = upayload->data; *master_keylen = upayload->datalen; error: -- cgit v1.2.3 From 503ef5c070a106b52fe34a04fdf02cf1f5662150 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:43:20 -0700 Subject: lib/digsig: fix dereference of NULL user_key_payload commit 192cabd6a296cbc57b3d8c05c4c89d87fc102506 upstream. digsig_verify() requests a user key, then accesses its payload. However, a revoked key has a NULL payload, and we failed to check for this. request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we acquire its semaphore. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. 
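The shape of the fix, shared by this and the previous patch, as a minimal sketch (the function and variable names are illustrative, not the exact digsig or encrypted-keys code):

	static int get_master_key_payload(const char *desc)
	{
		const struct user_key_payload *ukp;
		struct key *key;

		key = request_key(&key_type_user, desc, NULL);
		if (IS_ERR(key))
			return PTR_ERR(key);

		down_read(&key->sem);
		ukp = user_key_payload(key);
		if (!ukp) {
			/* key was revoked between request_key() and down_read() */
			up_read(&key->sem);
			key_put(key);
			return -EKEYREVOKED;
		}
		/* ukp->data and ukp->datalen are only valid under key->sem */
		up_read(&key->sem);
		key_put(key);
		return 0;
	}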
Fixes: 051dbb918c7f ("crypto: digital signature verification support") Reviewed-by: James Morris Cc: Dmitry Kasatkin Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- lib/digsig.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/digsig.c b/lib/digsig.c index 07be6c1ef4e2..00c5c8179393 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -87,6 +87,12 @@ static int digsig_verify_rsa(struct key *key, down_read(&key->sem); ukp = user_key_payload(key); + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + err = -EKEYREVOKED; + goto err1; + } + if (ukp->datalen < sizeof(*pkh)) goto err1; -- cgit v1.2.3 From 33dea302f9bc1e2c41392a308cfb50f6c02bb096 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 12 Oct 2017 16:00:41 +0100 Subject: KEYS: don't let add_key() update an uninstantiated key commit 60ff5b2f547af3828aebafd54daded44cfb0807a upstream. Currently, when passed a key that already exists, add_key() will call the key's ->update() method if such exists. But this is heavily broken in the case where the key is uninstantiated because it doesn't call __key_instantiate_and_link(). Consequently, it doesn't do most of the things that are supposed to happen when the key is instantiated, such as setting the instantiation state, clearing KEY_FLAG_USER_CONSTRUCT and awakening tasks waiting on it, and incrementing key->user->nikeys. It also never takes key_construction_mutex, which means that ->instantiate() can run concurrently with ->update() on the same key. In the case of the "user" and "logon" key types this causes a memory leak, at best. Maybe even worse, the ->update() methods of the "encrypted" and "trusted" key types actually just dereference a NULL pointer when passed an uninstantiated key. Change key_create_or_update() to wait interruptibly for the key to finish construction before continuing. This patch only affects *uninstantiated* keys. For now we still allow a negatively instantiated key to be updated (thereby positively instantiating it), although that's broken too (the next patch fixes it) and I'm not sure that anyone actually uses that functionality either. 
Here is a simple reproducer for the bug using the "encrypted" key type (requires CONFIG_ENCRYPTED_KEYS=y), though as noted above the bug pertained to more than just the "encrypted" key type: #include <stdlib.h> #include <unistd.h> #include <keyutils.h> int main(void) { int ringid = keyctl_join_session_keyring(NULL); if (fork()) { for (;;) { const char payload[] = "update user:foo 32"; usleep(rand() % 10000); add_key("encrypted", "desc", payload, sizeof(payload), ringid); keyctl_clear(ringid); } } else { for (;;) request_key("encrypted", "desc", "callout_info", ringid); } } It causes: BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: encrypted_update+0xb0/0x170 PGD 7a178067 P4D 7a178067 PUD 77269067 PMD 0 PREEMPT SMP CPU: 0 PID: 340 Comm: reproduce Tainted: G D 4.14.0-rc1-00025-g428490e38b2e #796 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8a467a39a340 task.stack: ffffb15c40770000 RIP: 0010:encrypted_update+0xb0/0x170 RSP: 0018:ffffb15c40773de8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8a467a275b00 RCX: 0000000000000000 RDX: 0000000000000005 RSI: ffff8a467a275b14 RDI: ffffffffb742f303 RBP: ffffb15c40773e20 R08: 0000000000000000 R09: ffff8a467a275b17 R10: 0000000000000020 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff8a4677057180 R15: ffff8a467a275b0f FS: 00007f5d7fb08700(0000) GS:ffff8a467f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000018 CR3: 0000000077262005 CR4: 00000000001606f0 Call Trace: key_create_or_update+0x2bc/0x460 SyS_add_key+0x10c/0x1d0 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x7f5d7f211259 RSP: 002b:00007ffed03904c8 EFLAGS: 00000246 ORIG_RAX: 00000000000000f8 RAX: ffffffffffffffda RBX: 000000003b2a7955 RCX: 00007f5d7f211259 RDX: 00000000004009e4 RSI: 00000000004009ff RDI: 0000000000400a04 RBP: 0000000068db8bad R08: 000000003b2a7955 R09: 0000000000000004 R10: 000000000000001a R11: 0000000000000246 R12: 0000000000400868 R13: 00007ffed03905d0 R14: 0000000000000000 R15: 0000000000000000 Code: 77 28 e8 64 34 1f 00 45 31 c0 31 c9 48 8d 55 c8 48 89 df 48 8d 75 d0 e8 ff f9 ff ff 85 c0 41 89 c4 0f 88 84 00 00 00 4c 8b 7d c8 <49> 8b 75 18 4c 89 ff e8 24 f8 ff ff 85 c0 41 89 c4 78 6d 49 8b RIP: encrypted_update+0xb0/0x170 RSP: ffffb15c40773de8 CR2: 0000000000000018 Reported-by: Eric Biggers Signed-off-by: David Howells cc: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- security/keys/key.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/security/keys/key.c b/security/keys/key.c index 51d23c623424..2751ab4a7946 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -907,6 +907,16 @@ error: */ __key_link_end(keyring, &index_key, edit); + key = key_ref_to_ptr(key_ref); + if (test_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) { + ret = wait_for_key_construction(key, true); + if (ret < 0) { + key_ref_put(key_ref); + key_ref = ERR_PTR(ret); + goto error_free_prep; + } + } + key_ref = __key_update(key_ref, &prep); goto error_free_prep; } -- cgit v1.2.3 From 6f0dee7d9c9b815c45b96cad2ab2958b4afe5840 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 8 Oct 2017 20:02:32 +0200 Subject: pkcs7: Prevent NULL pointer dereference, since sinfo is not always set. commit 68a1fdbbf8bd3378325e45c19e167a165f9ffc3a upstream. The ASN.1 parser does not necessarily set the sinfo field; this patch prevents a NULL pointer dereference on broken input.
Fixes: 99db44350672 ("PKCS#7: Appropriately restrict authenticated attributes and content type") Signed-off-by: Eric Sesterhenn Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/pkcs7_parser.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index 8f3056cd0399..2516e97c58f1 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -90,6 +90,9 @@ static int pkcs7_check_authattrs(struct pkcs7_message *msg) bool want; sinfo = msg->signed_infos; + if (!sinfo) + goto inconsistent; + if (sinfo->authattrs) { want = true; msg->have_authattrs = true; -- cgit v1.2.3 From 558ca24dc296a859af75edf495a0972a00e9200d Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 28 Oct 2016 22:13:42 +0200 Subject: parisc: Avoid trashing sr2 and sr3 in LWS code commit f4125cfdb3008363137f744c101e5d76ead760ba upstream. There is no need to trash sr2 and sr3 in the Light-weight syscall (LWS). sr2 already points to kernel space (it's zero in userspace, otherwise syscalls wouldn't work), and since the LWS code is executed in userspace, we can simply ignore to preload sr3. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/syscall.S | 53 ++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index a86b19fccb63..196973ead9b8 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -479,11 +479,6 @@ lws_start: comiclr,>> __NR_lws_entries, %r20, %r0 b,n lws_exit_nosys - /* WARNING: Trashing sr2 and sr3 */ - mfsp %sr7,%r1 /* get userspace into sr3 */ - mtsp %r1,%sr3 - mtsp %r0,%sr2 /* get kernel space into sr2 */ - /* Load table start */ ldil L%lws_table, %r1 ldo R%lws_table(%r1), %r28 /* Scratch use of r28 */ @@ -632,9 +627,9 @@ cas_action: stw %r1, 4(%sr2,%r20) #endif /* The load and store could fail */ -1: ldw,ma 0(%sr3,%r26), %r28 +1: ldw,ma 0(%r26), %r28 sub,<> %r28, %r25, %r0 -2: stw,ma %r24, 0(%sr3,%r26) +2: stw,ma %r24, 0(%r26) /* Free lock */ stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG @@ -711,9 +706,9 @@ lws_compare_and_swap_2: nop /* 8bit load */ -4: ldb 0(%sr3,%r25), %r25 +4: ldb 0(%r25), %r25 b cas2_lock_start -5: ldb 0(%sr3,%r24), %r24 +5: ldb 0(%r24), %r24 nop nop nop @@ -721,9 +716,9 @@ lws_compare_and_swap_2: nop /* 16bit load */ -6: ldh 0(%sr3,%r25), %r25 +6: ldh 0(%r25), %r25 b cas2_lock_start -7: ldh 0(%sr3,%r24), %r24 +7: ldh 0(%r24), %r24 nop nop nop @@ -731,9 +726,9 @@ lws_compare_and_swap_2: nop /* 32bit load */ -8: ldw 0(%sr3,%r25), %r25 +8: ldw 0(%r25), %r25 b cas2_lock_start -9: ldw 0(%sr3,%r24), %r24 +9: ldw 0(%r24), %r24 nop nop nop @@ -742,14 +737,14 @@ lws_compare_and_swap_2: /* 64bit load */ #ifdef CONFIG_64BIT -10: ldd 0(%sr3,%r25), %r25 -11: ldd 0(%sr3,%r24), %r24 +10: ldd 0(%r25), %r25 +11: ldd 0(%r24), %r24 #else /* Load new value into r22/r23 - high/low */ -10: ldw 0(%sr3,%r25), %r22 -11: ldw 4(%sr3,%r25), %r23 +10: ldw 0(%r25), %r22 +11: ldw 4(%r25), %r23 /* Load new value into fr4 for atomic store later */ -12: flddx 0(%sr3,%r24), %fr4 +12: flddx 0(%r24), %fr4 #endif cas2_lock_start: @@ -799,30 +794,30 @@ cas2_action: ldo 1(%r0),%r28 /* 8bit CAS */ -13: ldb,ma 0(%sr3,%r26), %r29 +13: ldb,ma 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -14: stb,ma %r24, 0(%sr3,%r26) +14: stb,ma %r24, 0(%r26) b cas2_end copy %r0, %r28 
nop nop /* 16bit CAS */ -15: ldh,ma 0(%sr3,%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -16: sth,ma %r24, 0(%sr3,%r26) +15: ldh,ma 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end +16: sth,ma %r24, 0(%r26) b cas2_end copy %r0, %r28 nop nop /* 32bit CAS */ -17: ldw,ma 0(%sr3,%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -18: stw,ma %r24, 0(%sr3,%r26) +17: ldw,ma 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end +18: stw,ma %r24, 0(%r26) b cas2_end copy %r0, %r28 nop @@ -830,22 +825,22 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT -19: ldd,ma 0(%sr3,%r26), %r29 sub,*= %r29, %r25, %r0 b,n cas2_end -20: std,ma %r24, 0(%sr3,%r26) +19: ldd,ma 0(%r26), %r29 sub,*= %r29, %r25, %r0 b,n cas2_end +20: std,ma %r24, 0(%r26) copy %r0, %r28 #else /* Compare first word */ -19: ldw,ma 0(%sr3,%r26), %r29 sub,= %r29, %r22, %r0 b,n cas2_end /* Compare second word */ -20: ldw,ma 4(%sr3,%r26), %r29 sub,= %r29, %r23, %r0 b,n cas2_end /* Perform the store */ -21: fstdx %fr4, 0(%sr3,%r26) +19: ldw,ma 0(%r26), %r29 sub,= %r29, %r22, %r0 b,n cas2_end /* Compare second word */ +20: ldw,ma 4(%r26), %r29 sub,= %r29, %r23, %r0 b,n cas2_end /* Perform the store */ +21: fstdx %fr4, 0(%r26) copy %r0, %r28 #endif -- cgit v1.2.3 From fcc65ab173ebf797472b046f2d84663fbbe443a7 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 30 Sep 2017 17:24:23 -0400 Subject: parisc: Fix double-word compare and exchange in LWS code on 32-bit kernels commit 374b3bf8e8b519f61eb9775888074c6e46b3bf0c upstream. As discussed on the debian-hppa list, double-word compare and exchange operations fail on 32-bit kernels. Looking at the code, I realized that the ",ma" completer does the wrong thing in the "ldw,ma 4(%r26), %r29" instruction. This increments %r26 and causes the following store to write to the wrong location. Note by Helge Deller: The patch applies cleanly to stable kernel series if this upstream commit is merged in advance: f4125cfdb300 ("parisc: Avoid trashing sr2 and sr3 in LWS code"). Signed-off-by: John David Anglin Tested-by: Christoph Biedl Fixes: 89206491201c ("parisc: Implement new LWS CAS supporting 64 bit operations.") Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/syscall.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 196973ead9b8..c6b855f7892c 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -740,7 +740,7 @@ lws_compare_and_swap_2: 10: ldd 0(%r25), %r25 11: ldd 0(%r24), %r24 #else - /* Load new value into r22/r23 - high/low */ + /* Load old value into r22/r23 - high/low */ 10: ldw 0(%r25), %r22 11: ldw 4(%r25), %r23 /* Load new value into fr4 for atomic store later */ @@ -832,11 +832,11 @@ cas2_action: copy %r0, %r28 #else /* Compare first word */ -19: ldw,ma 0(%r26), %r29 sub,= %r29, %r22, %r0 b,n cas2_end /* Compare second word */ -20: ldw,ma 4(%r26), %r29 sub,= %r29, %r23, %r0 b,n cas2_end /* Perform the store */ +19: ldw 0(%r26), %r29 sub,= %r29, %r22, %r0 b,n cas2_end /* Compare second word */ +20: ldw 4(%r26), %r29 sub,= %r29, %r23, %r0 b,n cas2_end /* Perform the store */ -- cgit v1.2.3 From 0f85c0954be46bbd36960191daa447ad86b98f0b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 14 Nov 2016 19:46:09 +0100 Subject: sched/autogroup: Fix autogroup_move_group() to never skip sched_move_task() commit 18f649ef344127ef6de23a5a4272dbe2fdb73dde upstream. The PF_EXITING check in task_wants_autogroup() is no longer needed. Remove it, but see the next patch.
However the comment is correct in that autogroup_move_group() must always change task_group() for every thread so the sysctl_ check is very wrong; we can race with cgroups and even sys_setsid() is not safe because a task running with task_group() == ag->tg must participate in refcounting: int main(void) { int sctl = open("/proc/sys/kernel/sched_autogroup_enabled", O_WRONLY); assert(sctl > 0); if (fork()) { wait(NULL); // destroy the child's ag/tg pause(); } assert(pwrite(sctl, "1\n", 2, 0) == 2); assert(setsid() > 0); if (fork()) pause(); kill(getppid(), SIGKILL); sleep(1); // The child has gone, the grandchild runs with kref == 1 assert(pwrite(sctl, "0\n", 2, 0) == 2); assert(setsid() > 0); // runs with the freed ag/tg for (;;) sleep(1); return 0; } crashes the kernel. It doesn't really need sleep(1), it doesn't matter if autogroup_move_group() actually frees the task_group or this happens later. Reported-by: Vern Lovejoy Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hartsjc@redhat.com Cc: vbendel@redhat.com Link: http://lkml.kernel.org/r/20161114184609.GA15965@redhat.com Signed-off-by: Ingo Molnar Signed-off-by: Sumit Semwal [sumits: submit to 4.4 LTS, post testing on Hikey] Signed-off-by: Greg Kroah-Hartman --- kernel/sched/auto_group.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 750ed601ddf7..8620fd01b3d0 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c @@ -111,14 +111,11 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) { if (tg != &root_task_group) return false; - /* - * We can only assume the task group can't go away on us if - * autogroup_move_group() can see us on ->thread_group list. + * If we race with autogroup_move_group() the caller can use the old + * value of signal->autogroup but in this case sched_move_task() will + * be called again before autogroup_kref_put(). */ - if (p->flags & PF_EXITING) - return false; - return true; } @@ -138,13 +135,17 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) } p->signal->autogroup = autogroup_kref_get(ag); - - if (!READ_ONCE(sysctl_sched_autogroup_enabled)) - goto out; - + /* + * We can't avoid sched_move_task() after we changed signal->autogroup, + * this process can already run with task_group() == prev->tg or we can + * race with cgroup code which can read autogroup = prev under rq->lock. + * In the latter case for_each_thread() can not miss a migrating thread, + * cpu_cgroup_attach() must not be possible after cgroup_exit() and it + * can't be removed from thread list, we hold ->siglock. + */ for_each_thread(p, t) sched_move_task(t); -out: + unlock_task_sighand(p, &flags); autogroup_kref_put(prev); } -- cgit v1.2.3 From 4db9f1113196e7b4df4e754e7e770b22aee81c01 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Feb 2016 19:19:01 -0800 Subject: f2fs crypto: replace some BUG_ON()'s with error checks commit 66aa3e1274fcf887e9d6501a68163270fc7718e7 upstream. 
This patch adopts: ext4 crypto: replace some BUG_ON()'s with error checks Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/crypto.c | 1 - fs/f2fs/crypto_fname.c | 2 -- fs/f2fs/crypto_key.c | 15 ++++++++++++--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c index 4a62ef14e932..d879c6c846b7 100644 --- a/fs/f2fs/crypto.c +++ b/fs/f2fs/crypto.c @@ -362,7 +362,6 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx, else res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index 38349ed5ea51..0fce444dd5ae 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -124,7 +124,6 @@ static int f2fs_fname_encrypt(struct inode *inode, ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } @@ -180,7 +179,6 @@ static int f2fs_fname_decrypt(struct inode *inode, ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv); res = crypto_ablkcipher_decrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 18595d7a0efc..81c87f7a3251 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -75,7 +75,6 @@ static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE], F2FS_AES_256_XTS_KEY_SIZE, NULL); res = crypto_ablkcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { - BUG_ON(req->base.data != &ecr); wait_for_completion(&ecr.completion); res = ecr.res; } @@ -189,7 +188,11 @@ int f2fs_get_encryption_info(struct inode *inode) keyring_key = NULL; goto out; } - BUG_ON(keyring_key->type != &key_type_logon); + if (keyring_key->type != &key_type_logon) { + printk_once(KERN_WARNING "f2fs: key type must be logon\n"); + res = -ENOKEY; + goto out; + } ukp = user_key_payload(keyring_key); if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { res = -EINVAL; @@ -198,7 +201,13 @@ int f2fs_get_encryption_info(struct inode *inode) master_key = (struct f2fs_encryption_key *)ukp->data; BUILD_BUG_ON(F2FS_AES_128_ECB_KEY_SIZE != F2FS_KEY_DERIVATION_NONCE_SIZE); - BUG_ON(master_key->size != F2FS_AES_256_XTS_KEY_SIZE); + if (master_key->size != F2FS_AES_256_XTS_KEY_SIZE) { + printk_once(KERN_WARNING + "f2fs: key size incorrect: %d\n", + master_key->size); + res = -ENOKEY; + goto out; + } res = f2fs_derive_key_aes(ctx.nonce, master_key->raw, raw_key); if (res) -- cgit v1.2.3 From 7d9e13d953f2a3029d8b26a6f9a7dae83a4594ae Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Feb 2016 19:38:42 -0800 Subject: f2fs crypto: add missing locking for keyring_key access commit 745e8490b1e960ad79859dd8ba6a0b5a8d3d994e upstream. 
This patch adopts: ext4 crypto: add missing locking for keyring_key access Signed-off-by: Theodore Ts'o Signed-off-by: Jaegeuk Kim Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/crypto_key.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index 81c87f7a3251..ae49be377b60 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -193,9 +193,11 @@ int f2fs_get_encryption_info(struct inode *inode) res = -ENOKEY; goto out; } + down_read(&keyring_key->sem); ukp = user_key_payload(keyring_key); if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { res = -EINVAL; + up_read(&keyring_key->sem); goto out; } master_key = (struct f2fs_encryption_key *)ukp->data; @@ -206,10 +208,12 @@ int f2fs_get_encryption_info(struct inode *inode) "f2fs: key size incorrect: %d\n", master_key->size); res = -ENOKEY; + up_read(&keyring_key->sem); goto out; } res = f2fs_derive_key_aes(ctx.nonce, master_key->raw, raw_key); + up_read(&keyring_key->sem); if (res) goto out; -- cgit v1.2.3 From 1dda04c761abf006402f7f5e9adb11f9044731c8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:46:18 -0700 Subject: fscrypt: fix dereference of NULL user_key_payload commit d60b5b7854c3d135b869f74fb93eaf63cbb1991a upstream. When an fscrypt-encrypted file is opened, we request the file's master key from the keyrings service as a logon key, then access its payload. However, a revoked key has a NULL payload, and we failed to check for this. request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we acquire its semaphore. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. Fixes: 88bd6ccdcdd6 ("ext4 crypto: add encryption key management facilities") Reviewed-by: James Morris Cc: [v4.1+] Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- fs/ext4/crypto_key.c | 6 ++++++ fs/f2fs/crypto_key.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 505f8afde57c..9a1bc638abce 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -204,6 +204,12 @@ int ext4_get_encryption_info(struct inode *inode) } down_read(&keyring_key->sem); ukp = user_key_payload(keyring_key); + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + res = -EKEYREVOKED; + up_read(&keyring_key->sem); + goto out; + } if (ukp->datalen != sizeof(struct ext4_encryption_key)) { res = -EINVAL; up_read(&keyring_key->sem); diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c index ae49be377b60..7e62889a1d3d 100644 --- a/fs/f2fs/crypto_key.c +++ b/fs/f2fs/crypto_key.c @@ -195,6 +195,12 @@ int f2fs_get_encryption_info(struct inode *inode) } down_read(&keyring_key->sem); ukp = user_key_payload(keyring_key); + if (!ukp) { + /* key was revoked before we acquired its semaphore */ + res = -EKEYREVOKED; + up_read(&keyring_key->sem); + goto out; + } if (ukp->datalen != sizeof(struct f2fs_encryption_key)) { res = -EINVAL; up_read(&keyring_key->sem); -- cgit v1.2.3 From 8a004caec12bf241e567e3640401256cc9bc2e45 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Oct 2017 16:43:25 +0100 Subject: KEYS: Fix race between updating and finding a negative key commit 363b02dab09b3226f3bd1420dad9c72b79a42a76 upstream. 
Consolidate KEY_FLAG_INSTANTIATED, KEY_FLAG_NEGATIVE and the rejection error into one field such that: (1) The instantiation state can be modified/read atomically. (2) The error can be accessed atomically with the state. (3) The error isn't stored unioned with the payload pointers. This deals with the problem that the state is spread over three different objects (two bits and a separate variable) and reading or updating them atomically isn't practical, given that not only can uninstantiated keys change into instantiated or rejected keys, but rejected keys can also turn into instantiated keys - and someone accessing the key might not be using any locking. The main side effect of this problem is that what was held in the payload may change, depending on the state. For instance, you might observe the key to be in the rejected state. You then read the cached error, but if the key semaphore wasn't locked, the key might've become instantiated between the two reads - and you might now have something in hand that isn't actually an error code. The state is now KEY_IS_UNINSTANTIATED, KEY_IS_POSITIVE or a negative error code if the key is negatively instantiated. The key_is_instantiated() function is replaced with key_is_positive() to avoid confusion as negative keys are also 'instantiated'. Additionally, barriering is included: (1) Order payload-set before state-set during instantiation. (2) Order state-read before payload-read when using the key. Further separate barriering is necessary if RCU is being used to access the payload content after reading the payload pointers. Fixes: 146aa8b1453b ("KEYS: Merge the type-specific data with the payload data") Reported-by: Eric Biggers Signed-off-by: David Howells Reviewed-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- include/linux/key.h | 49 ++++++++++++++++++++------------ net/dns_resolver/dns_key.c | 2 +- security/keys/big_key.c | 4 +-- security/keys/encrypted-keys/encrypted.c | 2 +- security/keys/gc.c | 8 +++--- security/keys/key.c | 31 +++++++++++++------- security/keys/keyctl.c | 9 +++--- security/keys/keyring.c | 10 +++---- security/keys/proc.c | 7 +++-- security/keys/process_keys.c | 2 +- security/keys/request_key.c | 7 ++--- security/keys/request_key_auth.c | 2 +- security/keys/trusted.c | 2 +- security/keys/user_defined.c | 4 +-- 14 files changed, 81 insertions(+), 58 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index dcc115e8dd03..af071ca73079 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -126,6 +126,11 @@ static inline bool is_key_possessed(const key_ref_t key_ref) return (unsigned long) key_ref & 1UL; } +enum key_state { + KEY_IS_UNINSTANTIATED, + KEY_IS_POSITIVE, /* Positively instantiated */ +}; + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -157,6 +162,7 @@ struct key { * - may not match RCU dereferenced payload * - payload should contain own length */ + short state; /* Key state (+) or rejection error (-) */ #ifdef KEY_DEBUGGING unsigned magic; @@ -165,19 +171,17 @@ struct key { #endif unsigned long flags; /* status flags (change with bitops) */ -#define KEY_FLAG_INSTANTIATED 0 /* set if key has been instantiated */ -#define KEY_FLAG_DEAD 1 /* set if key type has been deleted */ -#define KEY_FLAG_REVOKED 2 /* set if key had been revoked */ -#define KEY_FLAG_IN_QUOTA 3 /* set if key consumes quota */ -#define KEY_FLAG_USER_CONSTRUCT 4 /* set if key is being constructed in userspace */ -#define 
KEY_FLAG_NEGATIVE 5 /* set if key is negative */ -#define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ -#define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ -#define KEY_FLAG_TRUSTED 8 /* set if key is trusted */ -#define KEY_FLAG_TRUSTED_ONLY 9 /* set if keyring only accepts links to trusted keys */ -#define KEY_FLAG_BUILTIN 10 /* set if key is builtin */ -#define KEY_FLAG_ROOT_CAN_INVAL 11 /* set if key can be invalidated by root without permission */ -#define KEY_FLAG_UID_KEYRING 12 /* set if key is a user or user session keyring */ +#define KEY_FLAG_DEAD 0 /* set if key type has been deleted */ +#define KEY_FLAG_REVOKED 1 /* set if key had been revoked */ +#define KEY_FLAG_IN_QUOTA 2 /* set if key consumes quota */ +#define KEY_FLAG_USER_CONSTRUCT 3 /* set if key is being constructed in userspace */ +#define KEY_FLAG_ROOT_CAN_CLEAR 4 /* set if key can be cleared by root without permission */ +#define KEY_FLAG_INVALIDATED 5 /* set if key has been invalidated */ +#define KEY_FLAG_TRUSTED 6 /* set if key is trusted */ +#define KEY_FLAG_TRUSTED_ONLY 7 /* set if keyring only accepts links to trusted keys */ +#define KEY_FLAG_BUILTIN 8 /* set if key is builtin */ +#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ +#define KEY_FLAG_UID_KEYRING 10 /* set if key is a user or user session keyring */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -203,7 +207,6 @@ struct key { struct list_head name_link; struct assoc_array keys; }; - int reject_error; }; }; @@ -319,17 +322,27 @@ extern void key_set_timeout(struct key *, unsigned); #define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */ #define KEY_NEED_ALL 0x3f /* All the above permissions */ +static inline short key_read_state(const struct key *key) +{ + /* Barrier versus mark_key_instantiated(). */ + return smp_load_acquire(&key->state); +} + /** - * key_is_instantiated - Determine if a key has been positively instantiated + * key_is_positive - Determine if a key has been positively instantiated * @key: The key to check. * * Return true if the specified key has been positively instantiated, false * otherwise. 
*/ -static inline bool key_is_instantiated(const struct key *key) +static inline bool key_is_positive(const struct key *key) +{ + return key_read_state(key) == KEY_IS_POSITIVE; +} + +static inline bool key_is_negative(const struct key *key) { - return test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && - !test_bit(KEY_FLAG_NEGATIVE, &key->flags); + return key_read_state(key) < 0; } #define rcu_dereference_key(KEY) \ diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index c79b85eb4d4c..6abc5012200b 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data) static void dns_resolver_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - if (key_is_instantiated(key)) { + if (key_is_positive(key)) { int err = PTR_ERR(key->payload.data[dns_key_error]); if (err) diff --git a/security/keys/big_key.c b/security/keys/big_key.c index 907c1522ee46..08c4cc5c2973 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -138,7 +138,7 @@ void big_key_revoke(struct key *key) /* clear the quota */ key_payload_reserve(key, 0); - if (key_is_instantiated(key) && + if (key_is_positive(key) && (size_t)key->payload.data[big_key_len] > BIG_KEY_FILE_THRESHOLD) vfs_truncate(path, 0); } @@ -170,7 +170,7 @@ void big_key_describe(const struct key *key, struct seq_file *m) seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, ": %zu [%s]", datalen, datalen > BIG_KEY_FILE_THRESHOLD ? "file" : "buff"); diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index dbd75de136d7..ce295c0c1da0 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -852,7 +852,7 @@ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) size_t datalen = prep->datalen; int ret = 0; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_negative(key)) return -ENOKEY; if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; diff --git a/security/keys/gc.c b/security/keys/gc.c index 9cb4fe4478a1..1659094d684d 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -129,15 +129,15 @@ static noinline void key_gc_unused_keys(struct list_head *keys) while (!list_empty(keys)) { struct key *key = list_entry(keys->next, struct key, graveyard_link); + short state = key->state; + list_del(&key->graveyard_link); kdebug("- %u", key->serial); key_check(key); /* Throw away the key data if the key is instantiated */ - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && - !test_bit(KEY_FLAG_NEGATIVE, &key->flags) && - key->type->destroy) + if (state == KEY_IS_POSITIVE && key->type->destroy) key->type->destroy(key); security_key_free(key); @@ -151,7 +151,7 @@ static noinline void key_gc_unused_keys(struct list_head *keys) } atomic_dec(&key->user->nkeys); - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) + if (state != KEY_IS_UNINSTANTIATED) atomic_dec(&key->user->nikeys); key_user_put(key->user); diff --git a/security/keys/key.c b/security/keys/key.c index 2751ab4a7946..4d971bf88ac3 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -395,6 +395,18 @@ int key_payload_reserve(struct key *key, size_t datalen) } EXPORT_SYMBOL(key_payload_reserve); +/* + * Change the key state to being instantiated. 
+ */ +static void mark_key_instantiated(struct key *key, int reject_error) +{ + /* Commit the payload before setting the state; barrier versus + * key_read_state(). + */ + smp_store_release(&key->state, + (reject_error < 0) ? reject_error : KEY_IS_POSITIVE); +} + /* * Instantiate a key and link it into the target keyring atomically. Must be * called with the target keyring's semaphore writelocked. The target key's @@ -418,14 +430,14 @@ static int __key_instantiate_and_link(struct key *key, mutex_lock(&key_construction_mutex); /* can't instantiate twice */ - if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state == KEY_IS_UNINSTANTIATED) { /* instantiate the key */ ret = key->type->instantiate(key, prep); if (ret == 0) { /* mark the key as being instantiated */ atomic_inc(&key->user->nikeys); - set_bit(KEY_FLAG_INSTANTIATED, &key->flags); + mark_key_instantiated(key, 0); if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) awaken = 1; @@ -553,13 +565,10 @@ int key_reject_and_link(struct key *key, mutex_lock(&key_construction_mutex); /* can't instantiate twice */ - if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state == KEY_IS_UNINSTANTIATED) { /* mark the key as being negatively instantiated */ atomic_inc(&key->user->nikeys); - key->reject_error = -error; - smp_wmb(); - set_bit(KEY_FLAG_NEGATIVE, &key->flags); - set_bit(KEY_FLAG_INSTANTIATED, &key->flags); + mark_key_instantiated(key, -error); now = current_kernel_time(); key->expiry = now.tv_sec + timeout; key_schedule_gc(key->expiry + key_gc_delay); @@ -731,8 +740,8 @@ static inline key_ref_t __key_update(key_ref_t key_ref, ret = key->type->update(key, prep); if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + /* Updating a negative key positively instantiates it */ + mark_key_instantiated(key, 0); up_write(&key->sem); @@ -967,8 +976,8 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) ret = key->type->update(key, &prep); if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + /* Updating a negative key positively instantiates it */ + mark_key_instantiated(key, 0); up_write(&key->sem); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index a009dc66eb8f..2e741e1a8712 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -738,10 +738,9 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) key = key_ref_to_ptr(key_ref); - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { - ret = -ENOKEY; - goto error2; - } + ret = key_read_state(key); + if (ret < 0) + goto error2; /* Negatively instantiated */ /* see if we can read it directly */ ret = key_permission(key_ref, KEY_NEED_READ); @@ -873,7 +872,7 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) atomic_dec(&key->user->nkeys); atomic_inc(&newowner->nkeys); - if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { + if (key->state != KEY_IS_UNINSTANTIATED) { atomic_dec(&key->user->nikeys); atomic_inc(&newowner->nikeys); } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 0c8dd4fbe130..ef828238cdc0 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -407,7 +407,7 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m) else seq_puts(m, "[anon]"); - if (key_is_instantiated(keyring)) { + if (key_is_positive(keyring)) { if (keyring->keys.nr_leaves_on_tree != 0) seq_printf(m, ": %lu", 
keyring->keys.nr_leaves_on_tree); else @@ -522,7 +522,8 @@ static int keyring_search_iterator(const void *object, void *iterator_data) { struct keyring_search_context *ctx = iterator_data; const struct key *key = keyring_ptr_to_key(object); - unsigned long kflags = key->flags; + unsigned long kflags = READ_ONCE(key->flags); + short state = READ_ONCE(key->state); kenter("{%d}", key->serial); @@ -566,9 +567,8 @@ static int keyring_search_iterator(const void *object, void *iterator_data) if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { /* we set a different error code if we pass a negative key */ - if (kflags & (1 << KEY_FLAG_NEGATIVE)) { - smp_rmb(); - ctx->result = ERR_PTR(key->reject_error); + if (state < 0) { + ctx->result = ERR_PTR(state); kleave(" = %d [neg]", ctx->skipped_ret); goto skipped; } diff --git a/security/keys/proc.c b/security/keys/proc.c index b9f531c9e4fa..036128682463 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -182,6 +182,7 @@ static int proc_keys_show(struct seq_file *m, void *v) unsigned long timo; key_ref_t key_ref, skey_ref; char xbuf[16]; + short state; int rc; struct keyring_search_context ctx = { @@ -240,17 +241,19 @@ static int proc_keys_show(struct seq_file *m, void *v) sprintf(xbuf, "%luw", timo / (60*60*24*7)); } + state = key_read_state(key); + #define showflag(KEY, LETTER, FLAG) \ (test_bit(FLAG, &(KEY)->flags) ? LETTER : '-') seq_printf(m, "%08x %c%c%c%c%c%c%c %5d %4s %08x %5d %5d %-9.9s ", key->serial, - showflag(key, 'I', KEY_FLAG_INSTANTIATED), + state != KEY_IS_UNINSTANTIATED ? 'I' : '-', showflag(key, 'R', KEY_FLAG_REVOKED), showflag(key, 'D', KEY_FLAG_DEAD), showflag(key, 'Q', KEY_FLAG_IN_QUOTA), showflag(key, 'U', KEY_FLAG_USER_CONSTRUCT), - showflag(key, 'N', KEY_FLAG_NEGATIVE), + state < 0 ? 'N' : '-', showflag(key, 'i', KEY_FLAG_INVALIDATED), atomic_read(&key->usage), xbuf, diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 7dd050f24261..ac1d5b2b1626 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -727,7 +727,7 @@ try_again: ret = -EIO; if (!(lflags & KEY_LOOKUP_PARTIAL) && - !test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) + key_read_state(key) == KEY_IS_UNINSTANTIATED) goto invalid_key; /* check the permissions */ diff --git a/security/keys/request_key.c b/security/keys/request_key.c index c7a117c9a8f3..2ce733342b5a 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -594,10 +594,9 @@ int wait_for_key_construction(struct key *key, bool intr) intr ? 
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (ret) return -ERESTARTSYS; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { - smp_rmb(); - return key->reject_error; - } + ret = key_read_state(key); + if (ret < 0) + return ret; return key_validate(key); } EXPORT_SYMBOL(wait_for_key_construction); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 4f0f112fe276..217775fcd0f3 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -73,7 +73,7 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 16dec53184b6..509aedcf8310 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1014,7 +1014,7 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) char *datablob; int ret = 0; - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_negative(key)) return -ENOKEY; p = key->payload.data[0]; if (!p->migratable) diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 8705d79b2c6f..eba8a516ee9e 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -120,7 +120,7 @@ int user_update(struct key *key, struct key_preparsed_payload *prep) if (ret == 0) { /* attach the new data, displacing the old */ - if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags)) + if (key_is_positive(key)) zap = key->payload.data[0]; else zap = NULL; @@ -174,7 +174,7 @@ EXPORT_SYMBOL_GPL(user_destroy); void user_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - if (key_is_instantiated(key)) + if (key_is_positive(key)) seq_printf(m, ": %u", key->datalen); } -- cgit v1.2.3 From 1bb1d4252d1ede47afea054979fb9d95fc891743 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Sep 2016 14:20:38 -0700 Subject: fscrypto: require write access to mount to set encryption policy commit ba63f23d69a3a10e7e527a02702023da68ef8a6d upstream. [Please apply to 4.4-stable. Note: this was already backported, but only to ext4; it was missed that it should go to f2fs as well. This is needed to make xfstest generic/395 pass on f2fs.] Since setting an encryption policy requires writing metadata to the filesystem, it should be guarded by mnt_want_write/mnt_drop_write. Otherwise, a user could cause a write to a frozen or readonly filesystem. This was handled correctly by f2fs but not by ext4. Make fscrypt_process_policy() handle it rather than relying on the filesystem to get it right. 
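The resulting pattern for any ioctl that writes filesystem metadata, as a minimal sketch (the function name is hypothetical; the real f2fs handler is in the diff below):

	static long fs_ioc_set_encryption_policy(struct file *filp,
						 const void __user *arg)
	{
		long err = mnt_want_write_file(filp);	/* rejects RO and frozen mounts */

		if (err)
			return err;
		/* ... copy the policy from *arg and write it to the inode ... */
		mnt_drop_write_file(filp);
		return err;
	}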
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Acked-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4b449d263333..01eed94b01ea 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1541,12 +1541,18 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) sizeof(policy))) return -EFAULT; + err = mnt_want_write_file(filp); + if (err) + return err; + mutex_lock(&inode->i_mutex); err = f2fs_process_policy(&policy, inode); mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(filp); + return err; #else return -EOPNOTSUPP; -- cgit v1.2.3 From aa3a0a70bdb8745864e41fca5f7722dfb3908d85 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:40:00 -0700 Subject: FS-Cache: fix dereference of NULL user_key_payload commit d124b2c53c7bee6569d2a2d0b18b4a1afde00134 upstream. When the file /proc/fs/fscache/objects (available with CONFIG_FSCACHE_OBJECT_LIST=y) is opened, we request a user key with description "fscache:objlist", then access its payload. However, a revoked key has a NULL payload, and we failed to check for this. request_key() *does* skip revoked keys, but there is still a window where the key can be revoked before we access its payload. Fix it by checking for a NULL payload, treating it like a key which was already revoked at the time it was requested. Fixes: 4fbf4291aa15 ("FS-Cache: Allow the current state of all objects to be dumped") Reviewed-by: James Morris Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- fs/fscache/object-list.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 6b028b7c4250..926580a85153 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -330,6 +330,13 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) rcu_read_lock(); confkey = user_key_payload(key); + if (!confkey) { + /* key was revoked */ + rcu_read_unlock(); + key_put(key); + goto no_config; + } + buf = confkey->data; for (len = confkey->datalen - 1; len >= 0; len--) { -- cgit v1.2.3 From 9b36699635c54b2e56ec3fc07a750dc465542a6d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 27 Oct 2017 10:23:18 +0200 Subject: Linux 4.4.95 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ff9d6bbf2210..57e1ea2a189a 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 94 +SUBLEVEL = 95 EXTRAVERSION = NAME = Blurry Fish Butt -- cgit v1.2.3 From fac311be26e5af64612c386f5a041984fe7c59a2 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Tue, 25 Apr 2017 10:37:58 +0100 Subject: cpufreq/sched: Consider max cpu capacity when choosing frequencies When using schedfreq on cpus with max capacity significantly smaller than 1024, the tick update uses non-normalised capacities - this leads to selecting an incorrect OPP as we were scaling the frequency as if the max capacity achievable was 1024 rather than the max for that particular cpu or group. This could result in a cpu being stuck at the lowest OPP and unable to generate enough utilisation to climb out if the max capacity is significantly smaller than 1024. Instead, normalize the capacity to be in the range 0-1024 in the tick so that when we later select a frequency, we get the correct one. Also comments updated to be clearer about what is needed. 
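To make the normalization concrete, here is a worked example with assumed numbers (a little CPU whose capacity_orig_of() value is 430; SCHED_CAPACITY_SCALE is 1024):

	/* Scale-invariant utilization sampled at the tick: */
	cpu_utilization = 215;	/* the little CPU is about half busy */

	/* Normalized into the 0..1024 range expected by sched_capacity_reqs: */
	cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE
			/ capacity_orig_of(cpu);	/* 215 * 1024 / 430 = 512 */

Without the normalization, 215 would be interpreted as roughly 21% of full capacity and could map to the lowest OPP even though the CPU is half busy.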
Change-Id: Id84391c7ac015311002ada21813a353ee13bee60 Signed-off-by: Chris Redpath --- kernel/sched/core.c | 4 ++++ kernel/sched/fair.c | 4 ++-- kernel/sched/sched.h | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 83f7c682032b..9cf530a6123e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2987,7 +2987,9 @@ static void sched_freq_tick_pelt(int cpu) * utilization and to harm its performance the least, request * a jump to a higher OPP as soon as the margin of free capacity * is impacted (specified by capacity_margin). + * Remember CPU utilization in sched_capacity_reqs should be normalised. */ + cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); set_cfs_cpu_capacity(cpu, true, cpu_utilization); } @@ -3014,7 +3016,9 @@ static void sched_freq_tick_walt(int cpu) * It is likely that the load is growing so we * keep the added margin in our request as an * extra boost. + * Remember CPU utilization in sched_capacity_reqs should be normalised. */ + cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); set_cfs_cpu_capacity(cpu, true, cpu_utilization); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5cac6a77b2bc..e6b2461d07d6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4671,7 +4671,7 @@ static void update_capacity_of(int cpu) if (!sched_freq()) return; - /* Convert scale-invariant capacity to cpu. */ + /* Normalize scale-invariant capacity to cpu. */ req_cap = boosted_cpu_util(cpu); req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); set_cfs_cpu_capacity(cpu, true, req_cap); @@ -4864,7 +4864,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (rq->cfs.nr_running) update_capacity_of(cpu_of(rq)); else if (sched_freq()) - set_cfs_cpu_capacity(cpu_of(rq), false, 0); + set_cfs_cpu_capacity(cpu_of(rq), false, 0); /* no normalization required for 0 */ } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9f3d89faacdc..5256f05a26e8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1621,6 +1621,10 @@ static inline bool sched_freq(void) return static_key_false(&__sched_freq); } +/* + * sched_capacity_reqs expects capacity requests to be normalised. + * All capacities should sum to the range of 0-1024. + */ DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); void update_cpu_capacity_request(int cpu, bool request); -- cgit v1.2.3 From 792510d9b392b392c763c5e395f046d5c246c66e Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Tue, 30 May 2017 14:51:53 +0100 Subject: BACKPORT: sched/fair: Make it possible to account fair load avg consistently While set_task_rq_fair() is introduced in mainline by commit ad936d8658fd ("sched/fair: Make it possible to account fair load avg consistently"), the function was actually introduced here by the backport of commit 09a43ace1f98 ("sched/fair: Propagate load during synchronous attach/detach"). The problem (apart from the confusion introduced by the backport) is actually that set_task_rq_fair() is currently not called at all. Fix the problem by backporting again commit ad936d8658fd ("sched/fair: Make it possible to account fair load avg consistently"). Original change log: The current code accounts for the time a task was absent from the fair class (per ATTACH_AGE_LOAD). However it does not work correctly when a task got migrated or moved to another cgroup while outside of the fair class.
This patch tries to address that by aging on migration. We locklessly read the 'last_update_time' stamp from both the old and new cfs_rq, age the load up to the old time, and set it to the new time. These timestamps should in general not be more than 1 tick apart from one another, so there is a definite bound on things. Signed-off-by: Byungchul Park [ Changelog, a few edits and !SMP build fix ] Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1445616981-29904-2-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar (cherry-picked from ad936d8658fd348338cb7d42c577dac77892b074) Signed-off-by: Juri Lelli Signed-off-by: Chris Redpath Change-Id: I17294ab0ada3901d35895014715fd60952949358 Signed-off-by: Brendan Jackman --- kernel/sched/core.c | 4 ++++ kernel/sched/sched.h | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9cf530a6123e..e3242eed60e5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2173,6 +2173,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) INIT_LIST_HEAD(&p->se.group_node); walt_init_new_task_load(p); +#ifdef CONFIG_FAIR_GROUP_SCHED + p->se.cfs_rq = NULL; +#endif + #ifdef CONFIG_SCHEDSTATS memset(&p->se.statistics, 0, sizeof(p->se.statistics)); #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 5256f05a26e8..1d52ca8a613c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -335,7 +335,15 @@ extern void sched_move_task(struct task_struct *tsk); #ifdef CONFIG_FAIR_GROUP_SCHED extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); -#endif + +#ifdef CONFIG_SMP +extern void set_task_rq_fair(struct sched_entity *se, + struct cfs_rq *prev, struct cfs_rq *next); +#else /* !CONFIG_SMP */ +static inline void set_task_rq_fair(struct sched_entity *se, + struct cfs_rq *prev, struct cfs_rq *next) { } +#endif /* CONFIG_SMP */ +#endif /* CONFIG_FAIR_GROUP_SCHED */ #else /* CONFIG_CGROUP_SCHED */ @@ -987,6 +995,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) #endif #ifdef CONFIG_FAIR_GROUP_SCHED + set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]); p->se.cfs_rq = tg->cfs_rq[cpu]; p->se.parent = tg->se[cpu]; #endif -- cgit v1.2.3 From 6b02ab68ec78f41f647ebc32b1c37bd27fdb034e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jun 2016 18:51:48 +0200 Subject: UPSTREAM: sched/fair: Fix and optimize the fork() path The task_fork_fair() callback already calls __set_task_cpu() and takes rq->lock. If we move the sched_class::task_fork callback in sched_fork() under the existing p->pi_lock, right after its set_task_cpu() call, we can avoid doing two such calls and omit the IRQ disabling on the rq->lock. Change to __set_task_cpu() to skip the migration bits, this is a new task, not a migration. Similarly, make wake_up_new_task() use __set_task_cpu() for the same reason, the task hasn't actually migrated as it hasn't ever run. This cures the problem of calling migrate_task_rq_fair(), which does remove_entity_load_avg() on tasks that have never been added to the load avg to begin with. This bug would result in transiently messed up load_avg values, averaged out after a few dozen milliseconds. This is probably the reason why this bug was not found for such a long time.
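Condensed from the diff below, the resulting ordering in sched_fork() is (sketch only; unrelated setup omitted):

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	/* First cpu assignment for a new task: not a migration, so skip the migration bits */
	__set_task_cpu(p, cpu);
	if (p->sched_class->task_fork)
		p->sched_class->task_fork(p);	/* now runs under p->pi_lock */
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);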
Reported-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar (cherry picked from commit e210bffd39d01b649c94b820c28ff112673266dd) Change-Id: Icbddbaa6e8c1071859673d8685bc3f38955cf144 Signed-off-by: Brendan Jackman Signed-off-by: Chris Redpath --- kernel/sched/core.c | 16 +++++++++++----- kernel/sched/fair.c | 27 ++++++--------------------- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e3242eed60e5..7e696bb3291a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2300,9 +2300,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->sched_class = &fair_sched_class; } - if (p->sched_class->task_fork) - p->sched_class->task_fork(p); - /* * The child is not yet in the pid-hash so no cgroup attach races, * and the cgroup is pinned to this child due to cgroup_fork() @@ -2311,7 +2308,13 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) * Silence PROVE_RCU. */ raw_spin_lock_irqsave(&p->pi_lock, flags); - set_task_cpu(p, cpu); + /* + * We're setting the cpu for the first time, we don't migrate, + * so use __set_task_cpu(). + */ + __set_task_cpu(p, cpu); + if (p->sched_class->task_fork) + p->sched_class->task_fork(p); raw_spin_unlock_irqrestore(&p->pi_lock, flags); #ifdef CONFIG_SCHED_INFO @@ -2453,8 +2456,11 @@ void wake_up_new_task(struct task_struct *p) * Fork balancing, do it here and not earlier because: * - cpus_allowed can change in the fork path * - any previously selected cpu might disappear through hotplug + * + * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, + * as we're not fully set-up yet. */ - set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); + __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); #endif rq = __task_rq_lock(p); post_init_entity_util_avg(&p->se); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e6b2461d07d6..d42a54f7b2e3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4712,7 +4712,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) * * note: in the case of encountering a throttled cfs_rq we will * post the final h_nr_running increment below. - */ + */ if (cfs_rq_throttled(cfs_rq)) break; cfs_rq->h_nr_running++; @@ -9901,31 +9901,17 @@ static void task_fork_fair(struct task_struct *p) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se, *curr; - int this_cpu = smp_processor_id(); struct rq *rq = this_rq(); - unsigned long flags; - - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_lock(&rq->lock); update_rq_clock(rq); cfs_rq = task_cfs_rq(current); curr = cfs_rq->curr; - - /* - * Not only the cpu but also the task_group of the parent might have - * been changed after parent->se.parent,cfs_rq were copied to - * child->se.parent,cfs_rq. So call __set_task_cpu() to make those - * of child point to valid ones. 
- */ - rcu_read_lock(); - __set_task_cpu(p, this_cpu); - rcu_read_unlock(); - - update_curr(cfs_rq); - - if (curr) + if (curr) { + update_curr(cfs_rq); se->vruntime = curr->vruntime; + } place_entity(cfs_rq, se, 1); if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) { @@ -9938,8 +9924,7 @@ static void task_fork_fair(struct task_struct *p) } se->vruntime -= cfs_rq->min_vruntime; - - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_unlock(&rq->lock); } /* -- cgit v1.2.3 From 138a670d97ca84a4cab83515a0920d4ef8eeb22a Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Fri, 17 Jun 2016 13:38:55 +0200 Subject: BACKPORT: sched/cgroup: Fix cpu_cgroup_fork() handling A new fair task is detached and attached from/to task_group with: cgroup_post_fork() ss->fork(child) := cpu_cgroup_fork() sched_move_task() task_move_group_fair() Which is wrong, because at this point in fork() the task isn't fully initialized and it cannot 'move' to another group, because it's not attached to any group as yet. In fact, cpu_cgroup_fork() needs a small part of sched_move_task() so we can just call this small part directly instead of sched_move_task(). And the task doesn't really migrate because it is not yet attached so we need the following sequence: do_fork() sched_fork() __set_task_cpu() cgroup_post_fork() set_task_rq() # set task group and runqueue wake_up_new_task() select_task_rq() can select a new cpu __set_task_cpu post_init_entity_util_avg attach_task_cfs_rq() activate_task enqueue_task This patch makes that happen. BACKPORT: Difference from original commit: - Removed use of DEQUEUE_MOVE (which isn't defined in 4.4) in dequeue_task flags - Replaced "struct rq_flags rf" with "unsigned long flags". Signed-off-by: Vincent Guittot [ Added TASK_SET_GROUP to set depth properly. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar (cherry picked from commit ea86cb4b7621e1298a37197005bf0abcc86348d4) Change-Id: I8126fd923288acf961218431ffd29d6bf6fd8d72 Signed-off-by: Brendan Jackman Signed-off-by: Chris Redpath --- kernel/sched/core.c | 63 ++++++++++++++++++++++++++++++++++------------------ kernel/sched/fair.c | 23 ++++++++++++++++++- kernel/sched/sched.h | 5 ++++- 3 files changed, 67 insertions(+), 24 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7e696bb3291a..0b5c588929e9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8188,27 +8188,9 @@ void sched_offline_group(struct task_group *tg) spin_unlock_irqrestore(&task_group_lock, flags); } -/* change task's runqueue when it moves between groups. - * The caller of this function should have put the task in its new group - * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
- */ -void sched_move_task(struct task_struct *tsk) +static void sched_change_group(struct task_struct *tsk, int type) { struct task_group *tg; - int queued, running; - unsigned long flags; - struct rq *rq; - - rq = task_rq_lock(tsk, &flags); - - running = task_current(rq, tsk); - queued = task_on_rq_queued(tsk); - - if (queued) - dequeue_task(rq, tsk, DEQUEUE_SAVE); - if (unlikely(running)) - put_prev_task(rq, tsk); /* * All callers are synchronized by task_rq_lock(); we do not use RCU @@ -8221,11 +8203,37 @@ void sched_move_task(struct task_struct *tsk) tsk->sched_task_group = tg; #ifdef CONFIG_FAIR_GROUP_SCHED - if (tsk->sched_class->task_move_group) - tsk->sched_class->task_move_group(tsk); + if (tsk->sched_class->task_change_group) + tsk->sched_class->task_change_group(tsk, type); else #endif set_task_rq(tsk, task_cpu(tsk)); +} + +/* + * Change task's runqueue when it moves between groups. + * + * The caller of this function should have put the task in its new group by + * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect + * its new group. + */ +void sched_move_task(struct task_struct *tsk) +{ + int queued, running; + unsigned long flags; + struct rq *rq; + + rq = task_rq_lock(tsk, &flags); + + running = task_current(rq, tsk); + queued = task_on_rq_queued(tsk); + + if (queued) + dequeue_task(rq, tsk, DEQUEUE_SAVE); + if (unlikely(running)) + put_prev_task(rq, tsk); + + sched_change_group(tsk, TASK_MOVE_GROUP); if (unlikely(running)) tsk->sched_class->set_curr_task(rq); @@ -8662,9 +8670,20 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) sched_free_group(tg); } +/* + * This is called before wake_up_new_task(), therefore we really only + * have to set its group bits, all the other stuff does not apply. + */ static void cpu_cgroup_fork(struct task_struct *task, void *private) { - sched_move_task(task); + unsigned long flags; + struct rq *rq; + + rq = task_rq_lock(task, &flags); + + sched_change_group(task, TASK_SET_GROUP); + + task_rq_unlock(rq, task, &flags); } static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d42a54f7b2e3..2afd81bfda99 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10116,6 +10116,14 @@ void init_cfs_rq(struct cfs_rq *cfs_rq) } #ifdef CONFIG_FAIR_GROUP_SCHED +static void task_set_group_fair(struct task_struct *p) +{ + struct sched_entity *se = &p->se; + + set_task_rq(p, task_cpu(p)); + se->depth = se->parent ? 
se->parent->depth + 1 : 0; +} + static void task_move_group_fair(struct task_struct *p) { detach_task_cfs_rq(p); @@ -10128,6 +10136,19 @@ static void task_move_group_fair(struct task_struct *p) attach_task_cfs_rq(p); } +static void task_change_group_fair(struct task_struct *p, int type) +{ + switch (type) { + case TASK_SET_GROUP: + task_set_group_fair(p); + break; + + case TASK_MOVE_GROUP: + task_move_group_fair(p); + break; + } +} + void free_fair_sched_group(struct task_group *tg) { int i; @@ -10354,7 +10375,7 @@ const struct sched_class fair_sched_class = { .update_curr = update_curr_fair, #ifdef CONFIG_FAIR_GROUP_SCHED - .task_move_group = task_move_group_fair, + .task_change_group = task_change_group_fair, #endif }; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1d52ca8a613c..8d3712107e61 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1293,8 +1293,11 @@ struct sched_class { void (*update_curr) (struct rq *rq); +#define TASK_SET_GROUP 0 +#define TASK_MOVE_GROUP 1 + #ifdef CONFIG_FAIR_GROUP_SCHED - void (*task_move_group) (struct task_struct *p); + void (*task_change_group)(struct task_struct *p, int type); #endif }; -- cgit v1.2.3 From 97cb74f48599ca1ae6c17955882f167a4c3aaad2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jun 2016 13:29:28 +0200 Subject: BACKPORT: sched/fair: Fix PELT integrity for new tasks Vincent and Yuyang found another few scenarios in which entity tracking goes wobbly. The scenarios are basically due to the fact that new tasks are not immediately attached and thereby differ from the normal situation -- a task is always attached to a cfs_rq load average (such that it includes its blocked contribution) and are explicitly detached/attached on migration to another cfs_rq. Scenario 1: switch to fair class p->sched_class = fair_class; if (queued) enqueue_task(p); ... enqueue_entity() enqueue_entity_load_avg() migrated = !sa->last_update_time (true) if (migrated) attach_entity_load_avg() check_class_changed() switched_from() (!fair) switched_to() (fair) switched_to_fair() attach_entity_load_avg() If @p is a new task that hasn't been fair before, it will have !last_update_time and, per the above, end up in attach_entity_load_avg() _twice_. Scenario 2: change between cgroups sched_move_group(p) if (queued) dequeue_task() task_move_group_fair() detach_task_cfs_rq() detach_entity_load_avg() set_task_rq() attach_task_cfs_rq() attach_entity_load_avg() if (queued) enqueue_task(); ... enqueue_entity() enqueue_entity_load_avg() migrated = !sa->last_update_time (true) if (migrated) attach_entity_load_avg() Similar as with scenario 1, if @p is a new task, it will have !load_update_time and we'll end up in attach_entity_load_avg() _twice_. Furthermore, notice how we do a detach_entity_load_avg() on something that wasn't attached to begin with. As stated above; the problem is that the new task isn't yet attached to the load tracking and thereby violates the invariant assumption. This patch remedies this by ensuring a new task is indeed properly attached to the load tracking on creation, through post_init_entity_util_avg(). Of course, this isn't entirely as straightforward as one might think, since the task is hashed before we call wake_up_new_task() and thus can be poked at. We avoid this by adding TASK_NEW and teaching cpu_cgroup_can_attach() to refuse such tasks. 
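Condensed from the diff below, the resulting lifecycle of a new task is:

	sched_fork():            p->state = TASK_NEW;	/* visible in the pid-hash, but cannot run or be woken */
	cpu_cgroup_can_attach(): refuse TASK_NEW tasks with -EINVAL, under p->pi_lock
	wake_up_new_task():      p->state = TASK_RUNNING;	/* load tracking attached, task enqueued */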
.:: BACKPORT Complicated by the fact that much of the lines changed by the original of this commit were then changed by: df217913e72e sched/fair: Factorize attach/detach entity and then d31b1a66cbe0 sched/fair: Factorize PELT update, which have both already been backported here. Reported-by: Yuyang Du Reported-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar (cherry picked from commit 7dc603c9028ea5d4354e0e317e8481df99b06d7e) Change-Id: Ibc59eb52310a62709d49a744bd5a24e8b97c4ae8 Signed-off-by: Brendan Jackman Signed-off-by: Chris Redpath --- include/linux/sched.h | 5 +++-- kernel/sched/core.c | 26 +++++++++++++++++++++++--- kernel/sched/fair.c | 15 ++++++++++----- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d0d0b1b45418..1872d19d1702 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -222,9 +222,10 @@ extern void proc_sched_set_task(struct task_struct *p); #define TASK_WAKING 256 #define TASK_PARKED 512 #define TASK_NOLOAD 1024 -#define TASK_STATE_MAX 2048 +#define TASK_NEW 2048 +#define TASK_STATE_MAX 4096 -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN" +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0b5c588929e9..31cb76a915a8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2259,11 +2259,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) __sched_fork(clone_flags, p); /* - * We mark the process as running here. This guarantees that + * We mark the process as NEW here. This guarantees that * nobody will actually run it, and a signal or other external * event cannot wake it up and insert it on the runqueue either. */ - p->state = TASK_RUNNING; + p->state = TASK_NEW; /* * Make sure we do not leak PI boosting priority to the child. @@ -2300,6 +2300,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->sched_class = &fair_sched_class; } + init_entity_runnable_average(&p->se); + /* * The child is not yet in the pid-hash so no cgroup attach races, * and the cgroup is pinned to this child due to cgroup_fork() @@ -2446,6 +2448,7 @@ void wake_up_new_task(struct task_struct *p) struct rq *rq; raw_spin_lock_irqsave(&p->pi_lock, flags); + p->state = TASK_RUNNING; walt_init_new_task_load(p); @@ -8690,6 +8693,7 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) { struct task_struct *task; struct cgroup_subsys_state *css; + int ret = 0; cgroup_taskset_for_each(task, css, tset) { #ifdef CONFIG_RT_GROUP_SCHED @@ -8700,8 +8704,24 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) if (task->sched_class != &fair_sched_class) return -EINVAL; #endif + /* + * Serialize against wake_up_new_task() such that if its + * running, we're sure to observe its full state. + */ + raw_spin_lock_irq(&task->pi_lock); + /* + * Avoid calling sched_move_task() before wake_up_new_task() + * has happened. This would lead to problems with PELT, due to + * move wanting to detach+attach while we're not attached yet.
+ */ + if (task->state == TASK_NEW) + ret = -EINVAL; + raw_spin_unlock_irq(&task->pi_lock); + + if (ret) + break; } - return 0; + return ret; } static void cpu_cgroup_attach(struct cgroup_taskset *tset) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2afd81bfda99..76af6e25e82e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -766,7 +766,9 @@ void init_entity_runnable_average(struct sched_entity *se) } static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq); +static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq); static void attach_entity_cfs_rq(struct sched_entity *se); +static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se); /* * With new tasks being created, their initial util_avgs are extrapolated @@ -837,7 +839,7 @@ void post_init_entity_util_avg(struct sched_entity *se) attach_entity_cfs_rq(se); } -#else +#else /* !CONFIG_SMP */ void init_entity_runnable_average(struct sched_entity *se) { } @@ -3312,11 +3314,14 @@ void remove_entity_load_avg(struct sched_entity *se) struct cfs_rq *cfs_rq = cfs_rq_of(se); /* - * Newly created task or never used group entity should not be removed - * from its (source) cfs_rq + * tasks cannot exit without having gone through wake_up_new_task() -> + * post_init_entity_util_avg() which will have added things to the + * cfs_rq, so we can remove unconditionally. + * + * Similarly for groups, they will have passed through + * post_init_entity_util_avg() before unregister_sched_fair_group() + * calls this. */ - if (se->avg.last_update_time == 0) - return; sync_entity_load_avg(se); atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg); -- cgit v1.2.3 From c14c9b6e3e489062efe08c364196e76e705b8ea3 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 8 Dec 2016 17:56:53 +0100 Subject: UPSTREAM: sched/core: Fix find_idlest_group() for fork During fork, the utilization of a task is initialized once the rq has been selected because the current utilization level of the rq is used to set the utilization of the fork task. As the task's utilization is still 0 at this step of the fork sequence, it doesn't make sense to look for some spare capacity that can fit the task's utilization. Furthermore, I can see perf regressions for the test: hackbench -P -g 1 because the least loaded policy is always bypassed and tasks are not spread during fork. With this patch and the fix below, we are back to the same performance as v4.8. The fix below is only a temporary one used for the test until a smarter solution is found because we can't simply remove the test, which is useful for other benchmarks | @@ -5708,13 +5708,6 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t | | avg_cost = this_sd->avg_scan_cost; | | - /* | - * Due to large variance we need a large fuzz factor; hackbench in | - * particularly is sensitive here.
| - */ | - if ((avg_idle / 512) < avg_cost) | - return -1; | - | time = local_clock(); | | for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) { Tested-by: Matt Fleming Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Matt Fleming Acked-by: Morten Rasmussen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dietmar.eggemann@arm.com Cc: kernellwp@gmail.com Cc: umgwanakikbuti@gmail.com Cc: yuyang.du@intel.comc Link: http://lkml.kernel.org/r/1481216215-24651-2-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar (cherry picked from commit f519a3f1c6b7a990e5aed37a8f853c6ecfdee945) Signed-off-by: Brendan Jackman Signed-off-by: Chris Redpath Change-Id: I86cc2ad81af3467c0b2f82b995111f428248baa4 --- kernel/sched/fair.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 76af6e25e82e..4a20c392c0eb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6051,13 +6051,21 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, * utilized systems if we require spare_capacity > task_util(p), * so we allow for some task stuffing by using * spare_capacity > task_util(p)/2. + * + * Spare capacity can't be used for fork because the utilization has + * not been set yet, we must first select a rq to compute the initial + * utilization. */ + if (sd_flag & SD_BALANCE_FORK) + goto skip_spare; + if (this_spare > task_util(p) / 2 && imbalance*this_spare > 100*most_spare) return NULL; else if (most_spare > task_util(p) / 2) return most_spare_sg; +skip_spare: if (!idlest || 100*this_load < imbalance*min_load) return NULL; return idlest; -- cgit v1.2.3 From 4863faf5e4df76322999ad06502cbe27d0ec86dc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Oct 2016 16:20:59 +0200 Subject: UPSTREAM: sched/core: Add missing update_rq_clock() in post_init_entity_util_avg() Address this rq-clock update bug: WARNING: CPU: 0 PID: 0 at ../kernel/sched/sched.h:797 post_init_entity_util_avg() rq->clock_update_flags < RQCF_ACT_SKIP Call Trace: __warn() post_init_entity_util_avg() wake_up_new_task() _do_fork() kernel_thread() rest_init() start_kernel() Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar (cherry picked from commit 4126bad6717336abe5d666440ae15555563ca53f) Change-Id: Ibe9a73386896377f96483d195e433259218755a5 Signed-off-by: Brendan Jackman Signed-off-by: Chris Redpath --- kernel/sched/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 31cb76a915a8..7da9ce69e707 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2466,6 +2466,7 @@ void wake_up_new_task(struct task_struct *p) __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); #endif rq = __task_rq_lock(p); + update_rq_clock(rq); post_init_entity_util_avg(&p->se); walt_mark_task_starting(p); -- cgit v1.2.3 From bea1b621d952079c4cebc1178ea580c290d0446e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Oct 2016 16:28:37 +0200 Subject: UPSTREAM: sched/core: Add missing update_rq_clock() in detach_task_cfs_rq() Instead of adding the update_rq_clock() all the way at the bottom of the callstack, add one at the top, this to aid later effort to minimize update_rq_lock() calls. 
WARNING: CPU: 0 PID: 1 at ../kernel/sched/sched.h:797 detach_task_cfs_rq() rq->clock_update_flags < RQCF_ACT_SKIP Call Trace: dump_stack() __warn() warn_slowpath_fmt() detach_task_cfs_rq() switched_from_fair() __sched_setscheduler() _sched_setscheduler() sched_set_stop_task() cpu_stop_create() __smpboot_create_thread.part.2() smpboot_register_percpu_thread_cpumask() cpu_stop_init() do_one_initcall() ? print_cpu_info() kernel_init_freeable() ? rest_init() kernel_init() ret_from_fork() Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar (cherry picked from commit 80f5c1b84baa8180c3c27b7e227429712cd967b6) Change-Id: Ibffde077d18eabec4c2984158bd9d6d73bd0fb96 Signed-off-by: Brendan Jackman Signed-off-by: Chris Redpath --- kernel/sched/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7da9ce69e707..4097f9fa541b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3578,6 +3578,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) BUG_ON(prio > MAX_PRIO); rq = __task_rq_lock(p); + update_rq_clock(rq); /* * Idle task boosting is a nono in general. There is one @@ -4095,6 +4096,7 @@ recheck: * runqueue lock must be held. */ rq = task_rq_lock(p, &flags); + update_rq_clock(rq); /* * Changing the policy of the stop threads its a very bad idea @@ -8685,6 +8687,7 @@ static void cpu_cgroup_fork(struct task_struct *task, void *private) rq = task_rq_lock(task, &flags); + update_rq_clock(rq); sched_change_group(task, TASK_SET_GROUP); task_rq_unlock(rq, task, &flags); -- cgit v1.2.3 From bab39eb879251debe02e573f5453bd93ad5350bd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Oct 2016 16:35:32 +0200 Subject: UPSTREAM: sched/core: Add missing update_rq_clock() call for task_hot() Add the update_rq_clock() call at the top of the callstack instead of at the bottom where we find it missing, this to aid later effort to minimize the number of update_rq_lock() calls. 
WARNING: CPU: 30 PID: 194 at ../kernel/sched/sched.h:797 assert_clock_updated() rq->clock_update_flags < RQCF_ACT_SKIP Call Trace: dump_stack() __warn() warn_slowpath_fmt() assert_clock_updated.isra.63.part.64() can_migrate_task() load_balance() pick_next_task_fair() __schedule() schedule() worker_thread() kthread() Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar (cherry picked from commit 3bed5e2166a5e433bf62162f3cd3c5174d335934) Change-Id: Ief5070dcce486535334dcb739ee16b989ea9df42 Signed-off-by: Brendan Jackman Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4a20c392c0eb..f6e730bcde27 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8998,6 +8998,7 @@ redo: more_balance: raw_spin_lock_irqsave(&busiest->lock, flags); + update_rq_clock(busiest); /* * cur_ld_moved - load moved in current iteration @@ -9395,6 +9396,7 @@ static int active_load_balance_cpu_stop(void *data) }; schedstat_inc(sd, alb_count); + update_rq_clock(busiest_rq); p = detach_one_task(&env); if (p) { -- cgit v1.2.3 From fd4a95dab858fa1350cc94ce4b435b7295db3ed3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Oct 2016 16:44:25 +0200 Subject: UPSTREAM: sched/core: Add missing update_rq_clock() call in set_user_nice() Address this rq-clock update bug: WARNING: CPU: 30 PID: 195 at ../kernel/sched/sched.h:797 set_next_entity() rq->clock_update_flags < RQCF_ACT_SKIP Call Trace: dump_stack() __warn() warn_slowpath_fmt() set_next_entity() ? _raw_spin_lock() set_curr_task_fair() set_user_nice.part.85() set_user_nice() create_worker() worker_thread() kthread() ret_from_fork() Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar (cherry picked from commit 2fb8d36787affe26f3536c3d8ec094995a48037d) Change-Id: I53ba056e72820c7fadb3f022e4ee3b821c0de17d Signed-off-by: Brendan Jackman Signed-off-by: Chris Redpath --- kernel/sched/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4097f9fa541b..ca9d72fa8e66 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3670,6 +3670,8 @@ void set_user_nice(struct task_struct *p, long nice) * the task might be in the middle of scheduling on another CPU. */ rq = task_rq_lock(p, &flags); + update_rq_clock(rq); + /* * The RT priorities are set via sched_setscheduler(), but we still * allow the 'normal' nice value to be set - but as expected -- cgit v1.2.3 From 795a6867cfe1ea0bbe967d2b037ae856484556a4 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Mon, 7 Aug 2017 17:39:00 +0100 Subject: UPSTREAM: sched/fair: Force balancing on nohz balance if local group has capacity The "goto force_balance" here is intended to mitigate the fact that avg_load calculations can result in bad placement decisions when priority is asymmetrical. The original commit that adds it: fab476228ba3 ("sched: Force balancing on newidle balance if local group has capacity") explains: Under certain situations, such as a niced down task (i.e. nice = -15) in the presence of nr_cpus NICE0 tasks, the niced task lands on a sched group and kicks away other tasks because of its large weight. This leads to sub-optimal utilization of the machine. 
Even though the sched group has capacity, it does not pull tasks because sds.this_load >> sds.max_load, and f_b_g() returns NULL. A similar but inverted issue also affects ARM big.LITTLE (asymmetrical CPU capacity) systems - consider 8 always-running, same-priority tasks on a system with 4 "big" and 4 "little" CPUs. Suppose that 5 of them end up on the "big" CPUs (which will be represented by one sched_group in the DIE sched_domain) and 3 on the "little" (the other sched_group in DIE), leaving one CPU unused. Because the "big" group has a higher group_capacity its avg_load may not present an imbalance that would cause migrating a task to the idle "little". The force_balance case here solves the problem but currently only for CPU_NEWLY_IDLE balances, which in theory might never happen on the unused CPU. Including CPU_IDLE in the force_balance case means there's an upper bound on the time before we can attempt to solve the underutilization: after DIE's sd->balance_interval has passed the next nohz balance kick will help us out. Change-Id: I807ba5cba0ef1b8bbec02cbcd4755fd32af10135 Signed-off-by: Brendan Jackman Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Paul Turner Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170807163900.25180-1-brendan.jackman@arm.com Signed-off-by: Ingo Molnar (cherry-picked-from: commit 583ffd99d765 tip:sched/core) Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f6e730bcde27..ae94c1124655 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8707,8 +8707,11 @@ static struct sched_group *find_busiest_group(struct lb_env *env) if (busiest->group_type == group_imbalanced) goto force_balance; - /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ - if (env->idle == CPU_NEWLY_IDLE && group_has_capacity(env, local) && + /* + * When dst_cpu is idle, prevent SMP nice and/or asymmetric group + * capacities from resulting in underutilization due to avg_load. + */ + if (env->idle != CPU_NOT_IDLE && group_has_capacity(env, local) && busiest->group_no_capacity) goto force_balance; -- cgit v1.2.3 From 0f743ce7458c3c7d4be2367c8ed538069f1472b6 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 31 Aug 2017 12:57:58 +0100 Subject: BACKPORT: sched/fair: Move select_task_rq_fair slow-path into its own function In preparation for changes that would otherwise require adding a new level of indentation to the while(sd) loop, create a new function find_idlest_cpu() which contains this loop, and rename the existing find_idlest_cpu() to find_idlest_group_cpu(). Code inside the while(sd) loop is unchanged. @new_cpu is added as a variable in the new function, with the same initial value as the @new_cpu in select_task_rq_fair(). 
Change-Id: I9842308cab00dc9cd6c513fc38c609089a1aaaaf Suggested-by: Peter Zijlstra Signed-off-by: Brendan Jackman Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josef Bacik Reviewed-by: Vincent Guittot Cc: Dietmar Eggemann Cc: Josef Bacik Cc: Linus Torvalds Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171005114516.18617-2-brendan.jackman@arm.com Signed-off-by: Ingo Molnar (reworked for eas/cas schedstats added in Android) (cherry-picked commit 18bd1b4bd53a from tip:sched/core) Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 114 ++++++++++++++++++++++++++++------------------------ 1 file changed, 62 insertions(+), 52 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ae94c1124655..561d1c505512 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6072,10 +6072,10 @@ skip_spare: } /* - * find_idlest_cpu - find the idlest cpu among the cpus in group. + * find_idlest_group_cpu - find the idlest cpu among the cpus in group. */ static int -find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) +find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) { unsigned long load, min_load = ULONG_MAX; unsigned int min_exit_latency = UINT_MAX; @@ -6122,6 +6122,65 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) } return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; + } + +static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p, + int cpu, int prev_cpu, int sd_flag) +{ + int new_cpu = prev_cpu; + int wu = sd_flag & SD_BALANCE_WAKE; + int cas_cpu = -1; + + if (wu) { + schedstat_inc(p, se.statistics.nr_wakeups_cas_attempts); + schedstat_inc(this_rq(), eas_stats.cas_attempts); + } + + while (sd) { + struct sched_group *group; + struct sched_domain *tmp; + int weight; + + if (wu) + schedstat_inc(sd, eas_stats.cas_attempts); + + if (!(sd->flags & sd_flag)) { + sd = sd->child; + continue; + } + + group = find_idlest_group(sd, p, cpu, sd_flag); + if (!group) { + sd = sd->child; + continue; + } + + new_cpu = find_idlest_group_cpu(group, p, cpu); + if (new_cpu == -1 || new_cpu == cpu) { + /* Now try balancing at a lower domain level of cpu */ + sd = sd->child; + continue; + } + + /* Now try balancing at a lower domain level of new_cpu */ + cpu = cas_cpu = new_cpu; + weight = sd->span_weight; + sd = NULL; + for_each_domain(cpu, tmp) { + if (weight <= tmp->span_weight) + break; + if (tmp->flags & sd_flag) + sd = tmp; + } + /* while loop will break here if sd == NULL */ + } + + if (wu && (cas_cpu >= 0)) { + schedstat_inc(p, se.statistics.nr_wakeups_cas_count); + schedstat_inc(this_rq(), eas_stats.cas_count); + } + + return new_cpu; } /* @@ -6698,56 +6757,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); } else { - int wu = sd_flag & SD_BALANCE_WAKE; - int cas_cpu = -1; - - if (wu) { - schedstat_inc(p, se.statistics.nr_wakeups_cas_attempts); - schedstat_inc(this_rq(), eas_stats.cas_attempts); - } - - while (sd) { - struct sched_group *group; - int weight; - - if (wu) - schedstat_inc(sd, eas_stats.cas_attempts); - - if (!(sd->flags & sd_flag)) { - sd = sd->child; - continue; - } - - group = find_idlest_group(sd, p, cpu, sd_flag); - if (!group) { - sd = sd->child; - continue; - } - - new_cpu = find_idlest_cpu(group, p, cpu); - if (new_cpu == -1 || new_cpu == cpu) { - /* Now try balancing at a lower domain level 
of cpu */ - sd = sd->child; - continue; - } - - /* Now try balancing at a lower domain level of new_cpu */ - cpu = cas_cpu = new_cpu; - weight = sd->span_weight; - sd = NULL; - for_each_domain(cpu, tmp) { - if (weight <= tmp->span_weight) - break; - if (tmp->flags & sd_flag) - sd = tmp; - } - /* while loop will break here if sd == NULL */ - } - - if (wu && (cas_cpu >= 0)) { - schedstat_inc(p, se.statistics.nr_wakeups_cas_count); - schedstat_inc(this_rq(), eas_stats.cas_count); - } + new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag); } rcu_read_unlock(); -- cgit v1.2.3 From 529def2ffe532855f570a3a00e9f78ce59c8d84b Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 31 Aug 2017 12:57:59 +0100 Subject: UPSTREAM: sched/fair: Remove unnecessary comparison with -1 Since commit: 83a0a96a5f26 ("sched/fair: Leverage the idle state info when choosing the "idlest" cpu") find_idlest_group_cpu() (formerly find_idlest_cpu) no longer returns -1, so we can simplify the checking of the return value in find_idlest_cpu(). Change-Id: I98f4b9f178cd93a30408e024e608d36771764c7b Signed-off-by: Brendan Jackman Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josef Bacik Reviewed-by: Vincent Guittot Cc: Dietmar Eggemann Cc: Josef Bacik Cc: Linus Torvalds Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171005114516.18617-3-brendan.jackman@arm.com Signed-off-by: Ingo Molnar (cherry-picked-from commit e90381eaecf6 in tip:sched/core) Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 561d1c505512..cf28d82fad41 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6156,7 +6156,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p } new_cpu = find_idlest_group_cpu(group, p, cpu); - if (new_cpu == -1 || new_cpu == cpu) { + if (new_cpu == cpu) { /* Now try balancing at a lower domain level of cpu */ sd = sd->child; continue; -- cgit v1.2.3 From 9c825cf6165c1662ce588a1fe11a912eaeaec928 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 31 Aug 2017 12:58:00 +0100 Subject: BACKPORT: sched/fair: Fix find_idlest_group when local group is not allowed When the local group is not allowed we do not modify this_*_load from their initial value of 0. That means that the load checks at the end of find_idlest_group cause us to incorrectly return NULL. Fixing the initial values to ULONG_MAX means we will instead return the idlest remote group in that case. BACKPORT: Note 4.4 is missing commit 6b94780e45c1 "sched/core: Use load_avg for selecting idlest group", so we only have to fix this_load instead of this_runnable_load and this_avg_load. 
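To see why the 0 initialization was wrong, consider the final check in find_idlest_group() (visible in the skip_spare hunk earlier in this series) when the local group was not allowed and this_load kept its initial value:

	if (!idlest || 100*this_load < imbalance*min_load)
		return NULL;	/* with this_load == 0 this always returns NULL, claiming the local group is idlest */

Starting this_load at ULONG_MAX makes the comparison fail for a skipped local group, so the idlest remote group is returned instead.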
Change-Id: I41f775b0e7c8f5e675c2780f955bb130a563cba7 Signed-off-by: Brendan Jackman Reviewed-by: Vincent Guittot Reviewed-by: Josef Bacik Cc: Dietmar Eggemann Cc: Vincent Guittot Cc: Josef Bacik Cc: Ingo Molnar Cc: Morten Rasmussen Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171005114516.18617-4-brendan.jackman@arm.com Signed-off-by: Ingo Molnar (cherry-picked-from: commit 0d10ab952e99 tip:sched/core) (backport changes described above) Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cf28d82fad41..dc685b67d08b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5983,7 +5983,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, { struct sched_group *idlest = NULL, *group = sd->groups; struct sched_group *most_spare_sg = NULL; - unsigned long min_load = ULONG_MAX, this_load = 0; + unsigned long min_load = ULONG_MAX, this_load = ULONG_MAX; unsigned long most_spare = 0, this_spare = 0; int load_idx = sd->forkexec_idx; int imbalance = 100 + (sd->imbalance_pct-100)/2; -- cgit v1.2.3 From 411654764590c071ef55242ffd50c4e74930353b Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 31 Aug 2017 12:58:01 +0100 Subject: UPSTREAM: sched/fair: Fix usage of find_idlest_group() when no groups are allowed When 'p' is not allowed on any of the CPUs in the sched_domain, we currently return NULL from find_idlest_group(), and pointlessly continue the search on lower sched_domain levels (where 'p' is also not allowed) before returning prev_cpu regardless (as we have not updated new_cpu). Add an explicit check for this case, and add a comment to find_idlest_group(). Now when find_idlest_group() returns NULL, it always means that the local group is allowed and idlest. Change-Id: I5f2648d2f7fb0465677961ecb7473df3d06f0057 Signed-off-by: Brendan Jackman Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Reviewed-by: Josef Bacik Cc: Dietmar Eggemann Cc: Josef Bacik Cc: Linus Torvalds Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171005114516.18617-5-brendan.jackman@arm.com Signed-off-by: Ingo Molnar (cherry-picked-from: commit 6fee85ccbc76 tip:sched/core) Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dc685b67d08b..0e3a9935515c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5976,6 +5976,8 @@ static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) /* * find_idlest_group finds and returns the least busy CPU group within the * domain. + * + * Assumes p is allowed on at least one CPU in sd. */ static struct sched_group * find_idlest_group(struct sched_domain *sd, struct task_struct *p, @@ -6136,6 +6138,9 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p schedstat_inc(this_rq(), eas_stats.cas_attempts); } + if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed)) + return prev_cpu; + while (sd) { struct sched_group *group; struct sched_domain *tmp; -- cgit v1.2.3 From 5a8663664915417aec806da2b0bc534d675f413d Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 31 Aug 2017 12:58:02 +0100 Subject: UPSTREAM: sched/fair: Fix usage of find_idlest_group() when the local group is idlest find_idlest_group() returns NULL when the local group is idlest. 
The caller then continues the find_idlest_group() search at a lower level of the current CPU's sched_domain hierarchy. find_idlest_group_cpu() is not consulted and, crucially, @new_cpu is not updated. This means the search is pointless and we return @prev_cpu from select_task_rq_fair(). This is fixed by initialising @new_cpu to @cpu instead of @prev_cpu. Change-Id: Ie531f5bb29775952bdc4c148b6e974b2f5f32b7a Signed-off-by: Brendan Jackman Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josef Bacik Reviewed-by: Vincent Guittot Cc: Dietmar Eggemann Cc: Josef Bacik Cc: Linus Torvalds Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171005114516.18617-6-brendan.jackman@arm.com Signed-off-by: Ingo Molnar (cherry-picked-from: commit 93f50f90247e tip:sched/core) Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0e3a9935515c..bf7b7a7e778b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6129,7 +6129,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p, int cpu, int prev_cpu, int sd_flag) { - int new_cpu = prev_cpu; + int new_cpu = cpu; int wu = sd_flag & SD_BALANCE_WAKE; int cas_cpu = -1; -- cgit v1.2.3 From 2f30db8df4076c54f92ac2ebe26734e27c164fd3 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 1 Aug 2017 15:48:37 +0100 Subject: UPSTREAM: sched/fair: Sync task util before slow-path wakeup We use task_util() in find_idlest_group() via capacity_spare_wake(). This task_util() is updated in wake_cap(). However wake_cap() is not the only reason for ending up in find_idlest_group() - we could have been sent there by wake_wide(). So explicitly sync the task util with prev_cpu when we are about to head to find_idlest_group(). We could simply do this at the beginning of select_task_rq_fair() (i.e. irrespective of whether we're heading to select_idle_sibling() or find_idlest_group() & co), but I didn't want to slow down the select_idle_sibling() path more than necessary. Don't do this during fork balancing, we won't need the task_util and we'd just clobber the last_update_time, which is supposed to be 0. Change-Id: I935f4bfdfec3e8b914457aac3387ce264d5fd484 Signed-off-by: Brendan Jackman Signed-off-by: Peter Zijlstra (Intel) Cc: Andres Oportus Cc: Dietmar Eggemann Cc: Joel Fernandes Cc: Josef Bacik Cc: Linus Torvalds Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vincent Guittot Link: http://lkml.kernel.org/r/20170808095519.10077-1-brendan.jackman@arm.com Signed-off-by: Ingo Molnar (cherry-picked-from: commit ea16f0ea6c3d tip:sched/core) Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bf7b7a7e778b..5d8081b77f8a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6757,6 +6757,15 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f new_cpu = cpu; } + if (sd && !(sd_flag & SD_BALANCE_FORK)) { + /* + * We're going to need the task's util for capacity_spare_wake + * in find_idlest_group. Sync it up to prev_cpu's + * last_update_time. + */ + sync_entity_load_avg(&p->se); + } + if (!sd) { if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? 
*/ new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); -- cgit v1.2.3 From 2aada289d7be37116f21d70177cb06a929b5d961 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 7 Sep 2017 12:24:45 +0100 Subject: sched/fair: trace energy_diff for non boosted tasks In systems where SchedTune is enabled, we do not report energy diff for non boosted tasks. Let's fix this by always generating an energy_diff event, where however: nrg.delta = 0, since we skip energy normalization payoff = nrg.diff, since the payoff is defined just by the energy difference Change-Id: I9a11ec19b6f56da04147f5ae5b47daf1dd180445 Signed-off-by: Patrick Bellasi Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5d8081b77f8a..a17820385ee2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5702,8 +5702,14 @@ energy_diff(struct energy_env *eenv) __energy_diff(eenv); /* Return energy diff when boost margin is 0 */ - if (boost == 0) + if (boost == 0) { + trace_sched_energy_diff(eenv->task, + eenv->src_cpu, eenv->dst_cpu, eenv->util_delta, + eenv->nrg.before, eenv->nrg.after, eenv->nrg.diff, + eenv->cap.before, eenv->cap.after, eenv->cap.delta, + 0, -eenv->nrg.diff); return eenv->nrg.diff; + } /* Compute normalized energy diff */ nrg_delta = normalize_energy(eenv->nrg.diff); -- cgit v1.2.3 From 4edc5b0e387a906a00cb45af2abf76aa3a930438 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Thu, 7 Sep 2017 12:27:56 +0100 Subject: sched/fair: ignore backup CPU when not valid find_best_target() can sometimes fail to return a valid backup CPU, either because it cannot find one or just because it returns prev_cpu as a backup. In these cases we should skip the energy_diff evaluation for the backup CPU. Change-Id: I3787dbdfe74122348dd7a7485b88c4679051bd32 Signed-off-by: Patrick Bellasi Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a17820385ee2..417b373e4074 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6686,7 +6686,9 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync /* No energy saving for target_cpu, try backup */ target_cpu = tmp_backup; eenv.dst_cpu = target_cpu; - if (tmp_backup < 0 || energy_diff(&eenv) >= 0) { + if (tmp_backup < 0 || + tmp_backup == prev_cpu || + energy_diff(&eenv) >= 0) { schedstat_inc(p, se.statistics.nr_wakeups_secb_no_nrg_sav); schedstat_inc(this_rq(), eas_stats.secb_no_nrg_sav); target_cpu = prev_cpu; -- cgit v1.2.3 From ca42e804464bc9cb81e2ad07fe689c9f8514d5fe Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Mon, 17 Jul 2017 15:54:39 +0100 Subject: sched/fair: enforce EAS mode For non latency sensitive tasks, the goal is to optimize for energy efficiency. Thus, we should try our best to avoid moving a task onto a CPU which is then going to be marked as overutilized. Let's use the capacity_margin metric to verify whether a candidate target CPU can be considered without risking bailing out of EAS mode.
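To illustrate the new find_best_target() check with assumed numbers (capacity_margin is taken here to be 1280, the usual default in this tree, i.e. roughly an 80% threshold):

	/* Candidate CPU with capacity_orig = 1024; placing the task there
	 * would raise its utilization to new_util = 900: */
	900 * 1280 = 1152000  >  1024 * 1024 = 1048576	/* -> skip this CPU */

So a candidate is skipped once the post-placement utilization would exceed capacity_orig * 1024/1280 (about 819 here), keeping the CPU from being pushed into the overutilized range.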
Change-Id: Ib3697106f4073aedf4a6c6ce42bd5d000fa8c007 Signed-off-by: Patrick Bellasi Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 417b373e4074..0784864eeb30 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6469,6 +6469,19 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu, continue; } + /* + * Enforce EAS mode + * + * For non latency sensitive tasks, skip CPUs that + * will be overutilized by moving the task there. + * + * The goal here is to remain in EAS mode as long as + * possible at least for !prefer_idle tasks. + */ + if ((new_util * capacity_margin) > + (capacity_orig * SCHED_CAPACITY_SCALE)) + continue; + /* * Case B) Non latency sensitive tasks on IDLE CPUs. * -- cgit v1.2.3 From 5f8b3a757d6561e5668cb09c75b856347263718b Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Tue, 12 Sep 2017 14:44:24 +0100 Subject: sched/fair: consider task utilization in group_norm_util() The group_norm_util() function is used to compute the normalized utilization of an SG given a certain energy_env configuration. The main client of this function is the energy_diff function when it comes to computing the SG energy for one of the before/after scheduling candidates. Currently, the energy_diff function sets util_delta = 0 when it wants to compute the energy corresponding to the scheduling candidate where the task runs on the previous CPU. This implies that, for the task waking up on the previous CPU, we consider only its blocked load tracked by the CPU RQ. However, in the case of a medium-big task waking up on a long-idle CPU, this blocked load can already be completely decayed. More generally, the current approach is biased towards under-estimating the energy consumption for the "before" scheduling candidate. This patch fixes this by: - always using cpu_util_wake() to properly get the utilization of a CPU without any (partially decayed) contribution of the waking-up task - adding the task utilization to the cpu_util_wake() value just for the target CPU The "target CPU" is defined by the energy_env to be either the src_cpu or the dst_cpu, depending on which scheduling candidate we are considering. This patch also updates the definition of __cpu_norm_util(), which is currently called just by the group_norm_util() function. This simplifies the code by using this function just to normalize a specified utilization with respect to a given capacity. This update makes it possible to completely remove any dependency of group_norm_util() on calc_util_delta(). Change-Id: I3b6ec50ce8decb1521faae660e326ab3319d3c82 Signed-off-by: Patrick Bellasi Signed-off-by: Chris Redpath --- kernel/sched/fair.c | 58 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0784864eeb30..9575473f0cf6 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5297,6 +5297,7 @@ struct energy_env { int util_delta; int src_cpu; int dst_cpu; + int trg_cpu; int energy; int payoff; struct task_struct *task; @@ -5313,11 +5314,14 @@ struct energy_env { } cap; }; +static int cpu_util_wake(int cpu, struct task_struct *p); + /* * __cpu_norm_util() returns the cpu util relative to a specific capacity, - * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE] which is useful for - * energy calculations.
Using the scale-invariant util returned by - * cpu_util() and approximating scale-invariant util by: + * i.e. it's busy ratio, in the range [0..SCHED_LOAD_SCALE], which is useful for + * energy calculations. + * + * Since util is a scale-invariant utilization defined as: * * util ~ (curr_freq/max_freq)*1024 * capacity_orig/1024 * running_time/time * @@ -5327,10 +5331,8 @@ struct energy_env { * * norm_util = running_time/time ~ util/capacity */ -static unsigned long __cpu_norm_util(int cpu, unsigned long capacity, int delta) +static unsigned long __cpu_norm_util(unsigned long util, unsigned long capacity) { - int util = __cpu_util(cpu, delta); - if (util >= capacity) return SCHED_CAPACITY_SCALE; @@ -5362,28 +5364,37 @@ unsigned long group_max_util(struct energy_env *eenv) /* * group_norm_util() returns the approximated group util relative to it's - * current capacity (busy ratio) in the range [0..SCHED_LOAD_SCALE] for use in - * energy calculations. Since task executions may or may not overlap in time in - * the group the true normalized util is between max(cpu_norm_util(i)) and - * sum(cpu_norm_util(i)) when iterating over all cpus in the group, i. The - * latter is used as the estimate as it leads to a more pessimistic energy + * current capacity (busy ratio), in the range [0..SCHED_LOAD_SCALE], for use + * in energy calculations. + * + * Since task executions may or may not overlap in time in the group the true + * normalized util is between MAX(cpu_norm_util(i)) and SUM(cpu_norm_util(i)) + * when iterating over all CPUs in the group. + * The latter estimate is used as it leads to a more pessimistic energy * estimate (more busy). */ static unsigned long group_norm_util(struct energy_env *eenv, struct sched_group *sg) { - int i, delta; - unsigned long util_sum = 0; unsigned long capacity = sg->sge->cap_states[eenv->cap_idx].cap; + unsigned long util, util_sum = 0; + int cpu; - for_each_cpu(i, sched_group_cpus(sg)) { - delta = calc_util_delta(eenv, i); - util_sum += __cpu_norm_util(i, capacity, delta); + for_each_cpu(cpu, sched_group_cpus(sg)) { + util = cpu_util_wake(cpu, eenv->task); + + /* + * If we are looking at the target CPU specified by the eenv, + * then we should add the (estimated) utilization of the task + * assuming we will wake it up on that CPU. + */ + if (unlikely(cpu == eenv->trg_cpu)) + util += eenv->util_delta; + + util_sum += __cpu_norm_util(util, capacity); } - if (util_sum > SCHED_CAPACITY_SCALE) - return SCHED_CAPACITY_SCALE; - return util_sum; + return min_t(unsigned long, util_sum, SCHED_CAPACITY_SCALE); } static int find_new_capacity(struct energy_env *eenv, @@ -5575,6 +5586,8 @@ static inline bool cpu_in_sg(struct sched_group *sg, int cpu) return cpu != -1 && cpumask_test_cpu(cpu, sched_group_cpus(sg)); } +static inline unsigned long task_util(struct task_struct *p); + /* * energy_diff(): Estimate the energy impact of changing the utilization * distribution. 
eenv specifies the change: utilisation amount, source, and
@@ -5590,11 +5603,13 @@ static inline int __energy_diff(struct energy_env *eenv)
 	int diff, margin;

 	struct energy_env eenv_before = {
-		.util_delta	= 0,
+		.util_delta	= task_util(eenv->task),
 		.src_cpu	= eenv->src_cpu,
 		.dst_cpu	= eenv->dst_cpu,
+		.trg_cpu	= eenv->src_cpu,
 		.nrg		= { 0, 0, 0, 0},
 		.cap		= { 0, 0, 0 },
+		.task		= eenv->task,
 	};

 	if (eenv->src_cpu == eenv->dst_cpu)
@@ -5972,8 +5987,6 @@ boosted_task_util(struct task_struct *task)
 	return util + margin;
 }

-static int cpu_util_wake(int cpu, struct task_struct *p);
-
 static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
 {
 	return capacity_orig_of(cpu) - cpu_util_wake(cpu, p);
 }
@@ -6681,6 +6694,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
 		.src_cpu	= prev_cpu,
 		.dst_cpu	= target_cpu,
 		.task		= p,
+		.trg_cpu	= target_cpu,
 	};
-- cgit v1.2.3

From 3c71cbb896fe15de4f365223af45096c4098c309 Mon Sep 17 00:00:00 2001
From: Patrick Bellasi
Date: Thu, 1 Jun 2017 16:40:22 +0100
Subject: sched/fair: consider task utilization in group_max_util()

The group_max_util() function is used to compute the maximum
utilization across the CPUs of a certain energy_env configuration. Its
main client is energy_diff when it needs to compute the SG capacity for
one of the before/after scheduling candidates.

Currently, energy_diff sets util_delta = 0 when it wants to compute the
energy corresponding to the scheduling candidate where the task runs on
the previous CPU. This implies that, for the task waking up on the
previous CPU, we consider only its blocked load as tracked by the CPU
RQ. However, for a medium-big task waking up on a CPU that has been
idle for a long time, this blocked load can already be completely
decayed. More generally, the current approach is biased towards
under-estimating the capacity requirements of the "before" scheduling
candidate.

This patch fixes this by:
- always using cpu_util_wake() to properly get the utilization of a CPU
  without any (partially decayed) contribution of the waking up task
- adding the task utilization to cpu_util_wake just for the target CPU

The "target CPU" is defined by the energy_env to be either the src_cpu
or the dst_cpu, depending on which scheduling candidate we are
considering.

Finally, since this update removes the last usage of calc_util_delta(),
that function is now safely removed.
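As an illustration of the estimate that both this patch and the
group_norm_util() change above rely on, here is a stand-alone toy
model; the CPU count, utilization numbers and helper signature are
assumptions made for the example only:

	#include <stdio.h>

	#define NCPUS 4

	/* max utilization across a group for one scheduling candidate */
	static unsigned long group_max_util(const unsigned long util_wake[NCPUS],
					    unsigned long task_util, int trg_cpu)
	{
		unsigned long util, max = 0;
		int cpu;

		for (cpu = 0; cpu < NCPUS; cpu++) {
			util = util_wake[cpu];
			if (cpu == trg_cpu)
				util += task_util; /* task placed here */
			if (util > max)
				max = util;
		}
		return max;
	}

	int main(void)
	{
		/* per-CPU utilization with the waking task's load removed */
		const unsigned long util_wake[NCPUS] = { 100, 250, 50, 75 };
		const unsigned long task_util = 300;

		/* "before" candidate: task on its prev_cpu, CPU 0 */
		printf("%lu\n", group_max_util(util_wake, task_util, 0)); /* 400 */
		/* "after" candidate: task on dst_cpu, CPU 2 */
		printf("%lu\n", group_max_util(util_wake, task_util, 2)); /* 350 */
		return 0;
	}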
Change-Id: I20ee1bcf40cee6bf6e265fb2d32ef79061ad6ced
Signed-off-by: Patrick Bellasi
Signed-off-by: Chris Redpath
---
 kernel/sched/fair.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9575473f0cf6..e9919aeacdd5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5339,24 +5339,24 @@ static unsigned long __cpu_norm_util(unsigned long util, unsigned long capacity)
 	return (util << SCHED_CAPACITY_SHIFT)/capacity;
 }

-static int calc_util_delta(struct energy_env *eenv, int cpu)
+static unsigned long group_max_util(struct energy_env *eenv)
 {
-	if (cpu == eenv->src_cpu)
-		return -eenv->util_delta;
-	if (cpu == eenv->dst_cpu)
-		return eenv->util_delta;
-	return 0;
-}
-
-static
-unsigned long group_max_util(struct energy_env *eenv)
-{
-	int i, delta;
 	unsigned long max_util = 0;
+	unsigned long util;
+	int cpu;
+
+	for_each_cpu(cpu, sched_group_cpus(eenv->sg_cap)) {
+		util = cpu_util_wake(cpu, eenv->task);
+
+		/*
+		 * If we are looking at the target CPU specified by the eenv,
+		 * then we should add the (estimated) utilization of the task
+		 * assuming we will wake it up on that CPU.
+		 */
+		if (unlikely(cpu == eenv->trg_cpu))
+			util += eenv->util_delta;

-	for_each_cpu(i, sched_group_cpus(eenv->sg_cap)) {
-		delta = calc_util_delta(eenv, i);
-		max_util = max(max_util, __cpu_util(i, delta));
+		max_util = max(max_util, util);
 	}

 	return max_util;
-- cgit v1.2.3

From e3ba92c160d3fb21af5b28a21c63851fad21b168 Mon Sep 17 00:00:00 2001
From: Russ Weight
Date: Thu, 8 Jun 2017 11:38:59 -0700
Subject: sched/tune: access schedtune_initialized under CGROUP_SCHEDTUNE

schedtune_initialized is protected by CONFIG_CGROUP_SCHEDTUNE, but is
being used without CONFIG_CGROUP_SCHEDTUNE being defined. Add
appropriate ifdefs around the usage of schedtune_initialized to avoid a
compilation error when CONFIG_CGROUP_SCHEDTUNE is not defined.

Change-Id: Iab79bf053d74db3eeb84c09d71d43b4e39746ed2
Signed-off-by: Russ Weight
Signed-off-by: Fei Yang
---
 kernel/sched/fair.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e9919aeacdd5..6023d9e3a9f5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5668,7 +5668,11 @@ static inline int __energy_diff(struct energy_env *eenv)

 #ifdef CONFIG_SCHED_TUNE
 struct target_nrg schedtune_target_nrg;
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
 extern bool schedtune_initialized;
+#endif /* CONFIG_CGROUP_SCHEDTUNE */
+
 /*
  * System energy normalization
  * Returns the normalized value, in the range [0..SCHED_CAPACITY_SCALE],
@@ -5679,9 +5683,11 @@ normalize_energy(int energy_diff)
 {
 	u32 normalized_nrg;

+#ifdef CONFIG_CGROUP_SCHEDTUNE
 	/* during early setup, we don't know the extents */
 	if (unlikely(!schedtune_initialized))
 		return energy_diff < 0 ? -1 : 1 ;
+#endif /* CONFIG_CGROUP_SCHEDTUNE */

 #ifdef CONFIG_SCHED_DEBUG
 	{
-- cgit v1.2.3

From effc721b3c9b3bd258313450cdfd3d0e644f4d85 Mon Sep 17 00:00:00 2001
From: Leo Yan
Date: Mon, 7 Aug 2017 18:14:37 +0800
Subject: sched/fair: remove useless variable in find_best_target

Patch 5680f23f20c7 ("sched/fair: streamline find_best_target
heuristics") has reworked the find_best_target() function; as a result,
the variable "target_util" is now unused. So remove it.
Change-Id: I5447062419e5828a49115119984fac6cd37db034
Signed-off-by: Leo Yan
---
 kernel/sched/fair.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6023d9e3a9f5..9bc717ef81f5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6350,7 +6350,6 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
 	unsigned long target_capacity = ULONG_MAX;
 	unsigned long min_wake_util = ULONG_MAX;
 	unsigned long target_max_spare_cap = 0;
-	unsigned long target_util = ULONG_MAX;
 	unsigned long best_active_util = ULONG_MAX;
 	int best_idle_cstate = INT_MAX;
 	struct sched_domain *sd;
@@ -6579,7 +6578,6 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,

 			target_max_spare_cap = capacity_orig - new_util;
 			target_capacity = capacity_orig;
-			target_util = new_util;
 			target_cpu = i;
 		}

-- cgit v1.2.3

From 43bd960dfe728284e1059f11d6c686d23887c1c6 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Fri, 3 Feb 2017 11:15:31 -0800
Subject: sched: WALT: account cumulative window demand

Energy cost estimation has been a long-standing challenge for WALT
because WALT guides CPU frequency based on the CPU utilization of the
previous window. Consequently, it's not possible to know a newly
waking-up task's energy cost until the end of WALT's current window.

WALT already tracks the 'Previous Runnable Sum' (prev_runnable_sum)
and the 'Cumulative Runnable Average' (cr_avg). They are designed for
CPU frequency guidance and task placement, but unfortunately neither is
suitable for energy cost estimation.

That's because using prev_runnable_sum for the energy cost calculation
would account CPU and task energy solely based on activity in the
previous window, so, for example, any task that had no activity in the
previous window would be accounted as a 'zero energy cost' task.
Energy estimation with cr_avg is what energy_diff() relies on at
present. However, cr_avg can only represent an instantaneous picture
of the energy cost: if a CPU was fully occupied for an entire WALT
window but became idle just before the window boundary, on a subsequent
wake-up energy_diff() accounts that CPU as a 'zero energy cost' CPU.

As a result, introduce a new accounting unit, 'Cumulative Window
Demand'. The cumulative window demand tracks all the task demands seen
in the current window, which is neither instantaneous nor actual
execution time. Because task demand represents the estimated scaled
execution time when the task runs a full window, the accumulation of
all the demands represents the predicted CPU load at the end of the
window. Thus we can estimate the CPU's frequency at the end of the
current WALT window with the cumulative window demand.

The use of prev_runnable_sum for CPU frequency guidance and of cr_avg
for task placement has not changed; both are still used for those
purposes, while this patch adds an additional statistic.
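For illustration, a minimal user-space model of the new counter; the
demand values are invented, and only the clamp-at-zero behaviour
mirrors fixup_cum_window_demand() in the patch below:

	#include <stdio.h>

	typedef long long s64;
	typedef unsigned long long u64;

	static u64 cum_window_demand;

	/* accumulate a (possibly negative) delta, but never go negative */
	static void fixup_cum_window_demand(s64 delta)
	{
		cum_window_demand += delta;
		if ((s64)cum_window_demand < 0)
			cum_window_demand = 0;
	}

	int main(void)
	{
		fixup_cum_window_demand(512);        /* task A seen this window */
		fixup_cum_window_demand(256);        /* task B seen this window */
		fixup_cum_window_demand(-512);       /* task A migrates away    */
		printf("%llu\n", cum_window_demand); /* 256 */
		return 0;
	}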
Change-Id: I9908c77ead9973a26dea2b36c001c2baf944d4f5 Signed-off-by: Joonwoo Park --- include/linux/sched.h | 1 + kernel/sched/core.c | 8 ++++++++ kernel/sched/sched.h | 12 +++++++++++ kernel/sched/walt.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 74 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 1872d19d1702..62e179f84aef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1568,6 +1568,7 @@ struct task_struct { * of this task */ u32 init_load_pct; + u64 last_sleep_ts; #endif #ifdef CONFIG_CGROUP_SCHED diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ca9d72fa8e66..563f316f5330 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2170,6 +2170,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; +#ifdef CONFIG_SCHED_WALT + p->last_sleep_ts = 0; +#endif + INIT_LIST_HEAD(&p->se.group_node); walt_init_new_task_load(p); @@ -3379,6 +3383,10 @@ static void __sched notrace __schedule(bool preempt) rq->clock_skip_update = 0; if (likely(prev != next)) { +#ifdef CONFIG_SCHED_WALT + if (!prev->on_rq) + prev->last_sleep_ts = wallclock; +#endif rq->nr_switches++; rq->curr = next; ++*switch_count; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 8d3712107e61..deba080af845 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -690,6 +690,7 @@ struct rq { u64 cur_irqload; u64 avg_irqload; u64 irqload_ts; + u64 cum_window_demand; #endif /* CONFIG_SCHED_WALT */ @@ -2101,6 +2102,17 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ +#ifdef CONFIG_SCHED_WALT + +static inline bool +walt_task_in_cum_window_demand(struct rq *rq, struct task_struct *p) +{ + return cpu_of(rq) == task_cpu(p) && + (p->on_rq || p->last_sleep_ts >= rq->window_start); +} + +#endif /* CONFIG_SCHED_WALT */ + #ifdef arch_scale_freq_capacity #ifndef arch_scale_freq_invariant #define arch_scale_freq_invariant() (true) diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 441cba01bc04..93f61486f989 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -70,11 +70,28 @@ static unsigned int task_load(struct task_struct *p) return p->ravg.demand; } +static inline void fixup_cum_window_demand(struct rq *rq, s64 delta) +{ + rq->cum_window_demand += delta; + if (unlikely((s64)rq->cum_window_demand < 0)) + rq->cum_window_demand = 0; +} + void walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) { rq->cumulative_runnable_avg += p->ravg.demand; + + /* + * Add a task's contribution to the cumulative window demand when + * + * (1) task is enqueued with on_rq = 1 i.e migration, + * prio/cgroup/class change. + * (2) task is waking for the first time in this window. + */ + if (p->on_rq || (p->last_sleep_ts < rq->window_start)) + fixup_cum_window_demand(rq, p->ravg.demand); } void @@ -83,6 +100,14 @@ walt_dec_cumulative_runnable_avg(struct rq *rq, { rq->cumulative_runnable_avg -= p->ravg.demand; BUG_ON((s64)rq->cumulative_runnable_avg < 0); + + /* + * on_rq will be 1 for sleeping tasks. So check if the task + * is migrating or dequeuing in RUNNING state to change the + * prio/cgroup/class. 
+	 */
+	if (task_on_rq_migrating(p) || p->state == TASK_RUNNING)
+		fixup_cum_window_demand(rq, -(s64)p->ravg.demand);
 }

 static void
@@ -95,6 +120,8 @@ fixup_cumulative_runnable_avg(struct rq *rq,
 	if ((s64)rq->cumulative_runnable_avg < 0)
 		panic("cra less than zero: tld: %lld, task_load(p) = %u\n",
 			task_load_delta, task_load(p));
+
+	fixup_cum_window_demand(rq, task_load_delta);
 }

 u64 walt_ktime_clock(void)
@@ -180,6 +207,8 @@ update_window_start(struct rq *rq, u64 wallclock)

 	nr_windows = div64_u64(delta, walt_ravg_window);
 	rq->window_start += (u64)nr_windows * (u64)walt_ravg_window;
+
+	rq->cum_window_demand = rq->cumulative_runnable_avg;
 }

 /*
@@ -568,10 +597,20 @@ static void update_history(struct rq *rq, struct task_struct *p,
 	 * A throttled deadline sched class task gets dequeued without
 	 * changing p->on_rq. Since the dequeue decrements hmp stats
 	 * avoid decrementing it here again.
+	 *
+	 * When window is rolled over, the cumulative window demand
+	 * is reset to the cumulative runnable average (contribution from
+	 * the tasks on the runqueue). If the current task is dequeued
+	 * already, it's demand is not included in the cumulative runnable
+	 * average. So add the task demand separately to cumulative window
+	 * demand.
 	 */
-	if (task_on_rq_queued(p) && (!task_has_dl_policy(p) ||
-				!p->dl.dl_throttled))
-		fixup_cumulative_runnable_avg(rq, p, demand);
+	if (!task_has_dl_policy(p) || !p->dl.dl_throttled) {
+		if (task_on_rq_queued(p))
+			fixup_cumulative_runnable_avg(rq, p, demand);
+		else if (rq->curr == p)
+			fixup_cum_window_demand(rq, demand);
+	}

 	p->ravg.demand = demand;

@@ -792,6 +831,17 @@ void walt_fixup_busy_time(struct task_struct *p, int new_cpu)
 	walt_update_task_ravg(p, task_rq(p), TASK_MIGRATE,
 			wallclock, 0);

+	/*
+	 * When a task is migrating during the wakeup, adjust
+	 * the task's contribution towards cumulative window
+	 * demand.
+	 */
+	if (p->state == TASK_WAKING &&
+	    p->last_sleep_ts >= src_rq->window_start) {
+		fixup_cum_window_demand(src_rq, -(s64)p->ravg.demand);
+		fixup_cum_window_demand(dest_rq, p->ravg.demand);
+	}
+
 	if (p->ravg.curr_window) {
 		src_rq->curr_runnable_sum -= p->ravg.curr_window;
 		dest_rq->curr_runnable_sum += p->ravg.curr_window;
-- cgit v1.2.3

From 38ddcff85af052ad99e4f3f0b6e9659b0ca10dcf Mon Sep 17 00:00:00 2001
From: Brendan Jackman
Date: Mon, 7 Aug 2017 15:46:13 +0100
Subject: FROMLIST: sched/fair: Use wake_q length as a hint for wake_wide

(from https://patchwork.kernel.org/patch/9895261/)

This patch adds a parameter to select_task_rq, sibling_count_hint,
allowing the caller, where it has this information, to inform the
sched_class of the number of tasks that are being woken up as part of
the same event.

The wake_q mechanism is one case where this information is available.

select_task_rq_fair can then use the information to detect that it
needs to widen the search space for task placement in order to avoid
overloading the last-level cache domain's CPUs.

* * *

The reason I am investigating this change is the following use case on
ARM big.LITTLE (asymmetrical CPU capacity): 1 task per CPU, which all
repeatedly do X amount of work then pthread_barrier_wait (i.e. sleep
until the last task finishes its X and hits the barrier).
On big.LITTLE, the tasks which get a "big" CPU finish faster, and then
those CPUs pull over the tasks that are still running:

     v CPU v           ->time->

                    -------------
   0  (big)         11111  /333
                    -------------
   1  (big)         22222 /444|
                    -------------
   2  (LITTLE)      333333/
                    -------------
   3  (LITTLE)      444444/
                    -------------

Now when task 4 hits the barrier (at |) and wakes the others up, there
are 4 tasks with prev_cpu=<big CPU> and 0 tasks with
prev_cpu=<LITTLE CPU>. want_affine therefore means that we'll only look
in CPUs 0 and 1 (sd_llc), so tasks will be unnecessarily coscheduled on
the bigs until the next load balance, something like this:

     v CPU v           ->time->

                    ------------------------
   0  (big)         11111  /333  31313\33333
                    ------------------------
   1  (big)         22222 /444|424\4444444
                    ------------------------
   2  (LITTLE)      333333/          \222222
                    ------------------------
   3  (LITTLE)      444444/            \1111
                    ------------------------
                                 ^^^
                          underutilization

So, I'm trying to get want_affine = 0 for these tasks.

I don't _think_ any incarnation of the wakee_flips mechanism can help
us here because which task is waker and which tasks are wakees
generally changes with each iteration.

However pthread_barrier_wait (or more accurately FUTEX_WAKE) has the
nice property that we know exactly how many tasks are being woken, so
we can cheat.

It might be a disadvantage that we "widen" _every_ task that's woken in
an event, while select_idle_sibling would work fine for the first
sd_llc_size - 1 tasks.

IIUC, if wake_affine() behaves correctly this trick wouldn't be
necessary on SMP systems, so it might be best guarded by the presence
of SD_ASYM_CPUCAPACITY?

* * *

Final note.. In order to observe "perfect" behaviour for this use case,
I also had to disable the TTWU_QUEUE sched feature. Suppose during the
wakeup above we are working through the wake queue and have placed
tasks 3 and 2, and are about to place task 1:

     v CPU v           ->time->

                    --------------
   0  (big)         11111  /333  3
                    --------------
   1  (big)         22222 /444|4
                    --------------
   2  (LITTLE)      333333/      2
                    --------------
   3  (LITTLE)      444444/          <- Task 1 should go here
                    --------------

If TTWU_QUEUE is enabled, we will not yet have enqueued task 2 (having
instead sent a reschedule IPI) or attached its load to CPU 2. So we are
likely to also place task 1 on cpu 2. Disabling TTWU_QUEUE means that
we enqueue task 2 before placing task 1, solving this issue. TTWU_QUEUE
is there to minimise rq lock contention, and I guess that this
contention is less of an issue on big.LITTLE systems since they have
relatively few CPUs, which suggests the trade-off makes sense here.
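The resulting heuristic is small enough to model in isolation. A
stand-alone sketch (llc_size and the flip counts are example inputs,
not kernel state):

	#include <stdio.h>

	static int wake_wide(unsigned int master, unsigned int slave,
			     unsigned int llc_size,
			     unsigned int sibling_count_hint)
	{
		unsigned int tmp;

		/* A wakeup batch that fills the LLC domain always goes wide. */
		if (sibling_count_hint >= llc_size)
			return 1;

		if (master < slave) {	/* swap so that master >= slave */
			tmp = master;
			master = slave;
			slave = tmp;
		}
		if (slave < llc_size || master < slave * llc_size)
			return 0;
		return 1;
	}

	int main(void)
	{
		/* barrier wakeup: 4 wakees on a 2-CPU LLC -> widen */
		printf("%d\n", wake_wide(0, 0, 2, 4));  /* 1 */
		/* ordinary single wakeup falls back to wakee flips */
		printf("%d\n", wake_wide(10, 1, 2, 1)); /* 0 */
		return 0;
	}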
Change-Id: I2080302839a263e0841a89efea8589ea53bbda9c Signed-off-by: Brendan Jackman Signed-off-by: Chris Redpath Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Josef Bacik Cc: Joel Fernandes Cc: Mike Galbraith Cc: Matt Fleming --- include/linux/sched.h | 3 ++- kernel/sched/core.c | 35 +++++++++++++++++++++++------------ kernel/sched/deadline.c | 3 ++- kernel/sched/fair.c | 21 ++++++++++++++------- kernel/sched/idle_task.c | 3 ++- kernel/sched/rt.c | 3 ++- kernel/sched/sched.h | 3 ++- kernel/sched/stop_task.c | 3 ++- 8 files changed, 49 insertions(+), 25 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 62e179f84aef..60af9d2fa922 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -993,12 +993,13 @@ struct wake_q_node { struct wake_q_head { struct wake_q_node *first; struct wake_q_node **lastp; + int count; }; #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) #define WAKE_Q(name) \ - struct wake_q_head name = { WAKE_Q_TAIL, &name.first } + struct wake_q_head name = { WAKE_Q_TAIL, &name.first, 0 } extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 563f316f5330..18d607f9a417 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -546,6 +546,8 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) return; + head->count++; + get_task_struct(task); /* @@ -555,6 +557,10 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) head->lastp = &node->next; } +static int +try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, + int sibling_count_hint); + void wake_up_q(struct wake_q_head *head) { struct wake_q_node *node = head->first; @@ -569,10 +575,10 @@ void wake_up_q(struct wake_q_head *head) task->wake_q.next = NULL; /* - * wake_up_process() implies a wmb() to pair with the queueing + * try_to_wake_up() implies a wmb() to pair with the queueing * in wake_q_add() so as not to miss wakeups. */ - wake_up_process(task); + try_to_wake_up(task, TASK_NORMAL, 0, head->count); put_task_struct(task); } } @@ -1642,12 +1648,14 @@ out: * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. */ static inline -int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) +int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags, + int sibling_count_hint) { lockdep_assert_held(&p->pi_lock); if (p->nr_cpus_allowed > 1) - cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); + cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags, + sibling_count_hint); /* * In order not to call set_task_cpu() on a blocking task we need @@ -1932,6 +1940,8 @@ static void ttwu_queue(struct task_struct *p, int cpu) * @p: the thread to be awakened * @state: the mask of task states that can be woken * @wake_flags: wake modifier flags (WF_*) + * @sibling_count_hint: A hint at the number of threads that are being woken up + * in this event. * * Put it on the run-queue if it's not already there. The "current" * thread is always on the run-queue (except when the actual @@ -1943,7 +1953,8 @@ static void ttwu_queue(struct task_struct *p, int cpu) * or @state didn't match @p's state. 
*/ static int -try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags, + int sibling_count_hint) { unsigned long flags; int cpu, success = 0; @@ -2044,8 +2055,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) if (p->sched_class->task_waking) p->sched_class->task_waking(p); - cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags); - + cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags, + sibling_count_hint); if (task_cpu(p) != cpu) { wake_flags |= WF_MIGRATED; set_task_cpu(p, cpu); @@ -2127,13 +2138,13 @@ out: */ int wake_up_process(struct task_struct *p) { - return try_to_wake_up(p, TASK_NORMAL, 0); + return try_to_wake_up(p, TASK_NORMAL, 0, 1); } EXPORT_SYMBOL(wake_up_process); int wake_up_state(struct task_struct *p, unsigned int state) { - return try_to_wake_up(p, state, 0); + return try_to_wake_up(p, state, 0, 1); } /* @@ -2467,7 +2478,7 @@ void wake_up_new_task(struct task_struct *p) * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, * as we're not fully set-up yet. */ - __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); + __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0, 1)); #endif rq = __task_rq_lock(p); update_rq_clock(rq); @@ -2905,7 +2916,7 @@ void sched_exec(void) int dest_cpu; raw_spin_lock_irqsave(&p->pi_lock, flags); - dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0); + dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0, 1); if (dest_cpu == smp_processor_id()) goto unlock; @@ -3560,7 +3571,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, void *key) { - return try_to_wake_up(curr->private, mode, wake_flags); + return try_to_wake_up(curr->private, mode, wake_flags, 1); } EXPORT_SYMBOL(default_wake_function); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index ab1a9a99660d..cdf2a0b97611 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1070,7 +1070,8 @@ static void yield_task_dl(struct rq *rq) static int find_later_rq(struct task_struct *task); static int -select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) +select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags, + int sibling_count_hint) { struct task_struct *curr; struct rq *rq; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9bc717ef81f5..b904a023be95 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5773,15 +5773,18 @@ energy_diff(struct energy_env *eenv) * being client/server, worker/dispatcher, interrupt source or whatever is * irrelevant, spread criteria is apparent partner count exceeds socket size. */ -static int wake_wide(struct task_struct *p) +static int wake_wide(struct task_struct *p, int sibling_count_hint) { unsigned int master = current->wakee_flips; unsigned int slave = p->wakee_flips; - int factor = this_cpu_read(sd_llc_size); + int llc_size = this_cpu_read(sd_llc_size); + + if (sibling_count_hint >= llc_size) + return 1; if (master < slave) swap(master, slave); - if (slave < factor || master < slave * factor) + if (slave < llc_size || master < slave * llc_size) return 0; return 1; } @@ -6754,7 +6757,8 @@ unlock: * preempt must be disabled. 
 */
 static int
-select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
+select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags,
+		    int sibling_count_hint)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
@@ -6762,9 +6766,12 @@ static int select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	int want_affine = 0;
 	int sync = wake_flags & WF_SYNC;

-	if (sd_flag & SD_BALANCE_WAKE)
-		want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
-			      && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+	if (sd_flag & SD_BALANCE_WAKE) {
+		record_wakee(p);
+		want_affine = !wake_wide(p, sibling_count_hint) &&
+			      !wake_cap(p, cpu, prev_cpu) &&
+			      cpumask_test_cpu(cpu, &p->cpus_allowed);
+	}

 	if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized))
 		return select_energy_cpu_brute(p, prev_cpu, sync);

diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index c4ae0f1fdf9b..33d7003fa1b8 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -9,7 +9,8 @@

 #ifdef CONFIG_SMP
 static int
-select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
+select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags,
+		    int sibling_count_hint)
 {
 	return task_cpu(p); /* IDLE tasks as never migrated */
 }

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3715473fd8f8..069f8982867f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1372,7 +1372,8 @@ static void yield_task_rt(struct rq *rq)
 static int find_lowest_rq(struct task_struct *task);

 static int
-select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
+select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,
+		  int sibling_count_hint)
 {
 	struct task_struct *curr;
 	struct rq *rq;

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index deba080af845..a2d5de8415e1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1261,7 +1261,8 @@ struct sched_class {
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);

 #ifdef CONFIG_SMP
-	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
+	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags,
+			       int subling_count_hint);
 	void (*migrate_task_rq)(struct task_struct *p);

 	void (*task_waking) (struct task_struct *task);

diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 61f852d46858..a5567ccd8803 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -12,7 +12,8 @@

 #ifdef CONFIG_SMP
 static int
-select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
+select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags,
+		    int sibling_count_hint)
 {
 	return task_cpu(p); /* stop tasks as never migrate */
 }
-- cgit v1.2.3

From e79f447a9762f68d6ecf8371bcf3e970bceb662a Mon Sep 17 00:00:00 2001
From: Vikram Mulukutla
Date: Thu, 10 Aug 2017 17:26:20 -0700
Subject: sched: walt: Correct WALT window size initialization

It is preferable that WALT window rollover occurs just before a tick,
since the tick is an opportune moment to record a complete window's
statistics, as well as report those stats to the cpu frequency
governor. When CONFIG_HZ results in a TICK_NSEC that isn't an integral
number, this requirement may be violated. Account for this by reducing
the WALT window size to the nearest multiple of TICK_NSEC.
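The rounding itself is plain integer arithmetic; a quick stand-alone
illustration for the HZ=300 case (TICK_NSEC is 3333333ns there; the
values are only examples):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long tick_nsec = 3333333ULL; /* HZ=300 */
		unsigned long long window = 20000000ULL;   /* requested 20ms */

		/* round down to a whole number of ticks */
		unsigned long long adj = (window / tick_nsec) * tick_nsec;

		printf("%llu\n", adj); /* 19999998: 6 ticks, just under 20ms */
		return 0;
	}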
Commit d368c6faa19b ("sched: walt: fix window misalignment when HZ=300") attempted to do this but WALT isn't using MIN_SCHED_RAVG_WINDOW as the window size and the patch was doing nothing. Also, change the type of 'walt_disabled' to bool and warn if an invalid window size causes WALT to be disabled. Change-Id: Ie3dcfc21a3df4408254ca1165a355bbe391ed5c7 Signed-off-by: Vikram Mulukutla --- include/trace/events/sched.h | 2 +- kernel/sched/sched.h | 2 +- kernel/sched/walt.c | 46 +++++++++++++++++++++++++++----------------- kernel/sched/walt.h | 2 +- 4 files changed, 31 insertions(+), 21 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index bf96bf05be82..9f7fd0c6662b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -642,7 +642,7 @@ TRACE_EVENT(sched_contrib_scale_f, extern unsigned int sysctl_sched_use_walt_cpu_util; extern unsigned int sysctl_sched_use_walt_task_util; extern unsigned int walt_ravg_window; -extern unsigned int walt_disabled; +extern bool walt_disabled; #endif /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a2d5de8415e1..dd86072eaf4e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1560,7 +1560,7 @@ static inline unsigned long capacity_orig_of(int cpu) extern unsigned int sysctl_sched_use_walt_cpu_util; extern unsigned int walt_ravg_window; -extern unsigned int walt_disabled; +extern bool walt_disabled; /* * cpu_util returns the amount of capacity of a CPU that is used by CFS diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 93f61486f989..8d25ffbe4fed 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -41,25 +41,17 @@ static __read_mostly unsigned int walt_io_is_busy = 0; unsigned int sysctl_sched_walt_init_task_load_pct = 15; -/* 1 -> use PELT based load stats, 0 -> use window-based load stats */ -unsigned int __read_mostly walt_disabled = 0; +/* true -> use PELT based load stats, false -> use window-based load stats */ +bool __read_mostly walt_disabled = false; -/* Window size (in ns) */ -__read_mostly unsigned int walt_ravg_window = 20000000; - -/* Min window size (in ns) = 10ms */ -#ifdef CONFIG_HZ_300 /* - * Tick interval becomes to 3333333 due to - * rounding error when HZ=300. + * Window size (in ns). Adjust for the tick size so that the window + * rollover occurs just before the tick boundary. 
*/ -#define MIN_SCHED_RAVG_WINDOW (3333333 * 6) -#else -#define MIN_SCHED_RAVG_WINDOW 10000000 -#endif - -/* Max window size (in ns) = 1s */ -#define MAX_SCHED_RAVG_WINDOW 1000000000 +__read_mostly unsigned int walt_ravg_window = + (20000000 / TICK_NSEC) * TICK_NSEC; +#define MIN_SCHED_RAVG_WINDOW ((10000000 / TICK_NSEC) * TICK_NSEC) +#define MAX_SCHED_RAVG_WINDOW ((1000000000 / TICK_NSEC) * TICK_NSEC) static unsigned int sync_cpu; static ktime_t ktime_last; @@ -180,10 +172,28 @@ static int exiting_task(struct task_struct *p) static int __init set_walt_ravg_window(char *str) { + unsigned int adj_window; + bool no_walt = walt_disabled; + get_option(&str, &walt_ravg_window); - walt_disabled = (walt_ravg_window < MIN_SCHED_RAVG_WINDOW || - walt_ravg_window > MAX_SCHED_RAVG_WINDOW); + /* Adjust for CONFIG_HZ */ + adj_window = (walt_ravg_window / TICK_NSEC) * TICK_NSEC; + + /* Warn if we're a bit too far away from the expected window size */ + WARN(adj_window < walt_ravg_window - NSEC_PER_MSEC, + "tick-adjusted window size %u, original was %u\n", adj_window, + walt_ravg_window); + + walt_ravg_window = adj_window; + + walt_disabled = walt_disabled || + (walt_ravg_window < MIN_SCHED_RAVG_WINDOW || + walt_ravg_window > MAX_SCHED_RAVG_WINDOW); + + WARN(!no_walt && walt_disabled, + "invalid window size, disabling WALT\n"); + return 0; } diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h index f56c4da16d0b..de7edac43674 100644 --- a/kernel/sched/walt.h +++ b/kernel/sched/walt.h @@ -59,6 +59,6 @@ static inline u64 walt_ktime_clock(void) { return 0; } #endif /* CONFIG_SCHED_WALT */ -extern unsigned int walt_disabled; +extern bool walt_disabled; #endif -- cgit v1.2.3 From a21299785a502ca4b3592a0f977aa1202b105260 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 22 Sep 2016 12:25:56 +0100 Subject: sched/core: Warn if ENERGY_AWARE is enabled but data is missing If the EAS energy model is missing or incomplete, i.e. sd_scs is NULL, then sched_group_energy will return -EINVAL on the assumption that it raced with a CPU hotplug event. In that case, energy_diff will return 0 and the energy-aware wake path will silently fail to trigger any migrations. This case can be triggered by disabling CONFIG_SCHED_MC on existing platforms, so that there are no sched_groups with the SD_SHARE_CAP_STATES flag, so that sd_scs is NULL. Add checks so that a warning is printed if EAS is ever enabled while the necessary data is not present. 
Change-Id: Id233a510b5ad8b7fcecac0b1d789e730bbfc7c4a Signed-off-by: Brendan Jackman --- kernel/sched/core.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 18d607f9a417..4f11b84eaf0a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -91,6 +91,8 @@ #include #include "walt.h" +static bool have_sched_energy_data(void); + DEFINE_MUTEX(sched_domains_mutex); DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); @@ -193,6 +195,10 @@ static int sched_feat_set(char *cmp) sysctl_sched_features &= ~(1UL << i); sched_feat_disable(i); } else { + if (i == __SCHED_FEAT_ENERGY_AWARE) + WARN(!have_sched_energy_data(), + "Missing sched energy data\n"); + sysctl_sched_features |= (1UL << i); sched_feat_enable(i); } @@ -6649,6 +6655,19 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight); } +static bool have_sched_energy_data(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (!rcu_dereference(per_cpu(sd_scs, cpu)) || + !rcu_dereference(per_cpu(sd_ea, cpu))) + return false; + } + + return true; +} + /* * Check that the per-cpu provided sd energy data is consistent for all cpus * within the mask. @@ -7461,6 +7480,9 @@ static int build_sched_domains(const struct cpumask *cpu_map, } rcu_read_unlock(); + WARN(sched_feat(ENERGY_AWARE) && !have_sched_energy_data(), + "Missing data for energy aware scheduling\n"); + ret = 0; error: __free_domain_allocs(&d, alloc_state, cpu_map); -- cgit v1.2.3 From a899b9085c8d5d581b214c24dc707466e8cb479f Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 27 Oct 2017 13:23:05 -0700 Subject: sched/core: fix have_sched_energy_data build warning have_sched_energy_data is defined only for CONFIG_SMP, so declare it only with CONFIG_SMP. 
Fixes a warning from the Intel build bot:

tree: https://android.googlesource.com/kernel/msm android-4.4
head: a21299785a502ca4b3592a0f977aa1202b105260
commit: a21299785a502ca4b3592a0f977aa1202b105260 [5/5] sched/core: Warn
if ENERGY_AWARE is enabled but data is missing
config: i386-randconfig-x002-201743 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        git checkout a21299785a502ca4b3592a0f977aa1202b105260
        # save the attached .config to linux build tree
        make ARCH=i386

All warnings (new ones prefixed by >>):

>> kernel//sched/core.c:94:13: warning: 'have_sched_energy_data' used
   but never defined
    static bool have_sched_energy_data(void);
                ^~~~~~~~~~~~~~~~~~~~~~

vim +/have_sched_energy_data +94 kernel//sched/core.c

    93
  > 94  static bool have_sched_energy_data(void);
    95

Change-Id: I266b63ece6fb31d2b5b11821a8244e147ba6d3a4
Signed-off-by: Joel Fernandes
---
 kernel/sched/core.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4f11b84eaf0a..4e5298a1977e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -91,7 +91,9 @@
 #include
 #include "walt.h"

+#ifdef CONFIG_SMP
 static bool have_sched_energy_data(void);
+#endif

 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -195,10 +197,11 @@ static int sched_feat_set(char *cmp)
 			sysctl_sched_features &= ~(1UL << i);
 			sched_feat_disable(i);
 		} else {
+#ifdef CONFIG_SMP
 			if (i == __SCHED_FEAT_ENERGY_AWARE)
 				WARN(!have_sched_energy_data(),
 				     "Missing sched energy data\n");
-
+#endif
 			sysctl_sched_features |= (1UL << i);
 			sched_feat_enable(i);
 		}
-- cgit v1.2.3

From 8f012745e7f6c847ae0843e4ed171e372748b822 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen
Date: Wed, 25 Oct 2017 09:29:46 -0700
Subject: BACKPORT: arm64: relocatable: suppress R_AARCH64_ABS64 relocations in vmlinux

The linker routines that we rely on to produce a relocatable PIE binary
treat it as a shared ELF object in some ways, i.e., it emits symbol
based R_AARCH64_ABS64 relocations into the final binary since doing so
would be appropriate when linking a shared library that is subject to
symbol preemption. (This means that an executable can override certain
symbols that are exported by a shared library it is linked with, and
that the shared library *must* update all its internal references as
well, and point them to the version provided by the executable.)

Symbol preemption does not occur for OS hosted PIE executables, let
alone for vmlinux, and so we would prefer to get rid of these symbol
based relocations. This would allow us to simplify the relocation
routines, and to strip the .dynsym, .dynstr and .hash sections from the
binary. (Note that these are tiny, and are placed in the .init segment,
but they clutter up the vmlinux binary.)

Note that these R_AARCH64_ABS64 relocations are only emitted for
absolute references to symbols defined in the linker script, all other
relocatable quantities are covered by anonymous R_AARCH64_RELATIVE
relocations that simply list the offsets to all 64-bit values in the
binary that need to be fixed up based on the offset between the link
time and run time addresses.

Fortunately, GNU ld has a -Bsymbolic option, which is intended for
shared libraries to allow them to ignore symbol preemption, and
unconditionally bind all internal symbol references to its own
definitions. So set it for our PIE binary as well, and get rid of the
associated sections and the relocation code that processes them.
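The preemption behaviour that -Bsymbolic switches off can be reproduced
in user space; the file names and build commands below are illustrative
only, not part of the kernel build:

	/* lib.c: a shared library whose lib_answer() calls its own answer() */
	int answer(void)     { return 42; }
	int lib_answer(void) { return answer(); }

	/* main.c: the executable defines its own answer(), preempting the lib's */
	int answer(void) { return 7; }
	int lib_answer(void);
	int main(void)   { return lib_answer(); }

	/*
	 * $ gcc -shared -fPIC lib.c -o libdemo.so
	 * $ gcc main.c ./libdemo.so -o demo && ./demo; echo $?   -> 7
	 *   (the library's internal call was preempted by the executable)
	 * $ gcc -shared -fPIC -Wl,-Bsymbolic lib.c -o libdemo.so
	 * $ ./demo; echo $?                                      -> 42
	 *   (-Bsymbolic bound the reference inside the library)
	 */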
Signed-off-by: Ard Biesheuvel [will: fixed conflict with __dynsym_offset linker script entry] Signed-off-by: Will Deacon Note: This backport only adds -Bsymbolic to LDFLAGS_vmlinux, but doesn't remove R_AARCH64_ABS64 relocation handling, because those changes depend on later refactoring of the code that we don't need in android-4.4. Bug: 66932127 Change-Id: I56f664e02bc8d2fa3e5f496fb041bc3a8e1a4094 (cherry picked from commit 08cc55b2afd97a654f71b3bebf8bb0ec89fdc498) Signed-off-by: Sami Tolvanen --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c3bc092fc128..53c054491254 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 ifneq ($(CONFIG_RELOCATABLE),) -LDFLAGS_vmlinux += -pie +LDFLAGS_vmlinux += -pie -Bsymbolic endif KBUILD_DEFCONFIG := defconfig -- cgit v1.2.3