From 400383059868487869772b1c68ab8db4b6c81cbb Mon Sep 17 00:00:00 2001
From: Prasad Sodagudi
Date: Mon, 24 Sep 2018 16:25:55 -0700
Subject: kernel: time: Add delay after cpu_relax() in tight loops

Tight loops of spin_lock_irqsave() and spin_unlock_irqrestore() in
timer and hrtimer are causing scheduling delays. Add a delay of a few
nanoseconds after cpu_relax() in the timer/hrtimer tight loops.

Change-Id: Iaa0ab92da93f7b245b1d922b6edca2bebdc0fbce
Signed-off-by: Prasad Sodagudi
---
 include/linux/time.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 62cc50700004..cbb55e004342 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -9,6 +9,7 @@ extern struct timezone sys_tz;
 
 #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
 
+#define TIMER_LOCK_TIGHT_LOOP_DELAY_NS 350
 static inline int timespec_equal(const struct timespec *a,
                                  const struct timespec *b)
--
cgit v1.2.3

From f31078b5825f71499fa95b85babf6ac8c776c37d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 20 Jun 2017 01:37:38 +0200
Subject: genirq: Introduce effective affinity mask

There is currently no way to evaluate the effective affinity mask of a
given interrupt. Many irq chips allow only a single target CPU or a
subset of CPUs in the affinity mask.

Updating the mask at the time of setting the affinity to the subset
would be counterproductive because information for cpu hotplug about
assigned interrupt affinities gets lost. On CPU hotplug it's also
pointless to force migrate an interrupt, which is not effectively
targeted at the CPU. But currently the information is not available.

Provide a separate mask to be updated by the irq_chip->irq_set_affinity()
implementations. Implement the read-only proc files so the user can see
the effective mask as well w/o trying to deduce it from /proc/interrupts.

Signed-off-by: Thomas Gleixner
Cc: Jens Axboe
Cc: Marc Zyngier
Cc: Michael Ellerman
Cc: Keith Busch
Cc: Peter Zijlstra
Cc: Christoph Hellwig
Link: http://lkml.kernel.org/r/20170619235446.247834245@linutronix.de
Change-Id: Ibeec0031edb532d52cb411286f785aec160d6139
---
 include/linux/irq.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8da001eb82aa..0e57f41bde84 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -136,6 +136,9 @@ struct irq_domain;
  * @node:		node index useful for balancing
  * @handler_data:	per-IRQ data for the irq_chip methods
  * @affinity:		IRQ affinity on SMP
+ * @effective_affinity:	The effective IRQ affinity on SMP as some irq
+ *			chips do not allow multi CPU destinations.
+ *			A subset of @affinity.
  * @msi_desc:		MSI descriptor
  */
 struct irq_common_data {
@@ -146,6 +149,9 @@ struct irq_common_data {
 	void			*handler_data;
 	struct msi_desc		*msi_desc;
 	cpumask_var_t		affinity;
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	cpumask_var_t		effective_affinity;
+#endif
 };
 
 /**
@@ -690,6 +696,29 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
 	return d->common->affinity;
 }
 
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+	return d->common->effective_affinity;
+}
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+						      const struct cpumask *m)
+{
+	cpumask_copy(d->common->effective_affinity, m);
+}
+#else
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+						      const struct cpumask *m)
+{
+}
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+	return d->common->affinity;
+}
+#endif
+
 unsigned int arch_dynirq_lower_bound(unsigned int from);
 
 int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
--
cgit v1.2.3
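A typical consumer of the new helpers is an irq chip whose hardware can route an interrupt to only one CPU at a time: its irq_set_affinity() callback picks one online CPU from the requested mask and records that choice in the effective mask. The sketch below is illustrative only; my_chip_route_irq() is a hypothetical hardware-programming helper, while cpumask_any_and(), irqd_to_hwirq(), cpumask_of(), irq_data_update_effective_affinity() and IRQ_SET_MASK_OK are existing kernel APIs.

static int my_chip_irq_set_affinity(struct irq_data *d,
				    const struct cpumask *mask, bool force)
{
	unsigned int cpu = cpumask_any_and(mask, cpu_online_mask);

	if (cpu >= nr_cpu_ids)
		return -EINVAL;

	/* Program the (hypothetical) hardware to target a single CPU. */
	my_chip_route_irq(irqd_to_hwirq(d), cpu);

	/* Report the CPU actually targeted, a subset of the requested mask. */
	irq_data_update_effective_affinity(d, cpumask_of(cpu));

	return IRQ_SET_MASK_OK;
}

User space can then read the result from the new read-only proc files instead of guessing from /proc/interrupts.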
From 13e66175965635ed74c948f232a32033230cc5d0 Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti
Date: Thu, 28 Feb 2019 10:40:39 +0530
Subject: cpuset: Restore tasks affinity while moving across cpusets

When tasks move across cpusets, the current affinity settings are lost.
Cache the task affinity and restore it during cpuset migration. The
restore happens only when the cached affinity is a subset of the
current cpuset settings.

Change-Id: I6c2ec1d5e3d994e176926d94b9e0cc92418020cc
Signed-off-by: Pavankumar Kondeti
---
 include/linux/init_task.h | 1 +
 include/linux/sched.h     | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 021b1e9ff6cd..8aed56931361 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -208,6 +208,7 @@ extern struct task_group root_task_group;
 	.policy		= SCHED_NORMAL,					\
 	.cpus_allowed	= CPU_MASK_ALL,					\
 	.nr_cpus_allowed= NR_CPUS,					\
+	.cpus_requested	= CPU_MASK_ALL,					\
 	.mm		= NULL,						\
 	.active_mm	= &init_mm,					\
 	.restart_block	= {						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 70c1f7f9e4fa..9cb6964d178e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1700,6 +1700,7 @@ struct task_struct {
 	unsigned int policy;
 	int nr_cpus_allowed;
 	cpumask_t cpus_allowed;
+	cpumask_t cpus_requested;
 
 #ifdef CONFIG_PREEMPT_RCU
 	int rcu_read_lock_nesting;
--
cgit v1.2.3

From cbe0b37059c9c491c48b0d014f2ceb0f2e9e2e96 Mon Sep 17 00:00:00 2001
From: Yang Shi
Date: Thu, 7 Jun 2018 17:05:28 -0700
Subject: mm: introduce arg_lock to protect arg_start|end and env_start|end in mm_struct

mmap_sem is on the hot path of the kernel and is heavily contended, but
it is abused too. It is used to protect arg_start|end and env_start|end
when reading /proc/$PID/cmdline and /proc/$PID/environ, but that doesn't
make sense: those proc files just expect to read 4 values atomically,
the values are not related to VM, and they could be set to arbitrary
values by C/R.

And the mmap_sem contention may cause unexpected issues like the one
below:

INFO: task ps:14018 blocked for more than 120 seconds.
      Tainted: G            E 4.9.79-009.ali3000.alios7.x86_64 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
ps              D    0 14018      1 0x00000004
Call Trace:
 schedule+0x36/0x80
 rwsem_down_read_failed+0xf0/0x150
 call_rwsem_down_read_failed+0x18/0x30
 down_read+0x20/0x40
 proc_pid_cmdline_read+0xd9/0x4e0
 __vfs_read+0x37/0x150
 vfs_read+0x96/0x130
 SyS_read+0x55/0xc0
 entry_SYSCALL_64_fastpath+0x1a/0xc5

Both Alexey Dobriyan and Michal Hocko suggested using a dedicated lock
for them to mitigate the abuse of mmap_sem.

So, introduce a new spinlock in mm_struct to protect the concurrent
access to arg_start|end, env_start|end and others, as well as replace
the write mmap_sem with a read one to protect against the race between
prctl and sys_brk which might break check_data_rlimit(); this also
makes prctl more friendly to other VM operations.

This patch just eliminates the abuse of mmap_sem, but it can't resolve
the above hung task warning completely since the later
access_remote_vm() call still needs to acquire mmap_sem. The mmap_sem
scalability issue will be solved in the future.

Change-Id: Ifa8f001ee2fc4f0ce60c18e771cebcf8a1f0943e
[yang.shi@linux.alibaba.com: add comment about mmap_sem and arg_lock]
Link: http://lkml.kernel.org/r/1524077799-80690-1-git-send-email-yang.shi@linux.alibaba.com
Link: http://lkml.kernel.org/r/1523730291-109696-1-git-send-email-yang.shi@linux.alibaba.com
Signed-off-by: Yang Shi
Reviewed-by: Cyrill Gorcunov
Acked-by: Michal Hocko
Cc: Alexey Dobriyan
Cc: Matthew Wilcox
Cc: Mateusz Guzik
Cc: Kirill Tkhai
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
Git-commit: 88aa7cc688d48ddd84558b41d5905a0db9535c4b
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
Signed-off-by: Srinivas Ramana
---
 include/linux/mm_types.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 29c17fae9bbf..1019e8d3c88f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -442,6 +442,8 @@ struct mm_struct {
 	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
 	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
 	unsigned long def_flags;
+
+	spinlock_t arg_lock; /* protect the below fields */
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long start_brk, brk, start_stack;
 	unsigned long arg_start, arg_end, env_start, env_end;
--
cgit v1.2.3
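On the reader side, the proc code can now take the new spinlock instead of mmap_sem when all it needs is a consistent snapshot of those values. A minimal sketch of that pattern follows (the helper name is illustrative, not the exact fs/proc code); the later access_remote_vm() call still takes mmap_sem internally, which is why the hung-task warning above is only mitigated rather than fully resolved.

static void get_cmdline_bounds(struct mm_struct *mm,
			       unsigned long *start, unsigned long *end)
{
	/* Snapshot the bounds under arg_lock; no mmap_sem needed here. */
	spin_lock(&mm->arg_lock);
	*start = mm->arg_start;
	*end = mm->arg_end;
	spin_unlock(&mm->arg_lock);
}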
From 90dccbae4c0410e209049e22eaae9cd2718e1aa5 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai
Date: Tue, 5 Mar 2019 15:43:06 -0800
Subject: UPSTREAM: mm: reuse only-pte-mapped KSM page in do_wp_page()

Add an optimization for KSM pages almost in the same way that we have
for ordinary anonymous pages. If there is a write fault in a page which
is mapped by only one pte and is not related to the swap cache, the
page may be reused without copying its content.

[ Note that we do not consider PageSwapCache() pages at least for now,
  since we don't want to complicate __get_ksm_page(), which has a nice
  optimization based on this (for the migration case). Currently it is
  spinning on PageSwapCache() pages, waiting for when they have
  unfrozen counters (i.e., for the migration to finish). But we don't
  want to make it also spin on swap cache pages which we try to reuse,
  since there is not a very high probability of reusing them. So, for
  now we do not consider PageSwapCache() pages at all. ]

So in reuse_ksm_page() we check for 1) PageSwapCache() and
2) page_stable_node(), to skip a page which KSM is currently trying to
link to the stable tree. Then we do page_ref_freeze() to prohibit KSM
from merging one more page into the page we are reusing.

After that, nobody can refer to the reusing page: KSM skips
!PageSwapCache() pages with zero refcount, and the protection against
all other participants is the same as for reused ordinary anon pages:
pte lock, page lock and mmap_sem.

[akpm@linux-foundation.org: replace BUG_ON()s with WARN_ON()s]
Link: http://lkml.kernel.org/r/154471491016.31352.1168978849911555609.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai
Reviewed-by: Yang Shi
Cc: "Kirill A. Shutemov"
Cc: Hugh Dickins
Cc: Andrea Arcangeli
Cc: Christian Koenig
Cc: Claudio Imbrenda
Cc: Rik van Riel
Cc: Huang Ying
Cc: Minchan Kim
Cc: Kirill Tkhai
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
Change-Id: If32387b1f7c36f0e12fcbb0926bf1b67886ec594
---
 include/linux/ksm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 481c8c4627ca..febba394f93c 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -62,6 +62,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
 
 int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
+bool reuse_ksm_page(struct page *page,
+			struct vm_area_struct *vma, unsigned long address);
 
 #else  /* !CONFIG_KSM */
 
@@ -102,6 +104,11 @@ static inline int rmap_walk_ksm(struct page *page,
 static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 {
 }
+static inline bool reuse_ksm_page(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	return false;
+}
 #endif /* CONFIG_MMU */
 #endif /* !CONFIG_KSM */
--
cgit v1.2.3
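The hunk above only adds the declaration and the !CONFIG_KSM stub; the sketch below illustrates the check sequence the commit message describes, in the spirit of the mm/ksm.c implementation but without the CONFIG_DEBUG_VM sanity checks. page_stable_node() is a KSM-internal helper; PageSwapCache(), page_ref_freeze()/page_ref_unfreeze(), page_move_anon_rmap() and linear_page_index() are existing mm APIs.

bool reuse_ksm_page(struct page *page,
		    struct vm_area_struct *vma, unsigned long address)
{
	/* Skip swap-cache pages and pages KSM is still linking to the stable tree. */
	if (PageSwapCache(page) || !page_stable_node(page))
		return false;

	/* Freeze the refcount so KSM cannot merge another page into this one. */
	if (!page_ref_freeze(page, 1))
		return false;

	/* From here on the page is treated like an ordinary reused anon page. */
	page_move_anon_rmap(page, vma);
	page->index = linear_page_index(vma, address);
	page_ref_unfreeze(page, 1);

	return true;
}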
From 899def5edcd48ff16c0f41624ecf8be77c81a18d Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Fri, 21 Aug 2020 19:49:56 -0400
Subject: UPSTREAM: mm/ksm: Remove reuse_ksm_page()

Remove the function as the last reference has gone away with the
do_wp_page() changes.

Signed-off-by: Peter Xu
Signed-off-by: Linus Torvalds
Change-Id: Ie4da88791abe9407157566854b2db9b94c0c962f
---
 include/linux/ksm.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index febba394f93c..481c8c4627ca 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -62,8 +62,6 @@ struct page *ksm_might_need_to_copy(struct page *page,
 
 int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
-bool reuse_ksm_page(struct page *page,
-			struct vm_area_struct *vma, unsigned long address);
 
 #else  /* !CONFIG_KSM */
 
@@ -104,11 +102,6 @@ static inline int rmap_walk_ksm(struct page *page,
 static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 {
 }
-static inline bool reuse_ksm_page(struct page *page,
-			struct vm_area_struct *vma, unsigned long address)
-{
-	return false;
-}
 #endif /* CONFIG_MMU */
 #endif /* !CONFIG_KSM */
--
cgit v1.2.3

From 6adb092856e806d91f3fc22dff0ef36506dd0bae Mon Sep 17 00:00:00 2001
From: Vikram Mulukutla
Date: Tue, 6 Jun 2017 11:58:27 -0700
Subject: sched: cpufreq: Limit governor updates to WALT changes alone

It's not necessary to keep reporting load to the governor if it doesn't
change in a window. Limit updates to when we expect load changes: after
window rollover and when we send updates related to intercluster
migrations.

[beykerykt]: Adapt for HMP
Change-Id: I3232d40f3d54b0b81cfafdcdb99b534df79327bf
Signed-off-by: Vikram Mulukutla
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9cb6964d178e..0e8fff43cc17 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3724,6 +3724,7 @@ static inline unsigned long rlimit_max(unsigned int limit)
 #define SCHED_CPUFREQ_DL	(1U << 1)
 #define SCHED_CPUFREQ_IOWAIT	(1U << 2)
 #define SCHED_CPUFREQ_INTERCLUSTER_MIG (1U << 3)
+#define SCHED_CPUFREQ_WALT (1U << 4)
 
 #ifdef CONFIG_CPU_FREQ
 struct update_util_data {
--
cgit v1.2.3
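With the new flag, the updates WALT does send (at window rollover and for intercluster migrations) can be tagged so a governor hook can tell them apart from other update reasons. The callback below is an illustrative sketch, not the actual schedutil code; only the SCHED_CPUFREQ_* flags and the update_util_data callback signature come from the kernel.

static void my_gov_update_util(struct update_util_data *data, u64 time,
			       unsigned int flags)
{
	/* Only react to updates that reflect a WALT-reported load change. */
	if (!(flags & (SCHED_CPUFREQ_WALT | SCHED_CPUFREQ_INTERCLUSTER_MIG)))
		return;

	/* ... re-evaluate the CPU's load and pick a new frequency ... */
}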
From c0fa7577022c4169e1aaaf1bd9e04f63d285beb2 Mon Sep 17 00:00:00 2001
From: Ethan Chen
Date: Sat, 20 Jan 2018 16:35:53 -0800
Subject: sched/walt: Re-add code to allow WALT to function

Change-Id: Ieb1067c5e276f872ed4c722b7d1fabecbdad87e7
---
 include/linux/sched.h        | 9 +++++++++
 include/linux/sched/sysctl.h | 6 ++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0e8fff43cc17..4e212132a274 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1668,6 +1668,15 @@ struct task_struct {
 	const struct sched_class *sched_class;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
+#ifdef CONFIG_SCHED_WALT
+	struct ravg ravg;
+	/*
+	 * 'init_load_pct' represents the initial task load assigned to children
+	 * of this task
+	 */
+	u32 init_load_pct;
+	u64 last_sleep_ts;
+#endif
 #ifdef CONFIG_SCHED_HMP
 	struct ravg ravg;
 	/*
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 1e1fcb8791a7..c85fe9872d07 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -41,6 +41,12 @@ extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_sync_hint_enable;
 extern unsigned int sysctl_sched_cstate_aware;
+#ifdef CONFIG_SCHED_WALT
+extern unsigned int sysctl_sched_use_walt_cpu_util;
+extern unsigned int sysctl_sched_use_walt_task_util;
+extern unsigned int sysctl_sched_walt_init_task_load_pct;
+extern unsigned int sysctl_sched_walt_cpu_high_irqload;
+#endif
 
 #ifdef CONFIG_SCHED_HMP
 
--
cgit v1.2.3

From 9539942cb065e9ec5749a89077b12fc3a0c51b0a Mon Sep 17 00:00:00 2001
From: Joel Fernandes
Date: Thu, 18 May 2017 22:41:33 -0700
Subject: FROMLIST: cpufreq: Make iowait boost a policy option

Make iowait boost a cpufreq policy option and enable it for the
intel_pstate cpufreq driver. Governors like schedutil can use it to
determine if boosting for tasks that wake up with p->in_iowait set is
needed.

Bug: 38010527
Link: https://lkml.org/lkml/2017/5/19/43
Change-Id: Icf59e75fbe731dc67abb28fb837f7bb0cd5ec6cc
Signed-off-by: Joel Fernandes
---
 include/linux/cpufreq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9302d016b89f..8e9d08dfbd18 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -123,6 +123,9 @@ struct cpufreq_policy {
 	unsigned int		up_transition_delay_us;
 	unsigned int		down_transition_delay_us;
 
+	/* Boost switch for tasks with p->in_iowait set */
+	bool			iowait_boost_enable;
+
 	/* Cached frequency lookup from cpufreq_driver_resolve_freq. */
 	unsigned int cached_target_freq;
 	int cached_resolved_idx;
--
cgit v1.2.3
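A governor that implements iowait boosting can then gate the boost on the new policy field. The helper below is a minimal sketch under that assumption; the function name is illustrative, and schedutil's real logic decays the boost over successive updates rather than jumping straight to the maximum frequency.

static unsigned int my_gov_apply_iowait_boost(struct cpufreq_policy *policy,
					      unsigned int req_freq,
					      unsigned int flags)
{
	/* Respect the per-policy opt-in added above. */
	if (!policy->iowait_boost_enable)
		return req_freq;

	/* Boost requests coming from tasks that woke up from I/O wait. */
	if (flags & SCHED_CPUFREQ_IOWAIT)
		return policy->cpuinfo.max_freq;

	return req_freq;
}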