From f4fb6f1523d43cd2b56f99559ba4b86b83901a25 Mon Sep 17 00:00:00 2001
From: Mathias Krause
Date: Fri, 8 Sep 2017 20:57:10 +0200
Subject: padata: ensure the reorder timer callback runs on the correct CPU

commit cf5868c8a22dc2854b96e9569064bb92365549ca upstream.

The reorder timer function runs on the CPU where the timer interrupt was
handled, which is not necessarily one of the CPUs of the 'pcpu' CPU mask
set.

Ensure the padata_reorder() callback runs on the correct CPU, which is
one in the 'pcpu' CPU mask set and, preferably, the next expected one.
Do so by comparing the current CPU with the expected target CPU. If they
match, call padata_reorder() right away. If they differ, schedule a work
item on the target CPU that does the padata_reorder() call for us.

Signed-off-by: Mathias Krause
Signed-off-by: Herbert Xu
Cc: Ben Hutchings
Signed-off-by: Greg Kroah-Hartman
---
 include/linux/padata.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 438694650471..963e69ff4659 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -85,6 +85,7 @@ struct padata_serial_queue {
  * @swork: work struct for serialization.
  * @pd: Backpointer to the internal control structure.
  * @work: work struct for parallelization.
+ * @reorder_work: work struct for reordering.
  * @num_obj: Number of objects that are processed by this cpu.
  * @cpu_index: Index of the cpu.
  */
@@ -93,6 +94,7 @@ struct padata_parallel_queue {
 	struct padata_list reorder;
 	struct parallel_data *pd;
 	struct work_struct work;
+	struct work_struct reorder_work;
 	atomic_t num_obj;
 	int cpu_index;
 };
--
cgit v1.2.3

From 3c74e59f30ccfb4937a9814631cc828f64427de3 Mon Sep 17 00:00:00 2001
From: Mathias Krause
Date: Fri, 8 Sep 2017 20:57:11 +0200
Subject: padata: ensure padata_do_serial() runs on the correct CPU

commit 350ef88e7e922354f82a931897ad4a4ce6c686ff upstream.

If the algorithm we're parallelizing is asynchronous we might change
CPUs between padata_do_parallel() and padata_do_serial(). However, we
don't expect this to happen as we need to enqueue the padata object into
the per-cpu reorder queue we took it from, i.e. the same-cpu's parallel
queue.

Ensure we're not switching CPUs for a given padata object by tracking
the CPU within the padata object. If the serial callback gets called on
the wrong CPU, defer invoking padata_reorder() via a kernel worker on
the CPU we're expected to run on.

Signed-off-by: Mathias Krause
Signed-off-by: Herbert Xu
Cc: Ben Hutchings
Signed-off-by: Greg Kroah-Hartman
---
 include/linux/padata.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 963e69ff4659..e74d61fa50fe 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -37,6 +37,7 @@
  * @list: List entry, to attach to the padata lists.
  * @pd: Pointer to the internal control structure.
  * @cb_cpu: Callback cpu for serialization.
+ * @cpu: Cpu for parallelization.
  * @seq_nr: Sequence number of the parallelized data object.
  * @info: Used to pass information from the parallel to the serial function.
  * @parallel: Parallel execution function.
@@ -46,6 +47,7 @@ struct padata_priv {
 	struct list_head list;
 	struct parallel_data *pd;
 	int cb_cpu;
+	int cpu;
 	int info;
 	void (*parallel)(struct padata_priv *padata);
 	void (*serial)(struct padata_priv *padata);
--
cgit v1.2.3

From 8539c5151a601593791bd35cf8c7f64a9dba5388 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 21 May 2020 16:51:42 -0400
Subject: sched/fair, cpumask: Export for_each_cpu_wrap()

[ Upstream commit c743f0a5c50f2fcbc628526279cfa24f3dabe182 ]

More users for for_each_cpu_wrap() have appeared. Promote the construct
to the generic cpumask interface.

The implementation is slightly modified to reduce arguments.

Signed-off-by: Peter Zijlstra (Intel)
Cc: Lauro Ramos Venancio
Cc: Linus Torvalds
Cc: Mike Galbraith
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Thomas Gleixner
Cc: lwang@redhat.com
Link: http://lkml.kernel.org/r/20170414122005.o35me2h5nowqkxbv@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar
[dj: include only what's added to the cpumask interface, 4.4 doesn't
 have them in the scheduler]
Signed-off-by: Daniel Jordan
Signed-off-by: Sasha Levin
---
 include/linux/cpumask.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index bb3a4bb35183..1322883e7b46 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -232,6 +232,23 @@ unsigned int cpumask_local_spread(unsigned int i, int node);
 		(cpu) = cpumask_next_zero((cpu), (mask)),	\
 		(cpu) < nr_cpu_ids;)
 
+extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
+
+/**
+ * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask pointer
+ * @start: the start location
+ *
+ * The implementation does not assume any bit in @mask is set (including @start).
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_wrap(cpu, mask, start)					\
+	for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false);	\
+	     (cpu) < nr_cpumask_bits;						\
+	     (cpu) = cpumask_next_wrap((cpu), (mask), (start), true))
+
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
  * @cpu: the (optionally unsigned) integer iterator
--
cgit v1.2.3

From 9d62e9fe86fe69a10f3126c6d3bf597d9816de60 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 21 May 2020 16:51:43 -0400
Subject: padata: Replace delayed timer with immediate workqueue in padata_reorder

[ Upstream commit 6fc4dbcf0276279d488c5fbbfabe94734134f4fa ]

The function padata_reorder will use a timer when it cannot progress
while completed jobs are outstanding (pd->reorder_objects > 0). This is
suboptimal because, if we do end up using the timer, it introduces a
gratuitous delay of one second.

In fact we can easily distinguish between whether completed jobs are
outstanding and whether we can make progress. All we have to do is look
at the next pqueue list.

This patch does that by replacing pd->processed with pd->cpu so that
the next pqueue is more accessible.

A work queue is used instead of the original try_again to avoid hogging
the CPU.

Note that we don't bother removing the work queue in
padata_flush_queues because the whole premise is broken. You cannot
flush async crypto requests, so it makes no sense to even try. A
subsequent patch will fix it by replacing it with a ref counting
scheme.
Signed-off-by: Herbert Xu
[dj: - adjust context
     - corrected setup_timer -> timer_setup to delete hunk
     - skip padata_flush_queues() hunk, function already removed in 4.4]
Signed-off-by: Daniel Jordan
Signed-off-by: Sasha Levin
---
 include/linux/padata.h | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index e74d61fa50fe..547a8d1e4a3b 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -24,7 +24,6 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/timer.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>
@@ -85,18 +84,14 @@ struct padata_serial_queue {
  * @serial: List to wait for serialization after reordering.
  * @pwork: work struct for parallelization.
  * @swork: work struct for serialization.
- * @pd: Backpointer to the internal control structure.
  * @work: work struct for parallelization.
- * @reorder_work: work struct for reordering.
  * @num_obj: Number of objects that are processed by this cpu.
  * @cpu_index: Index of the cpu.
  */
 struct padata_parallel_queue {
 	struct padata_list parallel;
 	struct padata_list reorder;
-	struct parallel_data *pd;
 	struct work_struct work;
-	struct work_struct reorder_work;
 	atomic_t num_obj;
 	int cpu_index;
 };
@@ -122,10 +117,10 @@ struct padata_cpumask {
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @max_seq_nr: Maximal used sequence number.
+ * @cpu: Next CPU to be processed.
  * @cpumask: The cpumasks in use for parallel and serial workers.
+ * @reorder_work: work struct for reordering.
  * @lock: Reorder lock.
- * @processed: Number of already processed objects.
- * @timer: Reorder timer.
 */
 struct parallel_data {
 	struct padata_instance *pinst;
@@ -134,10 +129,10 @@ struct parallel_data {
 	atomic_t reorder_objects;
 	atomic_t refcnt;
 	atomic_t seq_nr;
+	int cpu;
 	struct padata_cpumask cpumask;
+	struct work_struct reorder_work;
 	spinlock_t lock ____cacheline_aligned;
-	unsigned int processed;
-	struct timer_list timer;
 };
--
cgit v1.2.3

From d222f4ad7bfc2f78f3865535dfce47deab9d6f99 Mon Sep 17 00:00:00 2001
From: "R. Parameswaran"
Date: Fri, 22 May 2020 00:57:22 +0100
Subject: New kernel function to get IP overhead on a socket.

commit 113c3075931a334f899008f6c753abe70a3a9323 upstream.

A new function, kernel_sock_ip_overhead(), is provided to calculate the
cumulative overhead imposed by the IP Header and IP options, if any, on
a socket's payload. The new function returns an overhead of zero for
sockets that do not belong to the IPv4 or IPv6 address families.

This is used in the L2TP code path to compute the total outer IP
overhead on the L2TP tunnel socket when calculating the default MTU for
Ethernet pseudowires.

Signed-off-by: R. Parameswaran
Signed-off-by: David S. Miller
Signed-off-by: Giuliano Procida
Signed-off-by: Greg Kroah-Hartman
---
 include/linux/net.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index c00b8d182226..a0c6c00ac166 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -291,6 +291,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
+/* Following routine returns the IP overhead imposed by a socket. */
+u32 kernel_sock_ip_overhead(struct sock *sk);
+
 #define MODULE_ALIAS_NETPROTO(proto) \
 	MODULE_ALIAS("net-pf-" __stringify(proto))
--
cgit v1.2.3

From 403a7a561a0347368d84b53b03fc03c2d23b8861 Mon Sep 17 00:00:00 2001
From: "R. Parameswaran"
Date: Wed, 12 Apr 2017 18:31:04 -0700
Subject: l2tp: device MTU setup, tunnel socket needs a lock

commit 57240d007816486131bee88cd474c2a71f0fe224 upstream.

The MTU overhead calculation in L2TP device set-up merged via commit
b784e7ebfce8cfb16c6f95e14e8532d0768ab7ff needs to be adjusted to lock
the tunnel socket while referencing the sub-data structures to derive
the socket's IP overhead.

Reported-by: Guillaume Nault
Tested-by: Guillaume Nault
Signed-off-by: R. Parameswaran
Signed-off-by: David S. Miller
Cc: Giuliano Procida
Signed-off-by: Greg Kroah-Hartman
---
 include/linux/net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index a0c6c00ac166..6de18ead3dfe 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -291,7 +291,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
-/* Following routine returns the IP overhead imposed by a socket. */
+/* Routine returns the IP overhead imposed by a (caller-protected) socket. */
 u32 kernel_sock_ip_overhead(struct sock *sk);
 
 #define MODULE_ALIAS_NETPROTO(proto) \
--
cgit v1.2.3

From c0f84de8a48541157e4c386cc590ea9a370b5cc5 Mon Sep 17 00:00:00 2001
From: Michael Kelley
Date: Wed, 14 Feb 2018 02:54:03 +0000
Subject: cpumask: Make for_each_cpu_wrap() available on UP as well

commit d207af2eab3f8668b95ad02b21930481c42806fd upstream.

for_each_cpu_wrap() was originally added in the #else half of a large
"#if NR_CPUS == 1" statement, but was omitted in the #if half. This
patch adds the missing #if half to prevent compile errors when NR_CPUS
is 1.

Reported-by: kbuild test robot
Signed-off-by: Michael Kelley
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: kys@microsoft.com
Cc: martin.petersen@oracle.com
Cc: mikelley@microsoft.com
Fixes: c743f0a5c50f ("sched/fair, cpumask: Export for_each_cpu_wrap()")
Link: http://lkml.kernel.org/r/SN6PR1901MB2045F087F59450507D4FCC17CBF50@SN6PR1901MB2045.namprd19.prod.outlook.com
Signed-off-by: Ingo Molnar
Cc:
Signed-off-by: Greg Kroah-Hartman
---
 include/linux/cpumask.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 1322883e7b46..f0a3fc723ae4 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -160,6 +160,8 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask) \
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
+#define for_each_cpu_wrap(cpu, mask, start) \
+	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
 #define for_each_cpu_and(cpu, mask, and) \
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
 #else
--
cgit v1.2.3
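
Taken together, the padata and cpumask patches above lean on two primitives:
for_each_cpu_wrap() to find the next CPU in a mask starting from an arbitrary
point, and queue_work_on() to force a callback onto that CPU. The following
stand-alone sketch shows the same run-on-the-expected-CPU pattern in
isolation; it is not part of any commit above, and the demo_* names and
module boilerplate are made up for illustration, though queue_work_on(),
get_cpu()/put_cpu() and system_wq are the real kernel APIs the series uses.

#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

static struct work_struct demo_work;

static void demo_work_fn(struct work_struct *work)
{
	/* queue_work_on() guarantees we run on the CPU it was queued on. */
	pr_info("demo work on CPU %d\n", raw_smp_processor_id());
}

static int __init demo_init(void)
{
	int cpu, this_cpu, target = -1;

	INIT_WORK(&demo_work, demo_work_fn);

	this_cpu = get_cpu();	/* disable preemption while we decide */

	/* Next online CPU at or after this one, wrapping around the mask.
	 * this_cpu is itself online, so the loop always finds a CPU. */
	for_each_cpu_wrap(cpu, cpu_online_mask, this_cpu) {
		target = cpu;
		break;
	}

	/*
	 * The same decision the padata fixes make: run inline if we are
	 * already on the expected CPU, otherwise defer via a work item
	 * queued on that CPU.
	 */
	if (target == this_cpu)
		demo_work_fn(&demo_work);
	else
		queue_work_on(target, system_wq, &demo_work);

	put_cpu();
	return 0;
}

static void __exit demo_exit(void)
{
	flush_work(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Running inline when already on the target CPU avoids a pointless context
switch; deferring through a per-CPU work item is what lets the reorder
callback always execute on a CPU from the 'pcpu' mask.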
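
The two net.h patches read together: kernel_sock_ip_overhead() reports the
outer IP overhead of a socket, and after the l2tp fix the caller is
responsible for holding the socket lock around the call. A minimal sketch of
a hypothetical caller follows; demo_payload_mtu() and its surrounding logic
are illustrative, not from the commits, while lock_sock()/release_sock() and
kernel_sock_ip_overhead() are the interfaces shown above.

#include <linux/net.h>
#include <net/sock.h>

/* Derive a payload MTU from a link MTU by subtracting the socket's
 * outer IP overhead. Must be called from process context, since
 * lock_sock() may sleep. */
static u32 demo_payload_mtu(struct sock *sk, u32 link_mtu)
{
	u32 overhead;

	lock_sock(sk);	/* protect the sub-structures the helper reads */
	overhead = kernel_sock_ip_overhead(sk);
	release_sock(sk);

	return link_mtu - overhead;
}

For sockets outside the IPv4/IPv6 address families the helper returns zero,
so the computed payload MTU degenerates to the link MTU.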