diff options
| author | Raghuram Subramani <raghus2247@gmail.com> | 2024-10-17 17:33:46 +0530 | 
|---|---|---|
| committer | Raghuram Subramani <raghus2247@gmail.com> | 2024-10-17 17:33:46 +0530 | 
| commit | 321337c9e82f016a0cd64f81573c18b5731ffa8d (patch) | |
| tree | e9874bb042e851fec1e19bb8dfca694ef885456a /kernel/sched/rt.c | |
| parent | cc57cb4ee3b7918b74d30604735d353b9a5fa23b (diff) | |
Merge remote-tracking branch 'msm8998/lineage-20' into lineage-20
Change-Id: I126075a330f305c85f8fe1b8c9d408f368be95d1
Diffstat (limited to 'kernel/sched/rt.c')
| -rw-r--r-- | kernel/sched/rt.c | 331 | 
1 files changed, 228 insertions, 103 deletions
| diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index ac81704e14d9..9d7f6998edd5 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1439,6 +1439,25 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)  }  /* + * Keep track of whether each cpu has an RT task that will + * soon schedule on that core. The problem this is intended + * to address is that we want to avoid entering a non-preemptible + * softirq handler if we are about to schedule a real-time + * task on that core. Ideally, we could just check whether + * the RT runqueue on that core had a runnable task, but the + * window between choosing to schedule a real-time task + * on a core and actually enqueueing it on that run-queue + * is large enough to lose races at an unacceptably high rate. + * + * This variable attempts to reduce that window by indicating + * when we have decided to schedule an RT task on a core + * but not yet enqueued it. + * This variable is a heuristic only: it is not guaranteed + * to be correct and may be updated without synchronization. + */ +DEFINE_PER_CPU(bool, incoming_rt_task); + +/*   * Adding/removing a task to/from a priority array:   */  static void @@ -1459,6 +1478,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)  	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)  		enqueue_pushable_task(rq, p); +	*per_cpu_ptr(&incoming_rt_task, cpu_of(rq)) = false;  	if (!schedtune_task_boost(p))  		return; @@ -1551,8 +1571,19 @@ static void yield_task_rt(struct rq *rq)  	requeue_task_rt(rq, rq->curr, 0);  } +/* + * Return whether the given cpu has (or will shortly have) an RT task + * ready to run. NB: This is a heuristic and is subject to races. + */ +bool +cpu_has_rt_task(int cpu) +{ +	struct rq *rq = cpu_rq(cpu); +	return rq->rt.rt_nr_running > 0 || per_cpu(incoming_rt_task, cpu); +} +  #ifdef CONFIG_SMP -static int find_lowest_rq(struct task_struct *task); +static int find_lowest_rq(struct task_struct *task, int sync);  #ifdef CONFIG_SCHED_HMP  static int @@ -1561,7 +1592,7 @@ select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)  	int target;  	rcu_read_lock(); -	target = find_lowest_rq(p); +	target = find_lowest_rq(p, 0);  	if (target != -1)  		cpu = target;  	rcu_read_unlock(); @@ -1573,8 +1604,10 @@ select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)  /*   * Return whether the task on the given cpu is currently non-preemptible   * while handling a potentially long softint, or if the task is likely - * to block preemptions soon because it is a ksoftirq thread that is - * handling slow softints. + * to block preemptions soon because (a) it is a ksoftirq thread that is + * handling slow softints, (b) it is idle and therefore likely to start + * processing the irq's immediately, (c) the cpu is currently handling + * hard irq's and will soon move on to the softirq handler.   */  bool  task_may_not_preempt(struct task_struct *task, int cpu) @@ -1584,8 +1617,9 @@ task_may_not_preempt(struct task_struct *task, int cpu)  	struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu);  	return ((softirqs & LONG_SOFTIRQ_MASK) && -		(task == cpu_ksoftirqd || -		 task_thread_info(task)->preempt_count & SOFTIRQ_MASK)); +		(task == cpu_ksoftirqd || is_idle_task(task) || +		 (task_thread_info(task)->preempt_count +		     & (HARDIRQ_MASK | SOFTIRQ_MASK))));  }  /* @@ -1618,9 +1652,11 @@ static int  select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,  		  int sibling_count_hint)  { -	struct task_struct *curr; +	struct task_struct *curr, *tgt_task;  	struct rq *rq;  	bool may_not_preempt; +	int target; +	int sync = flags & WF_SYNC;  #ifdef CONFIG_SCHED_HMP  	return select_task_rq_rt_hmp(p, cpu, sd_flag, flags); @@ -1635,58 +1671,28 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags,  	rcu_read_lock();  	curr = READ_ONCE(rq->curr); /* unlocked access */ -	/* -	 * If the current task on @p's runqueue is a softirq task, -	 * it may run without preemption for a time that is -	 * ill-suited for a waiting RT task. Therefore, try to -	 * wake this RT task on another runqueue. -	 * -	 * Also, if the current task on @p's runqueue is an RT task, then -	 * it may run without preemption for a time that is -	 * ill-suited for a waiting RT task. Therefore, try to -	 * wake this RT task on another runqueue. -	 * -	 * Also, if the current task on @p's runqueue is an RT task, then -	 * try to see if we can wake this RT task up on another -	 * runqueue. Otherwise simply start this RT task -	 * on its current runqueue. -	 * -	 * We want to avoid overloading runqueues. If the woken -	 * task is a higher priority, then it will stay on this CPU -	 * and the lower prio task should be moved to another CPU. -	 * Even though this will probably make the lower prio task -	 * lose its cache, we do not want to bounce a higher task -	 * around just because it gave up its CPU, perhaps for a -	 * lock? -	 * -	 * For equal prio tasks, we just let the scheduler sort it out. -	 * -	 * Otherwise, just let it ride on the affined RQ and the -	 * post-schedule router will push the preempted task away -	 * -	 * This test is optimistic, if we get it wrong the load-balancer -	 * will have to sort it out. -	 */  	may_not_preempt = task_may_not_preempt(curr, cpu); -	if (may_not_preempt || -	    (unlikely(rt_task(curr)) && -	    (curr->nr_cpus_allowed < 2 || -	     curr->prio <= p->prio))) { -		int target = find_lowest_rq(p); +	target = find_lowest_rq(p, sync); -		/* -		 * If cpu is non-preemptible, prefer remote cpu -		 * even if it's running a higher-prio task. -		 * Otherwise: Don't bother moving it if the -		 * destination CPU is not running a lower priority task. -		 */ -		if (target != -1 && -		   (may_not_preempt || -		    p->prio < cpu_rq(target)->rt.highest_prio.curr)) -			cpu = target; +	/* +	 * Check once for losing a race with the other core's irq handler. +	 * This does not happen frequently, but it can avoid delaying +	 * the execution of the RT task in those cases. +	 */ +	if (target != -1) { +		tgt_task = READ_ONCE(cpu_rq(target)->curr); +		if (task_may_not_preempt(tgt_task, target)) +			target = find_lowest_rq(p, sync);  	} +	/* +	 * Possible race. Don't bother moving it if the +	 * destination CPU is not running a lower priority task. +	 */ +	if (target != -1 && +	    (may_not_preempt || p->prio < cpu_rq(target)->rt.highest_prio.curr)) +		cpu = target; +	*per_cpu_ptr(&incoming_rt_task, cpu) = true;  	rcu_read_unlock(); -  out:  	/*  	 * If previous CPU was different, make sure to cancel any active @@ -1730,7 +1736,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)  	requeue_task_rt(rq, p, 1);  	resched_curr(rq);  } -  #endif /* CONFIG_SMP */  /* @@ -1994,12 +1999,108 @@ retry:  }  #endif	/* CONFIG_SCHED_HMP */ -static int find_lowest_rq(struct task_struct *task) +static int find_best_rt_target(struct task_struct* task, int cpu, +			      struct cpumask* lowest_mask, +			      bool boosted, bool prefer_idle) { +	int iter_cpu; +	int target_cpu = -1; +	int boosted_cpu = -1; +	int backup_cpu = -1; +	int boosted_orig_capacity = capacity_orig_of(0); +	int backup_capacity = 0; +	int best_idle_cpu = -1; +	unsigned long target_util = 0; +	unsigned long new_util; +	/* We want to elect the best one based on task class, +	 * idleness, and utilization. +	 */ +	for (iter_cpu = 0; iter_cpu < NR_CPUS; iter_cpu++) { +		int cur_capacity; +		/* +		 * Iterate from higher cpus for boosted tasks. +		 */ +		int i = boosted ? NR_CPUS-iter_cpu-1 : iter_cpu; +		if (!cpu_online(i) || !cpumask_test_cpu(i, tsk_cpus_allowed(task))) +			continue; + +		new_util = cpu_util(i) + task_util(task); + +		if (new_util > capacity_orig_of(i)) +			continue; + +		/* +		 * Unconditionally favoring tasks that prefer idle cpus to +		 * improve latency. +		 */ +		if (idle_cpu(i) && prefer_idle +		    && cpumask_test_cpu(i, lowest_mask) && best_idle_cpu < 0) { +			best_idle_cpu = i; +			continue; +		} + +		if (cpumask_test_cpu(i, lowest_mask)) { +			/* Bias cpu selection towards cpu with higher original +			 * capacity if task is boosted. +			 * Assumption: Higher cpus are exclusively alloted for +			 * boosted tasks. +			 */ +			if (boosted && boosted_cpu < 0 +			    && boosted_orig_capacity < capacity_orig_of(i)) { +				boosted_cpu = i; +				boosted_orig_capacity = capacity_orig_of(i); +			} +			cur_capacity = capacity_curr_of(i); +			if (new_util < cur_capacity && cpu_rq(i)->nr_running) { +				if(!boosted) { +					/* Find a target cpu with highest utilization.*/ +					if (target_util < new_util) { +						target_cpu = i; +						target_util = new_util; +					} +				} else { +					if (target_util == 0 || target_util > new_util) { +					/* Find a target cpu with lowest utilization.*/ +						target_cpu = i; +						target_util = new_util; +					} +				} +			} else if (backup_capacity == 0 || backup_capacity < cur_capacity) { +				/* Select a backup CPU with highest capacity.*/ +				backup_capacity = cur_capacity; +				backup_cpu = i; +			} +		} +	} + +	if (boosted && boosted_cpu >=0 && boosted_cpu > best_idle_cpu) +		target_cpu = boosted_cpu; +	else if (prefer_idle && best_idle_cpu >= 0) +		target_cpu = best_idle_cpu; + +	if (target_cpu < 0) { +		if (backup_cpu >= 0) +			return backup_cpu; + +		/* Select current cpu if it is present in the mask.*/ +		if (cpumask_test_cpu(cpu, lowest_mask)) +			return cpu; + +		/* Pick a random cpu from lowest_mask */ +		target_cpu = cpumask_any(lowest_mask); +		if (target_cpu < nr_cpu_ids) +			return target_cpu; +		return -1; +	} +	return target_cpu; +} + +static int find_lowest_rq(struct task_struct *task, int sync)  {  	struct sched_domain *sd;  	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);  	int this_cpu = smp_processor_id();  	int cpu      = task_cpu(task); +	bool boosted, prefer_idle;  #ifdef CONFIG_SCHED_HMP  	return find_lowest_rq_hmp(task); @@ -2012,64 +2113,88 @@ static int find_lowest_rq(struct task_struct *task)  	if (task->nr_cpus_allowed == 1)  		return -1; /* No other targets possible */ +	/* Constructing cpumask of lowest priorities */  	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))  		return -1; /* No targets found */ -	/* -	 * At this point we have built a mask of cpus representing the -	 * lowest priority tasks in the system.  Now we want to elect -	 * the best one based on our affinity and topology. -	 * -	 * We prioritize the last cpu that the task executed on since -	 * it is most likely cache-hot in that location. +	/* Return current cpu if WF_SYNC hint is set and present in +	 * lowest_mask. Improves data locality.  	 */ -	if (cpumask_test_cpu(cpu, lowest_mask)) -		return cpu; +	if (sysctl_sched_sync_hint_enable && sync) { +		cpumask_t search_cpus; +		cpumask_and(&search_cpus, tsk_cpus_allowed(task), lowest_mask); +		if (cpumask_test_cpu(cpu, &search_cpus)) +			return cpu; +	}  	/* -	 * Otherwise, we consult the sched_domains span maps to figure -	 * out which cpu is logically closest to our hot cache data. +	 * At this point we have built a mask of cpus representing the +	 * lowest priority tasks in the system.  	 */ -	if (!cpumask_test_cpu(this_cpu, lowest_mask)) -		this_cpu = -1; /* Skip this_cpu opt if not among lowest */ -	rcu_read_lock(); -	for_each_domain(cpu, sd) { -		if (sd->flags & SD_WAKE_AFFINE) { -			int best_cpu; - -			/* -			 * "this_cpu" is cheaper to preempt than a -			 * remote processor. -			 */ -			if (this_cpu != -1 && -			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { -				rcu_read_unlock(); -				return this_cpu; -			} +	boosted = schedtune_task_boost(task) > 0; +	prefer_idle = schedtune_prefer_idle(task) > 0; +	if(boosted || prefer_idle) { +		return find_best_rt_target(task, cpu, lowest_mask, boosted, prefer_idle); +	} else { +		/* Now we want to elect the best one based on on our affinity +		 * and topology. +		 * We prioritize the last cpu that the task executed on since +		 * it is most likely cache-hot in that location. +		 */ +		struct task_struct* curr; +		if (!cpumask_test_cpu(this_cpu, lowest_mask)) +			this_cpu = -1; /* Skip this_cpu opt if not among lowest */ +		rcu_read_lock(); +		for_each_domain(cpu, sd) { +			if (sd->flags & SD_WAKE_AFFINE) { +				int best_cpu; +				/* +				 * "this_cpu" is cheaper to preempt than a +				 * remote processor. +				 */ +				if (this_cpu != -1 && +				    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { +					curr = cpu_rq(this_cpu)->curr; +					/* Ensuring that boosted/prefer idle +					 * tasks are not pre-empted even if low +					 * priority*/ +					if (!curr || (schedtune_task_boost(curr) == 0 +					    && schedtune_prefer_idle(curr) == 0)) { +						rcu_read_unlock(); +						return this_cpu; +					} +				} -			best_cpu = cpumask_first_and(lowest_mask, -						     sched_domain_span(sd)); -			if (best_cpu < nr_cpu_ids) { -				rcu_read_unlock(); -				return best_cpu; +				best_cpu = cpumask_first_and(lowest_mask, +							     sched_domain_span(sd)); +				if (best_cpu < nr_cpu_ids) { +					curr = cpu_rq(best_cpu)->curr; +					/* Ensuring that boosted/prefer idle +					 * tasks are not pre-empted even if low +					 * priority*/ +					if(!curr || (schedtune_task_boost(curr) == 0 +						     && schedtune_prefer_idle(curr) == 0)) { +						rcu_read_unlock(); +						return best_cpu; +					} +				}  			}  		} -	} -	rcu_read_unlock(); +		rcu_read_unlock(); -	/* -	 * And finally, if there were no matches within the domains -	 * just give the caller *something* to work with from the compatible -	 * locations. -	 */ -	if (this_cpu != -1) -		return this_cpu; +		/* And finally, if there were no matches within the domains just +		 * give the caller *something* to work with from the compatible +		 * locations. +		 */ +		if (this_cpu != -1) +			return this_cpu; -	cpu = cpumask_any(lowest_mask); -	if (cpu < nr_cpu_ids) -		return cpu; -	return -1; +		cpu = cpumask_any(lowest_mask); +		if (cpu < nr_cpu_ids) +			return cpu; +		return -1; +	}  }  /* Will lock the rq it finds */ @@ -2080,7 +2205,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)  	int cpu;  	for (tries = 0; tries < RT_MAX_TRIES; tries++) { -		cpu = find_lowest_rq(task); +		cpu = find_lowest_rq(task, 0);  		if ((cpu == -1) || (cpu == rq->cpu))  			break; | 
