Diffstat (limited to 'drivers/cpuidle/lpm-levels.c')
-rw-r--r-- | drivers/cpuidle/lpm-levels.c | 2040
1 file changed, 2040 insertions, 0 deletions
diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c new file mode 100644 index 000000000000..584a1857624a --- /dev/null +++ b/drivers/cpuidle/lpm-levels.c @@ -0,0 +1,2040 @@ +/* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved. + * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com> + * Copyright (C) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/platform_device.h> +#include <linux/mutex.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/irqchip/msm-mpm-irq.h> +#include <linux/hrtimer.h> +#include <linux/ktime.h> +#include <linux/tick.h> +#include <linux/suspend.h> +#include <linux/pm_qos.h> +#include <linux/of_platform.h> +#include <linux/smp.h> +#include <linux/remote_spinlock.h> +#include <linux/msm_remote_spinlock.h> +#include <linux/dma-mapping.h> +#include <linux/coresight-cti.h> +#include <linux/moduleparam.h> +#include <linux/sched.h> +#include <linux/cpu_pm.h> +#include <linux/arm-smccc.h> +#include <soc/qcom/spm.h> +#include <soc/qcom/pm.h> +#include <soc/qcom/rpm-notifier.h> +#include <soc/qcom/event_timer.h> +#include <soc/qcom/lpm-stats.h> +#include <soc/qcom/jtag.h> +#include <soc/qcom/minidump.h> +#include <asm/cputype.h> +#include <asm/arch_timer.h> +#include <asm/cacheflush.h> +#include <asm/suspend.h> +#include <asm/cpuidle.h> +#include "lpm-levels.h" +#include "lpm-workarounds.h" +#include <trace/events/power.h> +#define CREATE_TRACE_POINTS +#include <trace/events/trace_msm_low_power.h> +#include "../../drivers/clk/msm/clock.h" + +#define SCLK_HZ (32768) +#define SCM_HANDOFF_LOCK_ID "S:7" +#define PSCI_POWER_STATE(reset) (reset << 30) +#define PSCI_AFFINITY_LEVEL(lvl) ((lvl & 0x3) << 24) +static remote_spinlock_t scm_handoff_lock; + +enum { + MSM_LPM_LVL_DBG_SUSPEND_LIMITS = BIT(0), + MSM_LPM_LVL_DBG_IDLE_LIMITS = BIT(1), +}; + +enum debug_event { + CPU_ENTER, + CPU_EXIT, + CLUSTER_ENTER, + CLUSTER_EXIT, + PRE_PC_CB, + CPU_HP_STARTING, + CPU_HP_DYING, +}; + +struct lpm_debug { + cycle_t time; + enum debug_event evt; + int cpu; + uint32_t arg1; + uint32_t arg2; + uint32_t arg3; + uint32_t arg4; +}; + +struct lpm_cluster *lpm_root_node; + +#define MAXSAMPLES 5 + +static bool lpm_prediction = true; +module_param_named(lpm_prediction, + lpm_prediction, bool, S_IRUGO | S_IWUSR | S_IWGRP); + +static uint32_t ref_stddev = 100; +module_param_named( + ref_stddev, ref_stddev, uint, S_IRUGO | S_IWUSR | S_IWGRP +); + +static uint32_t tmr_add = 100; +module_param_named( + tmr_add, tmr_add, uint, S_IRUGO | S_IWUSR | S_IWGRP +); + +struct lpm_history { + uint32_t resi[MAXSAMPLES]; + int mode[MAXSAMPLES]; + int nsamp; + uint32_t hptr; + uint32_t hinvalid; + uint32_t htmr_wkup; + int64_t stime; +}; + +static DEFINE_PER_CPU(struct lpm_history, hist); + +static DEFINE_PER_CPU(struct lpm_cluster*, cpu_cluster); +static bool suspend_in_progress; +static struct hrtimer lpm_hrtimer; +static struct hrtimer histtimer; +static struct lpm_debug *lpm_debug; +static phys_addr_t 
lpm_debug_phys; +static const int num_dbg_elements = 0x100; +static int lpm_cpu_callback(struct notifier_block *cpu_nb, + unsigned long action, void *hcpu); + +static void cluster_unprepare(struct lpm_cluster *cluster, + const struct cpumask *cpu, int child_idx, bool from_idle, + int64_t time); +static void cluster_prepare(struct lpm_cluster *cluster, + const struct cpumask *cpu, int child_idx, bool from_idle, + int64_t time); + +static struct notifier_block __refdata lpm_cpu_nblk = { + .notifier_call = lpm_cpu_callback, +}; + +static bool menu_select; +module_param_named( + menu_select, menu_select, bool, S_IRUGO | S_IWUSR | S_IWGRP +); + +static int msm_pm_sleep_time_override; +module_param_named(sleep_time_override, + msm_pm_sleep_time_override, int, S_IRUGO | S_IWUSR | S_IWGRP); +static uint64_t suspend_wake_time; + +static bool print_parsed_dt; +module_param_named( + print_parsed_dt, print_parsed_dt, bool, S_IRUGO | S_IWUSR | S_IWGRP +); + +static bool sleep_disabled; +module_param_named(sleep_disabled, + sleep_disabled, bool, S_IRUGO | S_IWUSR | S_IWGRP); + +s32 msm_cpuidle_get_deep_idle_latency(void) +{ + return 10; +} + +void lpm_suspend_wake_time(uint64_t wakeup_time) +{ + if (wakeup_time <= 0) { + suspend_wake_time = msm_pm_sleep_time_override * MSEC_PER_SEC; + return; + } + + if (msm_pm_sleep_time_override && + (msm_pm_sleep_time_override < wakeup_time)) + suspend_wake_time = msm_pm_sleep_time_override * MSEC_PER_SEC; + else + suspend_wake_time = wakeup_time; +} +EXPORT_SYMBOL(lpm_suspend_wake_time); + +static uint32_t least_cluster_latency(struct lpm_cluster *cluster, + struct latency_level *lat_level) +{ + struct list_head *list; + struct lpm_cluster_level *level; + struct lpm_cluster *n; + struct power_params *pwr_params; + uint32_t latency = 0; + int i; + + if (!cluster->list.next) { + for (i = 0; i < cluster->nlevels; i++) { + level = &cluster->levels[i]; + pwr_params = &level->pwr; + if (lat_level->reset_level == level->reset_level) { + if ((latency > pwr_params->latency_us) + || (!latency)) + latency = pwr_params->latency_us; + break; + } + } + } else { + list_for_each(list, &cluster->parent->child) { + n = list_entry(list, typeof(*n), list); + if (lat_level->level_name) { + if (strcmp(lat_level->level_name, + n->cluster_name)) + continue; + } + for (i = 0; i < n->nlevels; i++) { + level = &n->levels[i]; + pwr_params = &level->pwr; + if (lat_level->reset_level == + level->reset_level) { + if ((latency > pwr_params->latency_us) + || (!latency)) + latency = + pwr_params->latency_us; + break; + } + } + } + } + return latency; +} + +static uint32_t least_cpu_latency(struct list_head *child, + struct latency_level *lat_level) +{ + struct list_head *list; + struct lpm_cpu_level *level; + struct power_params *pwr_params; + struct lpm_cpu *cpu; + struct lpm_cluster *n; + uint32_t latency = 0; + int i; + + list_for_each(list, child) { + n = list_entry(list, typeof(*n), list); + if (lat_level->level_name) { + if (strcmp(lat_level->level_name, n->cluster_name)) + continue; + } + cpu = n->cpu; + for (i = 0; i < cpu->nlevels; i++) { + level = &cpu->levels[i]; + pwr_params = &level->pwr; + if (lat_level->reset_level == level->reset_level) { + if ((latency > pwr_params->latency_us) + || (!latency)) + latency = pwr_params->latency_us; + break; + } + } + } + return latency; +} + +static struct lpm_cluster *cluster_aff_match(struct lpm_cluster *cluster, + int affinity_level) +{ + struct lpm_cluster *n; + + if ((cluster->aff_level == affinity_level) + || ((cluster->cpu) && (affinity_level 
== 0))) + return cluster; + else if (!cluster->cpu) { + n = list_entry(cluster->child.next, typeof(*n), list); + return cluster_aff_match(n, affinity_level); + } else + return NULL; +} + +int lpm_get_latency(struct latency_level *level, uint32_t *latency) +{ + struct lpm_cluster *cluster; + uint32_t val; + + if (!lpm_root_node) { + pr_err("%s: lpm_probe not completed\n", __func__); + return -EAGAIN; + } + + if ((level->affinity_level < 0) + || (level->affinity_level > lpm_root_node->aff_level) + || (level->reset_level < LPM_RESET_LVL_RET) + || (level->reset_level > LPM_RESET_LVL_PC) + || !latency) + return -EINVAL; + + cluster = cluster_aff_match(lpm_root_node, level->affinity_level); + if (!cluster) { + pr_err("%s:No matching cluster found for affinity_level:%d\n", + __func__, level->affinity_level); + return -EINVAL; + } + + if (level->affinity_level == 0) + val = least_cpu_latency(&cluster->parent->child, level); + else + val = least_cluster_latency(cluster, level); + + if (!val) { + pr_err("%s:No mode with affinity_level:%d reset_level:%d\n", + __func__, level->affinity_level, level->reset_level); + return -EINVAL; + } + + *latency = val; + + return 0; +} +EXPORT_SYMBOL(lpm_get_latency); + +static void update_debug_pc_event(enum debug_event event, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4) +{ + struct lpm_debug *dbg; + int idx; + static DEFINE_SPINLOCK(debug_lock); + static int pc_event_index; + + if (!lpm_debug) + return; + + spin_lock(&debug_lock); + idx = pc_event_index++; + dbg = &lpm_debug[idx & (num_dbg_elements - 1)]; + + dbg->evt = event; + dbg->time = arch_counter_get_cntvct(); + dbg->cpu = raw_smp_processor_id(); + dbg->arg1 = arg1; + dbg->arg2 = arg2; + dbg->arg3 = arg3; + dbg->arg4 = arg4; + spin_unlock(&debug_lock); +} + +static int lpm_cpu_callback(struct notifier_block *cpu_nb, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long) hcpu; + struct lpm_cluster *cluster = per_cpu(cpu_cluster, (unsigned int) cpu); + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DYING: + update_debug_pc_event(CPU_HP_DYING, cpu, + cluster->num_children_in_sync.bits[0], + cluster->child_cpus.bits[0], false); + cluster_prepare(cluster, get_cpu_mask((unsigned int) cpu), + NR_LPM_LEVELS, false, 0); + break; + case CPU_STARTING: + update_debug_pc_event(CPU_HP_STARTING, cpu, + cluster->num_children_in_sync.bits[0], + cluster->child_cpus.bits[0], false); + cluster_unprepare(cluster, get_cpu_mask((unsigned int) cpu), + NR_LPM_LEVELS, false, 0); + break; + default: + break; + } + return NOTIFY_OK; +} + +#ifdef CONFIG_ARM_PSCI + +static int __init set_cpuidle_ops(void) +{ + int ret = 0, cpu; + + for_each_possible_cpu(cpu) { + ret = arm_cpuidle_init(cpu); + if (ret) + goto exit; + } + +exit: + return ret; +} + +#endif + +static enum hrtimer_restart lpm_hrtimer_cb(struct hrtimer *h) +{ + return HRTIMER_NORESTART; +} + +static void histtimer_cancel(void) +{ + hrtimer_try_to_cancel(&histtimer); +} + +static enum hrtimer_restart histtimer_fn(struct hrtimer *h) +{ + int cpu = raw_smp_processor_id(); + struct lpm_history *history = &per_cpu(hist, cpu); + + history->hinvalid = 1; + return HRTIMER_NORESTART; +} + +static void histtimer_start(uint32_t time_us) +{ + uint64_t time_ns = time_us * NSEC_PER_USEC; + ktime_t hist_ktime = ns_to_ktime(time_ns); + + histtimer.function = histtimer_fn; + hrtimer_start(&histtimer, hist_ktime, HRTIMER_MODE_REL_PINNED); +} + +static void cluster_timer_init(struct lpm_cluster *cluster) +{ + struct list_head *list; + + if (!cluster) + 
return; + + hrtimer_init(&cluster->histtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + + list_for_each(list, &cluster->child) { + struct lpm_cluster *n; + + n = list_entry(list, typeof(*n), list); + cluster_timer_init(n); + } +} + +static void clusttimer_cancel(void) +{ + int cpu = raw_smp_processor_id(); + struct lpm_cluster *cluster = per_cpu(cpu_cluster, cpu); + + hrtimer_try_to_cancel(&cluster->histtimer); + hrtimer_try_to_cancel(&cluster->parent->histtimer); +} + +static enum hrtimer_restart clusttimer_fn(struct hrtimer *h) +{ + struct lpm_cluster *cluster = container_of(h, + struct lpm_cluster, histtimer); + + cluster->history.hinvalid = 1; + return HRTIMER_NORESTART; +} + +static void clusttimer_start(struct lpm_cluster *cluster, uint32_t time_us) +{ + uint64_t time_ns = time_us * NSEC_PER_USEC; + ktime_t clust_ktime = ns_to_ktime(time_ns); + + cluster->histtimer.function = clusttimer_fn; + hrtimer_start(&cluster->histtimer, clust_ktime, + HRTIMER_MODE_REL_PINNED); +} + +static void msm_pm_set_timer(uint32_t modified_time_us) +{ + u64 modified_time_ns = modified_time_us * NSEC_PER_USEC; + ktime_t modified_ktime = ns_to_ktime(modified_time_ns); + + lpm_hrtimer.function = lpm_hrtimer_cb; + hrtimer_start(&lpm_hrtimer, modified_ktime, HRTIMER_MODE_REL_PINNED); +} + +int set_l2_mode(struct low_power_ops *ops, int mode, bool notify_rpm) +{ + int lpm = mode; + int rc = 0; + struct low_power_ops *cpu_ops = per_cpu(cpu_cluster, + smp_processor_id())->lpm_dev; + + if (cpu_ops->tz_flag & MSM_SCM_L2_OFF || + cpu_ops->tz_flag & MSM_SCM_L2_GDHS) + coresight_cti_ctx_restore(); + + switch (mode) { + case MSM_SPM_MODE_STANDALONE_POWER_COLLAPSE: + case MSM_SPM_MODE_POWER_COLLAPSE: + case MSM_SPM_MODE_FASTPC: + cpu_ops->tz_flag = MSM_SCM_L2_OFF; + coresight_cti_ctx_save(); + break; + case MSM_SPM_MODE_GDHS: + cpu_ops->tz_flag = MSM_SCM_L2_GDHS; + coresight_cti_ctx_save(); + break; + case MSM_SPM_MODE_CLOCK_GATING: + case MSM_SPM_MODE_RETENTION: + case MSM_SPM_MODE_DISABLED: + cpu_ops->tz_flag = MSM_SCM_L2_ON; + break; + default: + cpu_ops->tz_flag = MSM_SCM_L2_ON; + lpm = MSM_SPM_MODE_DISABLED; + break; + } + rc = msm_spm_config_low_power_mode(ops->spm, lpm, notify_rpm); + + if (rc) + pr_err("%s: Failed to set L2 low power mode %d, ERR %d", + __func__, lpm, rc); + + return rc; +} + +int set_l3_mode(struct low_power_ops *ops, int mode, bool notify_rpm) +{ + struct low_power_ops *cpu_ops = per_cpu(cpu_cluster, + smp_processor_id())->lpm_dev; + + switch (mode) { + case MSM_SPM_MODE_STANDALONE_POWER_COLLAPSE: + case MSM_SPM_MODE_POWER_COLLAPSE: + case MSM_SPM_MODE_FASTPC: + cpu_ops->tz_flag |= MSM_SCM_L3_PC_OFF; + break; + default: + break; + } + return msm_spm_config_low_power_mode(ops->spm, mode, notify_rpm); +} + + +int set_system_mode(struct low_power_ops *ops, int mode, bool notify_rpm) +{ + return msm_spm_config_low_power_mode(ops->spm, mode, notify_rpm); +} + +static int set_device_mode(struct lpm_cluster *cluster, int ndevice, + struct lpm_cluster_level *level) +{ + struct low_power_ops *ops; + + if (use_psci) + return 0; + + ops = &cluster->lpm_dev[ndevice]; + if (ops && ops->set_mode) + return ops->set_mode(ops, level->mode[ndevice], + level->notify_rpm); + else + return -EINVAL; +} + +static uint64_t lpm_cpuidle_predict(struct cpuidle_device *dev, + struct lpm_cpu *cpu, int *idx_restrict, + uint32_t *idx_restrict_time) +{ + int i, j, divisor; + uint64_t max, avg, stddev; + int64_t thresh = LLONG_MAX; + struct lpm_history *history = &per_cpu(hist, dev->cpu); + uint32_t *min_residency = 
get_per_cpu_min_residency(dev->cpu); + + if (!lpm_prediction) + return 0; + + /* + * Samples are marked invalid when woken-up due to timer, + * so donot predict. + */ + if (history->hinvalid) { + history->hinvalid = 0; + history->htmr_wkup = 1; + history->stime = 0; + return 0; + } + + /* + * Predict only when all the samples are collected. + */ + if (history->nsamp < MAXSAMPLES) { + history->stime = 0; + return 0; + } + + /* + * Check if the samples are not much deviated, if so use the + * average of those as predicted sleep time. Else if any + * specific mode has more premature exits return the index of + * that mode. + */ + +again: + max = avg = divisor = stddev = 0; + for (i = 0; i < MAXSAMPLES; i++) { + int64_t value = history->resi[i]; + + if (value <= thresh) { + avg += value; + divisor++; + if (value > max) + max = value; + } + } + do_div(avg, divisor); + + for (i = 0; i < MAXSAMPLES; i++) { + int64_t value = history->resi[i]; + + if (value <= thresh) { + int64_t diff = value - avg; + + stddev += diff * diff; + } + } + do_div(stddev, divisor); + stddev = int_sqrt(stddev); + + /* + * If the deviation is less, return the average, else + * ignore one maximum sample and retry + */ + if (((avg > stddev * 6) && (divisor >= (MAXSAMPLES - 1))) + || stddev <= ref_stddev) { + history->stime = ktime_to_us(ktime_get()) + avg; + return avg; + } else if (divisor > (MAXSAMPLES - 1)) { + thresh = max - 1; + goto again; + } + + /* + * Find the number of premature exits for each of the mode, + * excluding clockgating mode, and they are more than fifty + * percent restrict that and deeper modes. + */ + if (history->htmr_wkup != 1) { + for (j = 1; j < cpu->nlevels; j++) { + uint32_t failed = 0; + uint64_t total = 0; + + for (i = 0; i < MAXSAMPLES; i++) { + if ((history->mode[i] == j) && + (history->resi[i] < min_residency[j])) { + failed++; + total += history->resi[i]; + } + } + if (failed > (MAXSAMPLES/2)) { + *idx_restrict = j; + do_div(total, failed); + *idx_restrict_time = total; + history->stime = ktime_to_us(ktime_get()) + + *idx_restrict_time; + break; + } + } + } + return 0; +} + +static inline void invalidate_predict_history(struct cpuidle_device *dev) +{ + struct lpm_history *history = &per_cpu(hist, dev->cpu); + + if (!lpm_prediction) + return; + + if (history->hinvalid) { + history->hinvalid = 0; + history->htmr_wkup = 1; + history->stime = 0; + } +} + +static void clear_predict_history(void) +{ + struct lpm_history *history; + int i; + unsigned int cpu; + + if (!lpm_prediction) + return; + + for_each_possible_cpu(cpu) { + history = &per_cpu(hist, cpu); + for (i = 0; i < MAXSAMPLES; i++) { + history->resi[i] = 0; + history->mode[i] = -1; + history->hptr = 0; + history->nsamp = 0; + history->stime = 0; + } + } +} + +static void update_history(struct cpuidle_device *dev, int idx); + +static int cpu_power_select(struct cpuidle_device *dev, + struct lpm_cpu *cpu) +{ + int best_level = -1; + uint32_t latency_us = pm_qos_request_for_cpu(PM_QOS_CPU_DMA_LATENCY, + dev->cpu); + s64 sleep_us = ktime_to_us(tick_nohz_get_sleep_length()); + uint32_t modified_time_us = 0; + uint32_t next_event_us = 0; + int i, idx_restrict; + uint32_t lvl_latency_us = 0; + uint64_t predicted = 0; + uint32_t htime = 0, idx_restrict_time = 0; + uint32_t next_wakeup_us = (uint32_t)sleep_us; + uint32_t *min_residency = get_per_cpu_min_residency(dev->cpu); + uint32_t *max_residency = get_per_cpu_max_residency(dev->cpu); + + if (!cpu) + return -EINVAL; + + if ((sleep_disabled && !cpu_isolated(dev->cpu)) || sleep_us < 0) + 
return 0; + + idx_restrict = cpu->nlevels + 1; + + next_event_us = (uint32_t)(ktime_to_us(get_next_event_time(dev->cpu))); + + for (i = 0; i < cpu->nlevels; i++) { + struct lpm_cpu_level *level = &cpu->levels[i]; + struct power_params *pwr_params = &level->pwr; + enum msm_pm_sleep_mode mode = level->mode; + bool allow; + + allow = lpm_cpu_mode_allow(dev->cpu, i, true); + + if (!allow) + continue; + + lvl_latency_us = pwr_params->latency_us; + + if (latency_us < lvl_latency_us) + break; + + if (next_event_us) { + if (next_event_us < lvl_latency_us) + break; + + if (((next_event_us - lvl_latency_us) < sleep_us) || + (next_event_us < sleep_us)) + next_wakeup_us = next_event_us - lvl_latency_us; + } + + if (!i) { + /* + * If the next_wake_us itself is not sufficient for + * deeper low power modes than clock gating do not + * call prediction. + */ + if (next_wakeup_us > max_residency[i]) { + predicted = lpm_cpuidle_predict(dev, cpu, + &idx_restrict, &idx_restrict_time); + if (predicted && (predicted < min_residency[i])) + predicted = min_residency[i]; + } else + invalidate_predict_history(dev); + } + + if (i >= idx_restrict) + break; + + best_level = i; + + if (next_event_us && next_event_us < sleep_us && + (mode != MSM_PM_SLEEP_MODE_WAIT_FOR_INTERRUPT)) + modified_time_us + = next_event_us - lvl_latency_us; + else + modified_time_us = 0; + + if (predicted ? (predicted <= max_residency[i]) + : (next_wakeup_us <= max_residency[i])) + break; + } + + if (modified_time_us) + msm_pm_set_timer(modified_time_us); + + /* + * Start timer to avoid staying in shallower mode forever + * incase of misprediciton + */ + if ((predicted || (idx_restrict != (cpu->nlevels + 1))) + && ((best_level >= 0) + && (best_level < (cpu->nlevels-1)))) { + htime = predicted + tmr_add; + if (htime == tmr_add) + htime = idx_restrict_time; + else if (htime > max_residency[best_level]) + htime = max_residency[best_level]; + + if ((next_wakeup_us > htime) && + ((next_wakeup_us - htime) > max_residency[best_level])) + histtimer_start(htime); + } + + trace_cpu_power_select(best_level, sleep_us, latency_us, next_event_us); + + trace_cpu_pred_select(idx_restrict_time ? 2 : (predicted ? 
1 : 0), + predicted, htime); + + return best_level; +} + +static uint64_t get_cluster_sleep_time(struct lpm_cluster *cluster, + struct cpumask *mask, bool from_idle, uint32_t *pred_time) +{ + int cpu; + int next_cpu = raw_smp_processor_id(); + ktime_t next_event; + struct cpumask online_cpus_in_cluster; + struct lpm_history *history; + int64_t prediction = LONG_MAX; + + next_event.tv64 = KTIME_MAX; + if (!suspend_wake_time) + suspend_wake_time = msm_pm_sleep_time_override; + if (!from_idle) { + if (mask) + cpumask_copy(mask, cpumask_of(raw_smp_processor_id())); + if (!suspend_wake_time) + return ~0ULL; + else + return USEC_PER_MSEC * suspend_wake_time; + } + + cpumask_and(&online_cpus_in_cluster, + &cluster->num_children_in_sync, cpu_online_mask); + + for_each_cpu(cpu, &online_cpus_in_cluster) { + ktime_t *next_event_c; + + next_event_c = get_next_event_cpu(cpu); + if (next_event_c->tv64 < next_event.tv64) { + next_event.tv64 = next_event_c->tv64; + next_cpu = cpu; + } + + if (from_idle && lpm_prediction) { + history = &per_cpu(hist, cpu); + if (history->stime && (history->stime < prediction)) + prediction = history->stime; + } + } + + if (mask) + cpumask_copy(mask, cpumask_of(next_cpu)); + + if (from_idle && lpm_prediction) { + if (prediction > ktime_to_us(ktime_get())) + *pred_time = prediction - ktime_to_us(ktime_get()); + } + + if (ktime_to_us(next_event) > ktime_to_us(ktime_get())) + return ktime_to_us(ktime_sub(next_event, ktime_get())); + else + return 0; +} + +static int cluster_predict(struct lpm_cluster *cluster, + uint32_t *pred_us) +{ + int i, j; + int ret = 0; + struct cluster_history *history = &cluster->history; + int64_t cur_time = ktime_to_us(ktime_get()); + + if (!lpm_prediction) + return 0; + + if (history->hinvalid) { + history->hinvalid = 0; + history->htmr_wkup = 1; + history->flag = 0; + return ret; + } + + if (history->nsamp == MAXSAMPLES) { + for (i = 0; i < MAXSAMPLES; i++) { + if ((cur_time - history->stime[i]) + > CLUST_SMPL_INVLD_TIME) + history->nsamp--; + } + } + + if (history->nsamp < MAXSAMPLES) { + history->flag = 0; + return ret; + } + + if (history->flag == 2) + history->flag = 0; + + if (history->htmr_wkup != 1) { + uint64_t total = 0; + + if (history->flag == 1) { + for (i = 0; i < MAXSAMPLES; i++) + total += history->resi[i]; + do_div(total, MAXSAMPLES); + *pred_us = total; + return 2; + } + + for (j = 1; j < cluster->nlevels; j++) { + uint32_t failed = 0; + + total = 0; + for (i = 0; i < MAXSAMPLES; i++) { + if ((history->mode[i] == j) && (history->resi[i] + < cluster->levels[j].pwr.min_residency)) { + failed++; + total += history->resi[i]; + } + } + + if (failed > (MAXSAMPLES-2)) { + do_div(total, failed); + *pred_us = total; + history->flag = 1; + return 1; + } + } + } + + return ret; +} + +static void update_cluster_history_time(struct cluster_history *history, + int idx, uint64_t start) +{ + history->entry_idx = idx; + history->entry_time = start; +} + +static void update_cluster_history(struct cluster_history *history, int idx) +{ + uint32_t tmr = 0; + uint32_t residency = 0; + struct lpm_cluster *cluster = + container_of(history, struct lpm_cluster, history); + + if (!lpm_prediction) + return; + + if ((history->entry_idx == -1) || (history->entry_idx == idx)) { + residency = ktime_to_us(ktime_get()) - history->entry_time; + history->stime[history->hptr] = history->entry_time; + } else + return; + + if (history->htmr_wkup) { + if (!history->hptr) + history->hptr = MAXSAMPLES-1; + else + history->hptr--; + + history->resi[history->hptr] += 
residency; + + history->htmr_wkup = 0; + tmr = 1; + } else { + history->resi[history->hptr] = residency; + } + + history->mode[history->hptr] = idx; + + history->entry_idx = INT_MIN; + history->entry_time = 0; + + if (history->nsamp < MAXSAMPLES) + history->nsamp++; + + trace_cluster_pred_hist(cluster->cluster_name, + history->mode[history->hptr], history->resi[history->hptr], + history->hptr, tmr); + + (history->hptr)++; + + if (history->hptr >= MAXSAMPLES) + history->hptr = 0; +} + +static void clear_cl_history_each(struct cluster_history *history) +{ + int i; + + for (i = 0; i < MAXSAMPLES; i++) { + history->resi[i] = 0; + history->mode[i] = -1; + history->stime[i] = 0; + } + history->hptr = 0; + history->nsamp = 0; + history->flag = 0; + history->hinvalid = 0; + history->htmr_wkup = 0; +} + +static void clear_cl_predict_history(void) +{ + struct lpm_cluster *cluster = lpm_root_node; + struct list_head *list; + + if (!lpm_prediction) + return; + + clear_cl_history_each(&cluster->history); + + list_for_each(list, &cluster->child) { + struct lpm_cluster *n; + + n = list_entry(list, typeof(*n), list); + clear_cl_history_each(&n->history); + } +} + +static int cluster_select(struct lpm_cluster *cluster, bool from_idle, + int *ispred) +{ + int best_level = -1; + int i; + struct cpumask mask; + uint32_t latency_us = ~0U; + uint32_t sleep_us; + uint32_t cpupred_us = 0, pred_us = 0; + int pred_mode = 0, predicted = 0; + + if (!cluster) + return -EINVAL; + + sleep_us = (uint32_t)get_cluster_sleep_time(cluster, NULL, + from_idle, &cpupred_us); + + if (from_idle) { + pred_mode = cluster_predict(cluster, &pred_us); + + if (cpupred_us && pred_mode && (cpupred_us < pred_us)) + pred_us = cpupred_us; + + if (pred_us && pred_mode && (pred_us < sleep_us)) + predicted = 1; + + if (predicted && (pred_us == cpupred_us)) + predicted = 2; + } + + if (cpumask_and(&mask, cpu_online_mask, &cluster->child_cpus)) + latency_us = pm_qos_request_for_cpumask(PM_QOS_CPU_DMA_LATENCY, + &mask); + + /* + * If atleast one of the core in the cluster is online, the cluster + * low power modes should be determined by the idle characteristics + * even if the last core enters the low power mode as a part of + * hotplug. + */ + + if (!from_idle && num_online_cpus() > 1 && + cpumask_intersects(&cluster->child_cpus, cpu_online_mask)) + from_idle = true; + + for (i = 0; i < cluster->nlevels; i++) { + struct lpm_cluster_level *level = &cluster->levels[i]; + struct power_params *pwr_params = &level->pwr; + + if (!lpm_cluster_mode_allow(cluster, i, from_idle)) + continue; + + if (level->last_core_only && + cpumask_weight(cpu_online_mask) > 1) + continue; + + if (!cpumask_equal(&cluster->num_children_in_sync, + &level->num_cpu_votes)) + continue; + + if (from_idle && latency_us < pwr_params->latency_us) + break; + + if (sleep_us < pwr_params->time_overhead_us) + break; + + if (suspend_in_progress && from_idle && level->notify_rpm) + continue; + + if (level->notify_rpm && msm_rpm_waiting_for_ack()) + continue; + + best_level = i; + + if (from_idle && + (predicted ? 
(pred_us <= pwr_params->max_residency) + : (sleep_us <= pwr_params->max_residency))) + break; + } + + if ((best_level == (cluster->nlevels - 1)) && (pred_mode == 2)) + cluster->history.flag = 2; + + *ispred = predicted; + + trace_cluster_pred_select(cluster->cluster_name, best_level, sleep_us, + latency_us, predicted, pred_us); + + return best_level; +} + +static void cluster_notify(struct lpm_cluster *cluster, + struct lpm_cluster_level *level, bool enter) +{ + if (level->is_reset && enter) + cpu_cluster_pm_enter(cluster->aff_level); + else if (level->is_reset && !enter) + cpu_cluster_pm_exit(cluster->aff_level); +} + +static int cluster_configure(struct lpm_cluster *cluster, int idx, + bool from_idle, int predicted) +{ + struct lpm_cluster_level *level = &cluster->levels[idx]; + struct cpumask online_cpus; + int ret, i; + + cpumask_and(&online_cpus, &cluster->num_children_in_sync, + cpu_online_mask); + + if (!cpumask_equal(&cluster->num_children_in_sync, &cluster->child_cpus) + || is_IPI_pending(&online_cpus)) { + return -EPERM; + } + + if (idx != cluster->default_level) { + update_debug_pc_event(CLUSTER_ENTER, idx, + cluster->num_children_in_sync.bits[0], + cluster->child_cpus.bits[0], from_idle); + trace_cluster_enter(cluster->cluster_name, idx, + cluster->num_children_in_sync.bits[0], + cluster->child_cpus.bits[0], from_idle); + lpm_stats_cluster_enter(cluster->stats, idx); + + if (from_idle && lpm_prediction) + update_cluster_history_time(&cluster->history, idx, + ktime_to_us(ktime_get())); + } + + for (i = 0; i < cluster->ndevices; i++) { + ret = set_device_mode(cluster, i, level); + if (ret) + goto failed_set_mode; + } + + if (level->notify_rpm) { + struct cpumask nextcpu, *cpumask; + uint64_t us; + uint32_t pred_us; + uint64_t sec; + uint64_t nsec; + + us = get_cluster_sleep_time(cluster, &nextcpu, + from_idle, &pred_us); + cpumask = level->disable_dynamic_routing ? 
NULL : &nextcpu; + + ret = msm_rpm_enter_sleep(0, cpumask); + if (ret) { + pr_info("Failed msm_rpm_enter_sleep() rc = %d\n", ret); + goto failed_set_mode; + } + + clear_predict_history(); + clear_cl_predict_history(); + + us = us + 1; + sec = us; + do_div(sec, USEC_PER_SEC); + nsec = us - sec * USEC_PER_SEC; + + sec = sec * SCLK_HZ; + if (nsec > 0) { + nsec = nsec * NSEC_PER_USEC; + do_div(nsec, NSEC_PER_SEC/SCLK_HZ); + } + us = sec + nsec; + msm_mpm_enter_sleep(us, from_idle, cpumask); + } + + /* Notify cluster enter event after successfully config completion */ + cluster_notify(cluster, level, true); + + sched_set_cluster_dstate(&cluster->child_cpus, idx, 0, 0); + + cluster->last_level = idx; + + if (predicted && (idx < (cluster->nlevels - 1))) { + struct power_params *pwr_params = &cluster->levels[idx].pwr; + + tick_broadcast_exit(); + clusttimer_start(cluster, pwr_params->max_residency + tmr_add); + tick_broadcast_enter(); + } + + return 0; + +failed_set_mode: + + for (i = 0; i < cluster->ndevices; i++) { + int rc = 0; + level = &cluster->levels[cluster->default_level]; + rc = set_device_mode(cluster, i, level); + BUG_ON(rc); + } + return ret; +} + +static void cluster_prepare(struct lpm_cluster *cluster, + const struct cpumask *cpu, int child_idx, bool from_idle, + int64_t start_time) +{ + int i; + int predicted = 0; + + if (!cluster) + return; + + if (cluster->min_child_level > child_idx) + return; + + spin_lock(&cluster->sync_lock); + cpumask_or(&cluster->num_children_in_sync, cpu, + &cluster->num_children_in_sync); + + for (i = 0; i < cluster->nlevels; i++) { + struct lpm_cluster_level *lvl = &cluster->levels[i]; + + if (child_idx >= lvl->min_child_level) + cpumask_or(&lvl->num_cpu_votes, cpu, + &lvl->num_cpu_votes); + } + + /* + * cluster_select() does not make any configuration changes. So its ok + * to release the lock here. 
If a core wakes up for a rude request, + * it need not wait for another to finish its cluster selection and + * configuration process + */ + + if (!cpumask_equal(&cluster->num_children_in_sync, + &cluster->child_cpus)) + goto failed; + + i = cluster_select(cluster, from_idle, &predicted); + + if (((i < 0) || (i == cluster->default_level)) + && predicted && from_idle) { + update_cluster_history_time(&cluster->history, + -1, ktime_to_us(ktime_get())); + + if (i < 0) { + struct power_params *pwr_params = + &cluster->levels[0].pwr; + + tick_broadcast_exit(); + clusttimer_start(cluster, + pwr_params->max_residency + tmr_add); + tick_broadcast_enter(); + } + } + + if (i < 0) + goto failed; + + if (cluster_configure(cluster, i, from_idle, predicted)) + goto failed; + + cluster->stats->sleep_time = start_time; + cluster_prepare(cluster->parent, &cluster->num_children_in_sync, i, + from_idle, start_time); + + spin_unlock(&cluster->sync_lock); + return; +failed: + spin_unlock(&cluster->sync_lock); + cluster->stats->sleep_time = 0; + return; +} + +static void cluster_unprepare(struct lpm_cluster *cluster, + const struct cpumask *cpu, int child_idx, bool from_idle, + int64_t end_time) +{ + struct lpm_cluster_level *level; + bool first_cpu; + int last_level, i, ret; + + if (!cluster) + return; + + if (cluster->min_child_level > child_idx) + return; + + spin_lock(&cluster->sync_lock); + last_level = cluster->default_level; + first_cpu = cpumask_equal(&cluster->num_children_in_sync, + &cluster->child_cpus); + cpumask_andnot(&cluster->num_children_in_sync, + &cluster->num_children_in_sync, cpu); + + for (i = 0; i < cluster->nlevels; i++) { + struct lpm_cluster_level *lvl = &cluster->levels[i]; + + if (child_idx >= lvl->min_child_level) + cpumask_andnot(&lvl->num_cpu_votes, + &lvl->num_cpu_votes, cpu); + } + + if (from_idle && first_cpu && + (cluster->last_level == cluster->default_level)) + update_cluster_history(&cluster->history, cluster->last_level); + + if (!first_cpu || cluster->last_level == cluster->default_level) + goto unlock_return; + + if (cluster->stats->sleep_time) + cluster->stats->sleep_time = end_time - + cluster->stats->sleep_time; + lpm_stats_cluster_exit(cluster->stats, cluster->last_level, true); + + level = &cluster->levels[cluster->last_level]; + if (level->notify_rpm) { + msm_rpm_exit_sleep(); + + /* If RPM bumps up CX to turbo, unvote CX turbo vote + * during exit of rpm assisted power collapse to + * reduce the power impact + */ + + lpm_wa_cx_unvote_send(); + msm_mpm_exit_sleep(from_idle); + } + + update_debug_pc_event(CLUSTER_EXIT, cluster->last_level, + cluster->num_children_in_sync.bits[0], + cluster->child_cpus.bits[0], from_idle); + trace_cluster_exit(cluster->cluster_name, cluster->last_level, + cluster->num_children_in_sync.bits[0], + cluster->child_cpus.bits[0], from_idle); + + last_level = cluster->last_level; + cluster->last_level = cluster->default_level; + + for (i = 0; i < cluster->ndevices; i++) { + level = &cluster->levels[cluster->default_level]; + ret = set_device_mode(cluster, i, level); + + BUG_ON(ret); + + } + sched_set_cluster_dstate(&cluster->child_cpus, 0, 0, 0); + + cluster_notify(cluster, &cluster->levels[last_level], false); + + if (from_idle) + update_cluster_history(&cluster->history, last_level); + + cluster_unprepare(cluster->parent, &cluster->child_cpus, + last_level, from_idle, end_time); +unlock_return: + spin_unlock(&cluster->sync_lock); +} + +static inline void cpu_prepare(struct lpm_cluster *cluster, int cpu_index, + bool from_idle) +{ + struct 
lpm_cpu_level *cpu_level = &cluster->cpu->levels[cpu_index]; + bool jtag_save_restore = + cluster->cpu->levels[cpu_index].jtag_save_restore; + + /* Use broadcast timer for aggregating sleep mode within a cluster. + * A broadcast timer could be used in the following scenarios + * 1) The architected timer HW gets reset during certain low power + * modes and the core relies on a external(broadcast) timer to wake up + * from sleep. This information is passed through device tree. + * 2) The CPU low power mode could trigger a system low power mode. + * The low power module relies on Broadcast timer to aggregate the + * next wakeup within a cluster, in which case, CPU switches over to + * use broadcast timer. + */ + if (from_idle && (cpu_level->use_bc_timer || + (cpu_index >= cluster->min_child_level))) + tick_broadcast_enter(); + + if (from_idle && ((cpu_level->mode == MSM_PM_SLEEP_MODE_POWER_COLLAPSE) + || (cpu_level->mode == + MSM_PM_SLEEP_MODE_POWER_COLLAPSE_STANDALONE) + || (cpu_level->is_reset))) + cpu_pm_enter(); + + /* + * Save JTAG registers for 8996v1.0 & 8996v2.x in C4 LPM + */ + if (jtag_save_restore) + msm_jtag_save_state(); +} + +static inline void cpu_unprepare(struct lpm_cluster *cluster, int cpu_index, + bool from_idle) +{ + struct lpm_cpu_level *cpu_level = &cluster->cpu->levels[cpu_index]; + bool jtag_save_restore = + cluster->cpu->levels[cpu_index].jtag_save_restore; + + if (from_idle && (cpu_level->use_bc_timer || + (cpu_index >= cluster->min_child_level))) + tick_broadcast_exit(); + + if (from_idle && ((cpu_level->mode == MSM_PM_SLEEP_MODE_POWER_COLLAPSE) + || (cpu_level->mode == + MSM_PM_SLEEP_MODE_POWER_COLLAPSE_STANDALONE) + || cpu_level->is_reset)) + cpu_pm_exit(); + + /* + * Restore JTAG registers for 8996v1.0 & 8996v2.x in C4 LPM + */ + if (jtag_save_restore) + msm_jtag_restore_state(); +} + +int get_cluster_id(struct lpm_cluster *cluster, int *aff_lvl) +{ + int state_id = 0; + + if (!cluster) + return 0; + + spin_lock(&cluster->sync_lock); + + if (!cpumask_equal(&cluster->num_children_in_sync, + &cluster->child_cpus)) + goto unlock_and_return; + + state_id |= get_cluster_id(cluster->parent, aff_lvl); + + if (cluster->last_level != cluster->default_level) { + struct lpm_cluster_level *level + = &cluster->levels[cluster->last_level]; + + state_id |= (level->psci_id & cluster->psci_mode_mask) + << cluster->psci_mode_shift; + (*aff_lvl)++; + } +unlock_and_return: + spin_unlock(&cluster->sync_lock); + return state_id; +} + +#if !defined(CONFIG_CPU_V7) +bool psci_enter_sleep(struct lpm_cluster *cluster, int idx, bool from_idle) +{ + /* + * idx = 0 is the default LPM state + */ + if (!idx) { + stop_critical_timings(); + wfi(); + start_critical_timings(); + return 1; + } else { + int affinity_level = 0; + int state_id = get_cluster_id(cluster, &affinity_level); + int power_state = + PSCI_POWER_STATE(cluster->cpu->levels[idx].is_reset); + bool success = false; + + if (cluster->cpu->levels[idx].hyp_psci) { + stop_critical_timings(); + __invoke_psci_fn_smc(0xC4000021, 0, 0, 0); + start_critical_timings(); + return 1; + } + + affinity_level = PSCI_AFFINITY_LEVEL(affinity_level); + state_id |= (power_state | affinity_level + | cluster->cpu->levels[idx].psci_id); + + update_debug_pc_event(CPU_ENTER, state_id, + 0xdeaffeed, 0xdeaffeed, true); + stop_critical_timings(); + success = !arm_cpuidle_suspend(state_id); + start_critical_timings(); + update_debug_pc_event(CPU_EXIT, state_id, + success, 0xdeaffeed, true); + return success; + } +} +#elif defined(CONFIG_ARM_PSCI) +bool 
psci_enter_sleep(struct lpm_cluster *cluster, int idx, bool from_idle) +{ + if (!idx) { + stop_critical_timings(); + wfi(); + start_critical_timings(); + return 1; + } else { + int affinity_level = 0; + int state_id = get_cluster_id(cluster, &affinity_level); + int power_state = + PSCI_POWER_STATE(cluster->cpu->levels[idx].is_reset); + bool success = false; + + affinity_level = PSCI_AFFINITY_LEVEL(affinity_level); + state_id |= (power_state | affinity_level + | cluster->cpu->levels[idx].psci_id); + + update_debug_pc_event(CPU_ENTER, state_id, + 0xdeaffeed, 0xdeaffeed, true); + stop_critical_timings(); + success = !arm_cpuidle_suspend(state_id); + start_critical_timings(); + update_debug_pc_event(CPU_EXIT, state_id, + success, 0xdeaffeed, true); + return success; + } +} +#else +bool psci_enter_sleep(struct lpm_cluster *cluster, int idx, bool from_idle) +{ + WARN_ONCE(true, "PSCI cpu_suspend ops not supported\n"); + return false; +} +#endif + +static int lpm_cpuidle_select(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + struct lpm_cluster *cluster = per_cpu(cpu_cluster, dev->cpu); + int idx; + + if (!cluster) + return 0; + + idx = cpu_power_select(dev, cluster->cpu); + + if (idx < 0) + return -EPERM; + + return idx; +} + +static void update_history(struct cpuidle_device *dev, int idx) +{ + struct lpm_history *history = &per_cpu(hist, dev->cpu); + uint32_t tmr = 0; + + if (!lpm_prediction) + return; + + if (history->htmr_wkup) { + if (!history->hptr) + history->hptr = MAXSAMPLES-1; + else + history->hptr--; + + history->resi[history->hptr] += dev->last_residency; + history->htmr_wkup = 0; + tmr = 1; + } else + history->resi[history->hptr] = dev->last_residency; + + history->mode[history->hptr] = idx; + + trace_cpu_pred_hist(history->mode[history->hptr], + history->resi[history->hptr], history->hptr, tmr); + + if (history->nsamp < MAXSAMPLES) + history->nsamp++; + + (history->hptr)++; + if (history->hptr >= MAXSAMPLES) + history->hptr = 0; +} + +static int lpm_cpuidle_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + struct lpm_cluster *cluster = per_cpu(cpu_cluster, dev->cpu); + bool success = false; + const struct cpumask *cpumask = get_cpu_mask(dev->cpu); + int64_t start_time = ktime_to_ns(ktime_get()), end_time; + struct power_params *pwr_params; + + if (idx < 0) + return -EINVAL; + + pwr_params = &cluster->cpu->levels[idx].pwr; + sched_set_cpu_cstate(smp_processor_id(), idx + 1, + pwr_params->energy_overhead, pwr_params->latency_us); + + pwr_params = &cluster->cpu->levels[idx].pwr; + + cpu_prepare(cluster, idx, true); + cluster_prepare(cluster, cpumask, idx, true, ktime_to_ns(ktime_get())); + + trace_cpu_idle_enter(idx); + lpm_stats_cpu_enter(idx, start_time); + + if (need_resched()) + goto exit; + + BUG_ON(!use_psci); + success = psci_enter_sleep(cluster, idx, true); + +exit: + end_time = ktime_to_ns(ktime_get()); + lpm_stats_cpu_exit(idx, end_time, success); + + cluster_unprepare(cluster, cpumask, idx, true, end_time); + cpu_unprepare(cluster, idx, true); + sched_set_cpu_cstate(smp_processor_id(), 0, 0, 0); + end_time = ktime_to_ns(ktime_get()) - start_time; + do_div(end_time, 1000); + dev->last_residency = end_time; + update_history(dev, idx); + trace_cpu_idle_exit(idx, success); + local_irq_enable(); + if (lpm_prediction) { + histtimer_cancel(); + clusttimer_cancel(); + } + return idx; +} + +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS +static int cpuidle_register_cpu(struct cpuidle_driver *drv, + struct cpumask *mask) +{ + struct cpuidle_device 
*device; + int cpu, ret; + + + if (!mask || !drv) + return -EINVAL; + + drv->cpumask = mask; + ret = cpuidle_register_driver(drv); + if (ret) { + pr_err("Failed to register cpuidle driver %d\n", ret); + goto failed_driver_register; + } + + for_each_cpu(cpu, mask) { + device = &per_cpu(cpuidle_dev, cpu); + device->cpu = cpu; + + ret = cpuidle_register_device(device); + if (ret) { + pr_err("Failed to register cpuidle driver for cpu:%u\n", + cpu); + goto failed_driver_register; + } + } + return ret; +failed_driver_register: + for_each_cpu(cpu, mask) + cpuidle_unregister_driver(drv); + return ret; +} +#else +static int cpuidle_register_cpu(struct cpuidle_driver *drv, + struct cpumask *mask) +{ + return cpuidle_register(drv, NULL); +} +#endif + +static struct cpuidle_governor lpm_governor = { + .name = "qcom", + .rating = 30, + .select = lpm_cpuidle_select, + .owner = THIS_MODULE, +}; + +static int cluster_cpuidle_register(struct lpm_cluster *cl) +{ + int i = 0, ret = 0; + unsigned cpu; + struct lpm_cluster *p = NULL; + + if (!cl->cpu) { + struct lpm_cluster *n; + + list_for_each_entry(n, &cl->child, list) { + ret = cluster_cpuidle_register(n); + if (ret) + break; + } + return ret; + } + + cl->drv = kzalloc(sizeof(*cl->drv), GFP_KERNEL); + if (!cl->drv) + return -ENOMEM; + + cl->drv->name = "msm_idle"; + + for (i = 0; i < cl->cpu->nlevels; i++) { + struct cpuidle_state *st = &cl->drv->states[i]; + struct lpm_cpu_level *cpu_level = &cl->cpu->levels[i]; + snprintf(st->name, CPUIDLE_NAME_LEN, "C%u\n", i); + snprintf(st->desc, CPUIDLE_DESC_LEN, cpu_level->name); + st->flags = 0; + st->exit_latency = cpu_level->pwr.latency_us; + st->power_usage = cpu_level->pwr.ss_power; + st->target_residency = 0; + st->enter = lpm_cpuidle_enter; + } + + cl->drv->state_count = cl->cpu->nlevels; + cl->drv->safe_state_index = 0; + for_each_cpu(cpu, &cl->child_cpus) + per_cpu(cpu_cluster, cpu) = cl; + + for_each_possible_cpu(cpu) { + if (cpu_online(cpu)) + continue; + p = per_cpu(cpu_cluster, cpu); + while (p) { + int j; + spin_lock(&p->sync_lock); + cpumask_set_cpu(cpu, &p->num_children_in_sync); + for (j = 0; j < p->nlevels; j++) + cpumask_copy(&p->levels[j].num_cpu_votes, + &p->num_children_in_sync); + spin_unlock(&p->sync_lock); + p = p->parent; + } + } + ret = cpuidle_register_cpu(cl->drv, &cl->child_cpus); + + if (ret) { + kfree(cl->drv); + return -ENOMEM; + } + return 0; +} + +/** + * init_lpm - initializes the governor + */ +static int __init init_lpm(void) +{ + return cpuidle_register_governor(&lpm_governor); +} + +postcore_initcall(init_lpm); + +static void register_cpu_lpm_stats(struct lpm_cpu *cpu, + struct lpm_cluster *parent) +{ + const char **level_name; + int i; + + level_name = kzalloc(cpu->nlevels * sizeof(*level_name), GFP_KERNEL); + + if (!level_name) + return; + + for (i = 0; i < cpu->nlevels; i++) + level_name[i] = cpu->levels[i].name; + + lpm_stats_config_level("cpu", level_name, cpu->nlevels, + parent->stats, &parent->child_cpus); + + kfree(level_name); +} + +static void register_cluster_lpm_stats(struct lpm_cluster *cl, + struct lpm_cluster *parent) +{ + const char **level_name; + int i; + struct lpm_cluster *child; + + if (!cl) + return; + + level_name = kzalloc(cl->nlevels * sizeof(*level_name), GFP_KERNEL); + + if (!level_name) + return; + + for (i = 0; i < cl->nlevels; i++) + level_name[i] = cl->levels[i].level_name; + + cl->stats = lpm_stats_config_level(cl->cluster_name, level_name, + cl->nlevels, parent ? 
parent->stats : NULL, NULL); + + kfree(level_name); + + if (cl->cpu) { + register_cpu_lpm_stats(cl->cpu, cl); + return; + } + + list_for_each_entry(child, &cl->child, list) + register_cluster_lpm_stats(child, cl); +} + +static int lpm_suspend_prepare(void) +{ + suspend_in_progress = true; + msm_mpm_suspend_prepare(); + lpm_stats_suspend_enter(); + + return 0; +} + +static void lpm_suspend_wake(void) +{ + suspend_in_progress = false; + msm_mpm_suspend_wake(); + lpm_stats_suspend_exit(); +} + +static int lpm_suspend_enter(suspend_state_t state) +{ + int cpu = raw_smp_processor_id(); + struct lpm_cluster *cluster = per_cpu(cpu_cluster, cpu); + struct lpm_cpu *lpm_cpu = cluster->cpu; + const struct cpumask *cpumask = get_cpu_mask(cpu); + int idx; + + for (idx = lpm_cpu->nlevels - 1; idx >= 0; idx--) { + + if (lpm_cpu_mode_allow(cpu, idx, false)) + break; + } + if (idx < 0) { + pr_err("Failed suspend\n"); + return 0; + } + cpu_prepare(cluster, idx, false); + cluster_prepare(cluster, cpumask, idx, false, 0); + if (idx > 0) + update_debug_pc_event(CPU_ENTER, idx, 0xdeaffeed, + 0xdeaffeed, false); + + /* + * Print the clocks which are enabled during system suspend + * This debug information is useful to know which are the + * clocks that are enabled and preventing the system level + * LPMs(XO and Vmin). + */ + clock_debug_print_enabled(); + + BUG_ON(!use_psci); + psci_enter_sleep(cluster, idx, true); + + if (idx > 0) + update_debug_pc_event(CPU_EXIT, idx, true, 0xdeaffeed, + false); + + cluster_unprepare(cluster, cpumask, idx, false, 0); + cpu_unprepare(cluster, idx, false); + return 0; +} + +static const struct platform_suspend_ops lpm_suspend_ops = { + .enter = lpm_suspend_enter, + .valid = suspend_valid_only_mem, + .prepare_late = lpm_suspend_prepare, + .wake = lpm_suspend_wake, +}; + +static int lpm_probe(struct platform_device *pdev) +{ + int ret; + int size; + struct kobject *module_kobj = NULL; + struct md_region md_entry; + + get_online_cpus(); + lpm_root_node = lpm_of_parse_cluster(pdev); + + if (IS_ERR_OR_NULL(lpm_root_node)) { + pr_err("%s(): Failed to probe low power modes\n", __func__); + put_online_cpus(); + return PTR_ERR(lpm_root_node); + } + + if (print_parsed_dt) + cluster_dt_walkthrough(lpm_root_node); + + /* + * Register hotplug notifier before broadcast time to ensure there + * to prevent race where a broadcast timer might not be setup on for a + * core. BUG in existing code but no known issues possibly because of + * how late lpm_levels gets initialized. 
+ */ + suspend_set_ops(&lpm_suspend_ops); + hrtimer_init(&lpm_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&histtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cluster_timer_init(lpm_root_node); + + ret = remote_spin_lock_init(&scm_handoff_lock, SCM_HANDOFF_LOCK_ID); + if (ret) { + pr_err("%s: Failed initializing scm_handoff_lock (%d)\n", + __func__, ret); + put_online_cpus(); + return ret; + } + + size = num_dbg_elements * sizeof(struct lpm_debug); + lpm_debug = dma_alloc_coherent(&pdev->dev, size, + &lpm_debug_phys, GFP_KERNEL); + register_cluster_lpm_stats(lpm_root_node, NULL); + + ret = cluster_cpuidle_register(lpm_root_node); + put_online_cpus(); + if (ret) { + pr_err("%s()Failed to register with cpuidle framework\n", + __func__); + goto failed; + } + register_hotcpu_notifier(&lpm_cpu_nblk); + module_kobj = kset_find_obj(module_kset, KBUILD_MODNAME); + if (!module_kobj) { + pr_err("%s: cannot find kobject for module %s\n", + __func__, KBUILD_MODNAME); + ret = -ENOENT; + goto failed; + } + + ret = create_cluster_lvl_nodes(lpm_root_node, module_kobj); + if (ret) { + pr_err("%s(): Failed to create cluster level nodes\n", + __func__); + goto failed; + } + + /* Add lpm_debug to Minidump*/ + strlcpy(md_entry.name, "KLPMDEBUG", sizeof(md_entry.name)); + md_entry.virt_addr = (uintptr_t)lpm_debug; + md_entry.phys_addr = lpm_debug_phys; + md_entry.size = size; + if (msm_minidump_add_region(&md_entry)) + pr_info("Failed to add lpm_debug in Minidump\n"); + + return 0; +failed: + free_cluster_node(lpm_root_node); + lpm_root_node = NULL; + return ret; +} + +static struct of_device_id lpm_mtch_tbl[] = { + {.compatible = "qcom,lpm-levels"}, + {}, +}; + +static struct platform_driver lpm_driver = { + .probe = lpm_probe, + .driver = { + .name = "lpm-levels", + .owner = THIS_MODULE, + .of_match_table = lpm_mtch_tbl, + }, +}; + +static int __init lpm_levels_module_init(void) +{ + int rc; + rc = platform_driver_register(&lpm_driver); + if (rc) { + pr_info("Error registering %s\n", lpm_driver.driver.name); + goto fail; + } + +#ifdef CONFIG_ARM_PSCI + rc = set_cpuidle_ops(); + if (rc) { + pr_err("%s(): Failed to set cpuidle ops\n", __func__); + goto fail; + } +#endif + +fail: + return rc; +} +late_initcall(lpm_levels_module_init); + +enum msm_pm_l2_scm_flag lpm_cpu_pre_pc_cb(unsigned int cpu) +{ + struct lpm_cluster *cluster = per_cpu(cpu_cluster, cpu); + enum msm_pm_l2_scm_flag retflag = MSM_SCM_L2_ON; + + /* + * No need to acquire the lock if probe isn't completed yet + * In the event of the hotplug happening before lpm probe, we want to + * flush the cache to make sure that L2 is flushed. In particular, this + * could cause incoherencies for a cluster architecture. This wouldn't + * affect the idle case as the idle driver wouldn't be registered + * before the probe function + */ + if (!cluster) + return MSM_SCM_L2_OFF; + + /* + * Assumes L2 only. What/How parameters gets passed into TZ will + * determine how this function reports this info back in msm-pm.c + */ + spin_lock(&cluster->sync_lock); + + if (!cluster->lpm_dev) { + retflag = MSM_SCM_L2_OFF; + goto unlock_and_return; + } + + if (!cpumask_equal(&cluster->num_children_in_sync, + &cluster->child_cpus)) + goto unlock_and_return; + + if (cluster->lpm_dev) + retflag = cluster->lpm_dev->tz_flag; + /* + * The scm_handoff_lock will be release by the secure monitor. 
+ * It is used to serialize power-collapses from this point on, + * so that both Linux and the secure context have a consistent + * view regarding the number of running cpus (cpu_count). + * + * It must be acquired before releasing the cluster lock. + */ +unlock_and_return: + update_debug_pc_event(PRE_PC_CB, retflag, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef); + trace_pre_pc_cb(retflag); + remote_spin_lock_rlock_id(&scm_handoff_lock, + REMOTE_SPINLOCK_TID_START + cpu); + spin_unlock(&cluster->sync_lock); + return retflag; +}
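The sleep length passed to msm_mpm_enter_sleep() in cluster_configure() is converted from microseconds into 32.768 kHz sleep-clock (SCLK) ticks: the interval is split into whole seconds and a sub-second remainder, and each part is scaled separately. A minimal standalone sketch of that arithmetic, assuming only the constants defined in this file; the helper name us_to_sclk_ticks() is illustrative and not part of the driver.

#include <stdint.h>
#include <stdio.h>

#define SCLK_HZ        32768ULL      /* matches the driver's SCLK_HZ */
#define USEC_PER_SEC   1000000ULL
#define NSEC_PER_USEC  1000ULL
#define NSEC_PER_SEC   1000000000ULL

/* Illustrative helper: convert a sleep length in microseconds to
 * 32.768 kHz sleep-clock ticks, mirroring the arithmetic done in
 * cluster_configure() (which performs the divisions with do_div()).
 */
static uint64_t us_to_sclk_ticks(uint64_t us)
{
	uint64_t sec, nsec;

	us += 1;                         /* the driver bumps the interval by 1 us */
	sec = us / USEC_PER_SEC;         /* whole seconds */
	nsec = us - sec * USEC_PER_SEC;  /* remaining microseconds */

	sec *= SCLK_HZ;                  /* whole seconds -> ticks */
	if (nsec > 0) {
		nsec *= NSEC_PER_USEC;            /* remainder -> nanoseconds */
		nsec /= (NSEC_PER_SEC / SCLK_HZ); /* nanoseconds -> ticks */
	}
	return sec + nsec;
}

int main(void)
{
	/* a 2.5 ms idle window comes out to about 81 sleep-clock ticks */
	printf("%llu\n", (unsigned long long)us_to_sclk_ticks(2500));
	return 0;
}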
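psci_enter_sleep() builds the composite PSCI suspend parameter by OR-ing the per-level psci_id bits collected by get_cluster_id() with the PSCI_POWER_STATE() and PSCI_AFFINITY_LEVEL() fields defined at the top of the file. A hedged sketch of just that packing step follows; the psci_id values used here are placeholders for illustration, not values taken from any device tree.

#include <stdint.h>
#include <stdio.h>

/* Same encoding macros as the driver */
#define PSCI_POWER_STATE(reset)   ((reset) << 30)
#define PSCI_AFFINITY_LEVEL(lvl)  (((lvl) & 0x3) << 24)

/* Illustrative packing of a composite PSCI state id: the driver ORs in
 * one psci_id per cluster level (masked and shifted by psci_mode_mask /
 * psci_mode_shift) plus the CPU-level id, then adds the power-state and
 * affinity-level fields before calling arm_cpuidle_suspend().
 */
static uint32_t make_state_id(int is_reset, int affinity_level,
			      uint32_t cpu_psci_id, uint32_t cluster_bits)
{
	uint32_t state_id = cluster_bits | cpu_psci_id;

	state_id |= PSCI_POWER_STATE(is_reset);
	state_id |= PSCI_AFFINITY_LEVEL(affinity_level);
	return state_id;
}

int main(void)
{
	/* placeholder ids: CPU power-collapse id 0x3, cluster level contributes 0x40 */
	uint32_t id = make_state_id(1, 1, 0x3, 0x40);

	printf("state_id = 0x%08x\n", (unsigned int)id);
	return 0;
}

With is_reset = 1, affinity level 1, and the placeholder ids above, the packed value is 0x41000043: bit 30 carries the reset flag, bits 25:24 the affinity level, and the per-level ids occupy the low bits.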