Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--  arch/arm64/kernel/Makefile | 25
-rw-r--r--  arch/arm64/kernel/alternative.c | 9
-rw-r--r--  arch/arm64/kernel/app_api.c | 135
-rw-r--r--  arch/arm64/kernel/app_setting.c | 139
-rw-r--r--  arch/arm64/kernel/arm64ksyms.c | 7
-rw-r--r--  arch/arm64/kernel/armv8_deprecated.c | 19
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 108
-rw-r--r--  arch/arm64/kernel/bpi.S | 87
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 192
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 27
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 23
-rw-r--r--  arch/arm64/kernel/debug-monitors.c | 33
-rw-r--r--  arch/arm64/kernel/efi-entry.S | 2
-rw-r--r--  arch/arm64/kernel/entry-fpsimd.S | 16
-rw-r--r--  arch/arm64/kernel/entry.S | 54
-rw-r--r--  arch/arm64/kernel/fpsimd.c | 49
-rw-r--r--  arch/arm64/kernel/head.S | 164
-rw-r--r--  arch/arm64/kernel/hibernate-asm.S | 176
-rw-r--r--  arch/arm64/kernel/hibernate.c | 520
-rw-r--r--  arch/arm64/kernel/hw_breakpoint.c | 8
-rw-r--r--  arch/arm64/kernel/hyp-stub.S | 45
-rw-r--r--  arch/arm64/kernel/image.h | 2
-rw-r--r--  arch/arm64/kernel/insn.c | 301
-rw-r--r--  arch/arm64/kernel/io.c | 19
-rw-r--r--  arch/arm64/kernel/kgdb.c | 4
-rw-r--r--  arch/arm64/kernel/kuser32.S | 48
-rw-r--r--  arch/arm64/kernel/module.c | 7
-rw-r--r--  arch/arm64/kernel/perf_debug.c | 73
-rw-r--r--  arch/arm64/kernel/perf_event.c | 682
-rw-r--r--  arch/arm64/kernel/perf_trace_counters.c | 180
-rw-r--r--  arch/arm64/kernel/perf_trace_counters.h | 111
-rw-r--r--  arch/arm64/kernel/perf_trace_user.c | 96
-rw-r--r--  arch/arm64/kernel/perf_trace_user.h | 85
-rw-r--r--  arch/arm64/kernel/probes/Makefile | 3
-rw-r--r--  arch/arm64/kernel/probes/decode-insn.c | 174
-rw-r--r--  arch/arm64/kernel/probes/decode-insn.h | 35
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 657
-rw-r--r--  arch/arm64/kernel/probes/kprobes_trampoline.S | 81
-rw-r--r--  arch/arm64/kernel/probes/simulate-insn.c | 217
-rw-r--r--  arch/arm64/kernel/probes/simulate-insn.h | 28
-rw-r--r--  arch/arm64/kernel/process.c | 24
-rw-r--r--  arch/arm64/kernel/psci.c | 102
-rw-r--r--  arch/arm64/kernel/ptrace.c | 108
-rw-r--r--  arch/arm64/kernel/setup.c | 26
-rw-r--r--  arch/arm64/kernel/signal.c | 4
-rw-r--r--  arch/arm64/kernel/signal32.c | 66
-rw-r--r--  arch/arm64/kernel/sigreturn32.S | 67
-rw-r--r--  arch/arm64/kernel/sleep.S | 161
-rw-r--r--  arch/arm64/kernel/smp.c | 144
-rw-r--r--  arch/arm64/kernel/smp_spin_table.c | 3
-rw-r--r--  arch/arm64/kernel/stacktrace.c | 24
-rw-r--r--  arch/arm64/kernel/suspend.c | 82
-rw-r--r--  arch/arm64/kernel/topology.c | 187
-rw-r--r--  arch/arm64/kernel/traps.c | 74
-rw-r--r--  arch/arm64/kernel/vdso.c | 253
-rw-r--r--  arch/arm64/kernel/vdso/Makefile | 29
-rw-r--r--  arch/arm64/kernel/vdso/compiler.h | 70
-rw-r--r--  arch/arm64/kernel/vdso/datapage.h | 59
-rw-r--r--  arch/arm64/kernel/vdso/vdso.lds.S | 1
-rw-r--r--  arch/arm64/kernel/vdso/vgettimeofday.c | 3
-rw-r--r--  arch/arm64/kernel/vdso32/.gitignore | 2
-rw-r--r--  arch/arm64/kernel/vdso32/Makefile | 178
-rw-r--r--  arch/arm64/kernel/vdso32/compiler.h | 122
-rw-r--r--  arch/arm64/kernel/vdso32/datapage.h | 1
-rw-r--r--  arch/arm64/kernel/vdso32/sigreturn.S | 76
-rw-r--r--  arch/arm64/kernel/vdso32/vdso.S | 32
-rw-r--r--  arch/arm64/kernel/vdso32/vdso.lds.S | 95
-rw-r--r--  arch/arm64/kernel/vdso32/vgettimeofday.c | 3
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S | 79
69 files changed, 5275 insertions, 1441 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d10034327423..18938199a838 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -25,14 +25,18 @@ OBJCOPYFLAGS := --prefix-symbols=__efistub_
$(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
-arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
- sys_compat.o entry32.o \
- ../../arm/kernel/opcodes.o
+arm64-obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
+ sys_compat.o entry32.o
+ifneq ($(CONFIG_VDSO32),y)
+arm64-obj-$(CONFIG_COMPAT) += sigreturn32.o
+endif
+arm64-obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
-arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
+arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_debug.o perf_trace_counters.o \
+ perf_trace_user.o
arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
@@ -42,10 +46,19 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
-arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+arm64-obj-$(CONFIG_MSM_APP_API) += app_api.o
+arm64-obj-$(CONFIG_MSM_APP_SETTINGS) += app_setting.o
+arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
+arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
+arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
+
+ifeq ($(CONFIG_KVM),y)
+arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
+endif
-obj-y += $(arm64-obj-y) vdso/
+obj-y += $(arm64-obj-y) vdso/ probes/
+obj-$(CONFIG_VDSO32) += vdso32/
obj-m += $(arm64-obj-m)
head-y := head.o
extra-y += $(head-y) vmlinux.lds
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index d2ee1b21a10d..737481c8e918 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -25,14 +25,13 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
+#include <asm/sections.h>
#include <linux/stop_machine.h>
#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-
struct alt_region {
struct alt_instr *begin;
struct alt_instr *end;
@@ -108,7 +107,7 @@ static void __apply_alternatives(void *alt_region)
for (i = 0; i < nr_inst; i++) {
insn = get_alt_insn(alt, origptr + i, replptr + i);
- *(origptr + i) = cpu_to_le32(insn);
+ BUG_ON(aarch64_insn_patch_text_nosync(origptr + i, insn));
}
flush_icache_range((uintptr_t)origptr,
@@ -124,8 +123,8 @@ static int __apply_alternatives_multi_stop(void *unused)
{
static int patched = 0;
struct alt_region region = {
- .begin = __alt_instructions,
- .end = __alt_instructions_end,
+ .begin = (struct alt_instr *)__alt_instructions,
+ .end = (struct alt_instr *)__alt_instructions_end,
};
/* We always have a CPU 0 at this point (__init) */
diff --git a/arch/arm64/kernel/app_api.c b/arch/arm64/kernel/app_api.c
new file mode 100644
index 000000000000..e995bbf3c7b4
--- /dev/null
+++ b/arch/arm64/kernel/app_api.c
@@ -0,0 +1,135 @@
+/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/bitops.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/export.h>
+
+#include <asm/app_api.h>
+
+static spinlock_t spinlock;
+static spinlock_t spinlock_32bit_app;
+static DEFINE_PER_CPU(int, app_config_applied);
+static unsigned long app_config_set[NR_CPUS];
+static unsigned long app_config_clear[NR_CPUS];
+
+void set_app_setting_bit(uint32_t bit)
+{
+ unsigned long flags;
+ uint64_t reg;
+ int cpu;
+
+ spin_lock_irqsave(&spinlock, flags);
+ asm volatile("mrs %0, S3_1_C15_C15_0" : "=r" (reg));
+ reg = reg | BIT(bit);
+ isb();
+ asm volatile("msr S3_1_C15_C15_0, %0" : : "r" (reg));
+ isb();
+ if (bit == APP_SETTING_BIT) {
+ cpu = raw_smp_processor_id();
+ app_config_set[cpu]++;
+
+ this_cpu_write(app_config_applied, 1);
+ }
+ spin_unlock_irqrestore(&spinlock, flags);
+
+}
+EXPORT_SYMBOL(set_app_setting_bit);
+
+void clear_app_setting_bit(uint32_t bit)
+{
+ unsigned long flags;
+ uint64_t reg;
+ int cpu;
+
+ spin_lock_irqsave(&spinlock, flags);
+ asm volatile("mrs %0, S3_1_C15_C15_0" : "=r" (reg));
+ reg = reg & ~BIT(bit);
+ isb();
+ asm volatile("msr S3_1_C15_C15_0, %0" : : "r" (reg));
+ isb();
+ if (bit == APP_SETTING_BIT) {
+ cpu = raw_smp_processor_id();
+ app_config_clear[cpu]++;
+
+ this_cpu_write(app_config_applied, 0);
+ }
+ spin_unlock_irqrestore(&spinlock, flags);
+}
+EXPORT_SYMBOL(clear_app_setting_bit);
+
+void set_app_setting_bit_for_32bit_apps(void)
+{
+ unsigned long flags;
+ uint64_t reg;
+
+ spin_lock_irqsave(&spinlock_32bit_app, flags);
+ if (use_32bit_app_setting) {
+ asm volatile("mrs %0, S3_0_c15_c15_0 " : "=r" (reg));
+ reg = reg | BIT(24);
+ isb();
+ asm volatile("msr S3_0_c15_c15_0, %0" : : "r" (reg));
+ isb();
+ asm volatile("mrs %0, S3_0_c15_c15_1 " : "=r" (reg));
+ reg = reg | BIT(18) | BIT(2) | BIT(0);
+ isb();
+ asm volatile("msr S3_0_c15_c15_1, %0" : : "r" (reg));
+ isb();
+ } else if (use_32bit_app_setting_pro) {
+ asm volatile("mrs %0, S3_0_c15_c15_1 " : "=r" (reg));
+ reg = reg | BIT(18);
+ isb();
+ asm volatile("msr S3_0_c15_c15_1, %0" : : "r" (reg));
+ isb();
+ }
+ spin_unlock_irqrestore(&spinlock_32bit_app, flags);
+}
+EXPORT_SYMBOL(set_app_setting_bit_for_32bit_apps);
+
+void clear_app_setting_bit_for_32bit_apps(void)
+{
+ unsigned long flags;
+ uint64_t reg;
+
+ spin_lock_irqsave(&spinlock_32bit_app, flags);
+ if (use_32bit_app_setting) {
+ asm volatile("mrs %0, S3_0_c15_c15_0 " : "=r" (reg));
+ reg = reg & ~BIT(24);
+ isb();
+ asm volatile("msr S3_0_c15_c15_0, %0" : : "r" (reg));
+ isb();
+ asm volatile("mrs %0, S3_0_c15_c15_1 " : "=r" (reg));
+ reg = reg & ~BIT(18);
+ reg = reg & ~BIT(2);
+ reg = reg & ~BIT(0);
+ isb();
+ asm volatile("msr S3_0_c15_c15_1, %0" : : "r" (reg));
+ isb();
+ } else if (use_32bit_app_setting_pro) {
+ asm volatile("mrs %0, S3_0_c15_c15_1 " : "=r" (reg));
+ reg = reg & ~BIT(18);
+ isb();
+ asm volatile("msr S3_0_c15_c15_1, %0" : : "r" (reg));
+ isb();
+ }
+ spin_unlock_irqrestore(&spinlock_32bit_app, flags);
+}
+EXPORT_SYMBOL(clear_app_setting_bit_for_32bit_apps);
+
+static int __init init_app_api(void)
+{
+ spin_lock_init(&spinlock);
+ spin_lock_init(&spinlock_32bit_app);
+ return 0;
+}
+early_initcall(init_app_api);
diff --git a/arch/arm64/kernel/app_setting.c b/arch/arm64/kernel/app_setting.c
new file mode 100644
index 000000000000..0c6b00317645
--- /dev/null
+++ b/arch/arm64/kernel/app_setting.c
@@ -0,0 +1,139 @@
+/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/notifier.h>
+
+#include <asm/app_api.h>
+
+#define MAX_LEN 100
+
+static char *lib_names[MAX_ENTRIES];
+static unsigned int count;
+static struct mutex mutex;
+
+static char lib_str[MAX_LEN] = "";
+static struct kparam_string kps = {
+ .string = lib_str,
+ .maxlen = MAX_LEN,
+};
+static int set_name(const char *str, struct kernel_param *kp);
+module_param_call(lib_name, set_name, param_get_string, &kps, S_IWUSR);
+
+bool use_app_setting = true;
+module_param(use_app_setting, bool, 0644);
+MODULE_PARM_DESC(use_app_setting, "control use of app specific settings");
+
+bool use_32bit_app_setting = true;
+module_param(use_32bit_app_setting, bool, 0644);
+MODULE_PARM_DESC(use_32bit_app_setting, "control use of 32 bit app specific settings");
+
+bool use_32bit_app_setting_pro;
+module_param(use_32bit_app_setting_pro, bool, 0644);
+MODULE_PARM_DESC(use_32bit_app_setting_pro, "control use of 32 bit app specific settings");
+
+static int set_name(const char *str, struct kernel_param *kp)
+{
+ int len = strlen(str);
+ char *name;
+
+ if (len >= MAX_LEN) {
+ pr_err("app_setting: name string too long\n");
+ return -ENOSPC;
+ }
+
+ /*
+ * echo adds '\n' which we need to chop off later
+ */
+ name = kzalloc(len + 1, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ strlcpy(name, str, len + 1);
+
+ if (name[len - 1] == '\n')
+ name[len - 1] = '\0';
+
+ mutex_lock(&mutex);
+ if (count < MAX_ENTRIES) {
+ lib_names[count] = name;
+ /*
+ * mb to ensure that the new lib_names entry is present
+ * before updating the view presented by get_lib_names
+ */
+ mb();
+ count++;
+ } else {
+ pr_err("app_setting: set name failed. Max entries reached\n");
+ kfree(name);
+ mutex_unlock(&mutex);
+ return -EPERM;
+ }
+ mutex_unlock(&mutex);
+
+ return 0;
+}
+
+void switch_app_setting_bit(struct task_struct *prev, struct task_struct *next)
+{
+ if (prev->mm && unlikely(prev->mm->app_setting))
+ clear_app_setting_bit(APP_SETTING_BIT);
+
+ if (next->mm && unlikely(next->mm->app_setting))
+ set_app_setting_bit(APP_SETTING_BIT);
+}
+EXPORT_SYMBOL(switch_app_setting_bit);
+
+void switch_32bit_app_setting_bit(struct task_struct *prev,
+ struct task_struct *next)
+{
+ if (prev->mm && unlikely(is_compat_thread(task_thread_info(prev))))
+ clear_app_setting_bit_for_32bit_apps();
+
+ if (next->mm && unlikely(is_compat_thread(task_thread_info(next))))
+ set_app_setting_bit_for_32bit_apps();
+}
+EXPORT_SYMBOL(switch_32bit_app_setting_bit);
+
+void apply_app_setting_bit(struct file *file)
+{
+ bool found = false;
+ int i;
+
+ if (file && file->f_path.dentry) {
+ const char *name = file->f_path.dentry->d_name.name;
+
+ for (i = 0; i < count; i++) {
+ if (unlikely(!strcmp(name, lib_names[i]))) {
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ preempt_disable();
+ set_app_setting_bit(APP_SETTING_BIT);
+ /* This will take care of child processes as well */
+ current->mm->app_setting = 1;
+ preempt_enable();
+ }
+ }
+}
+EXPORT_SYMBOL(apply_app_setting_bit);
+
+static int __init app_setting_init(void)
+{
+ mutex_init(&mutex);
+ return 0;
+}
+module_init(app_setting_init);
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 2dc44406a7ad..546ce8979b3a 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -26,8 +26,10 @@
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/kprobes.h>
#include <linux/arm-smccc.h>
+#include <asm/cacheflush.h>
#include <asm/checksum.h>
EXPORT_SYMBOL(copy_page);
@@ -68,7 +70,12 @@ EXPORT_SYMBOL(test_and_change_bit);
#ifdef CONFIG_FUNCTION_TRACER
EXPORT_SYMBOL(_mcount);
+NOKPROBE_SYMBOL(_mcount);
#endif
+ /* caching functions */
+EXPORT_SYMBOL(__dma_inv_range);
+EXPORT_SYMBOL(__dma_clean_range);
+EXPORT_SYMBOL(__dma_flush_range);
/* arm-smccc */
EXPORT_SYMBOL(arm_smccc_smc);
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index fae82901ae81..e012ecd018ee 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -366,6 +366,21 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
return res;
}
+#define ARM_OPCODE_CONDITION_UNCOND 0xf
+
+static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
+{
+ u32 cc_bits = opcode >> 28;
+
+ if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+ if ((*aarch32_opcode_cond_checks[cc_bits])(psr))
+ return ARM_OPCODE_CONDTEST_PASS;
+ else
+ return ARM_OPCODE_CONDTEST_FAIL;
+ }
+ return ARM_OPCODE_CONDTEST_UNCOND;
+}
+
/*
* swp_handler logs the id of calling process, dissects the instruction, sanity
* checks the memory location, calls emulate_swpX for the actual operation and
@@ -380,7 +395,7 @@ static int swp_handler(struct pt_regs *regs, u32 instr)
type = instr & TYPE_SWPB;
- switch (arm_check_condition(instr, regs->pstate)) {
+ switch (aarch32_check_condition(instr, regs->pstate)) {
case ARM_OPCODE_CONDTEST_PASS:
break;
case ARM_OPCODE_CONDTEST_FAIL:
@@ -461,7 +476,7 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr)
{
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
- switch (arm_check_condition(instr, regs->pstate)) {
+ switch (aarch32_check_condition(instr, regs->pstate)) {
case ARM_OPCODE_CONDTEST_PASS:
break;
case ARM_OPCODE_CONDTEST_FAIL:
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3fa949f04ce4..737eb8b80485 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -22,9 +22,11 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
+#include <linux/suspend.h>
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
+#include <asm/signal32.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/vdso_datapage.h>
@@ -59,6 +61,17 @@ int main(void)
DEFINE(S_X5, offsetof(struct pt_regs, regs[5]));
DEFINE(S_X6, offsetof(struct pt_regs, regs[6]));
DEFINE(S_X7, offsetof(struct pt_regs, regs[7]));
+ DEFINE(S_X8, offsetof(struct pt_regs, regs[8]));
+ DEFINE(S_X10, offsetof(struct pt_regs, regs[10]));
+ DEFINE(S_X12, offsetof(struct pt_regs, regs[12]));
+ DEFINE(S_X14, offsetof(struct pt_regs, regs[14]));
+ DEFINE(S_X16, offsetof(struct pt_regs, regs[16]));
+ DEFINE(S_X18, offsetof(struct pt_regs, regs[18]));
+ DEFINE(S_X20, offsetof(struct pt_regs, regs[20]));
+ DEFINE(S_X22, offsetof(struct pt_regs, regs[22]));
+ DEFINE(S_X24, offsetof(struct pt_regs, regs[24]));
+ DEFINE(S_X26, offsetof(struct pt_regs, regs[26]));
+ DEFINE(S_X28, offsetof(struct pt_regs, regs[28]));
DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP, offsetof(struct pt_regs, sp));
#ifdef CONFIG_COMPAT
@@ -71,6 +84,18 @@ int main(void)
DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
+#ifdef CONFIG_COMPAT
+ DEFINE(COMPAT_SIGFRAME_REGS_OFFSET,
+ offsetof(struct compat_sigframe, uc) +
+ offsetof(struct compat_ucontext, uc_mcontext) +
+ offsetof(struct compat_sigcontext, arm_r0));
+ DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET,
+ offsetof(struct compat_rt_sigframe, sig) +
+ offsetof(struct compat_sigframe, uc) +
+ offsetof(struct compat_ucontext, uc_mcontext) +
+ offsetof(struct compat_sigcontext, arm_r0));
+ BLANK();
+#endif
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
BLANK();
DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
@@ -84,40 +109,6 @@ int main(void)
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
BLANK();
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
- DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- BLANK();
- DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
- DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
- DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec));
- DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
- DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
- DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
- DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
- DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
- DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec));
- DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
- DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
- DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult));
- DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
- DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
- DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
- BLANK();
- DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
- DEFINE(TVAL_TV_USEC, offsetof(struct timeval, tv_usec));
- DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
- DEFINE(TSPEC_TV_NSEC, offsetof(struct timespec, tv_nsec));
- BLANK();
- DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
- DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
- BLANK();
#ifdef CONFIG_THREAD_INFO_IN_TASK
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
@@ -128,62 +119,27 @@ int main(void)
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
- DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1));
- DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1));
- DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr));
- DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs));
+ DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
- DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
- DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr));
- DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr));
- DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr));
- DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr));
- DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr));
- DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
- DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2));
- DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
- DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state));
- DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
- DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
- DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
- DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
- DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
- DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
- DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
- DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
- DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
- DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
- DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
- DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
- DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
- DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
- DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
- DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
- DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
- DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
- DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
- DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
- DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
- DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
- DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
- DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
- DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask));
DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff));
- DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
- DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
- DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
+ DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
+ DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
#endif
DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0));
DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
BLANK();
+ DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
+ DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
+ DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
+ BLANK();
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
DEFINE(TRAMP_VALIAS, TRAMP_VALIAS);
#endif
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
new file mode 100644
index 000000000000..76225c2611ea
--- /dev/null
+++ b/arch/arm64/kernel/bpi.S
@@ -0,0 +1,87 @@
+/*
+ * Contains CPU specific branch predictor invalidation sequences
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+.macro ventry target
+ .rept 31
+ nop
+ .endr
+ b \target
+.endm
+
+.macro vectors target
+ ventry \target + 0x000
+ ventry \target + 0x080
+ ventry \target + 0x100
+ ventry \target + 0x180
+
+ ventry \target + 0x200
+ ventry \target + 0x280
+ ventry \target + 0x300
+ ventry \target + 0x380
+
+ ventry \target + 0x400
+ ventry \target + 0x480
+ ventry \target + 0x500
+ ventry \target + 0x580
+
+ ventry \target + 0x600
+ ventry \target + 0x680
+ ventry \target + 0x700
+ ventry \target + 0x780
+.endm
+
+ .align 11
+ENTRY(__bp_harden_hyp_vecs_start)
+ .rept 4
+ vectors __kvm_hyp_vector
+ .endr
+ENTRY(__bp_harden_hyp_vecs_end)
+ENTRY(__psci_hyp_bp_inval_start)
+ sub sp, sp, #(8 * 18)
+ stp x16, x17, [sp, #(16 * 0)]
+ stp x14, x15, [sp, #(16 * 1)]
+ stp x12, x13, [sp, #(16 * 2)]
+ stp x10, x11, [sp, #(16 * 3)]
+ stp x8, x9, [sp, #(16 * 4)]
+ stp x6, x7, [sp, #(16 * 5)]
+ stp x4, x5, [sp, #(16 * 6)]
+ stp x2, x3, [sp, #(16 * 7)]
+ stp x0, x1, [sp, #(16 * 8)]
+ mov x0, #0x84000000
+ smc #0
+ ldp x16, x17, [sp, #(16 * 0)]
+ ldp x14, x15, [sp, #(16 * 1)]
+ ldp x12, x13, [sp, #(16 * 2)]
+ ldp x10, x11, [sp, #(16 * 3)]
+ ldp x8, x9, [sp, #(16 * 4)]
+ ldp x6, x7, [sp, #(16 * 5)]
+ ldp x4, x5, [sp, #(16 * 6)]
+ ldp x2, x3, [sp, #(16 * 7)]
+ ldp x0, x1, [sp, #(16 * 8)]
+ add sp, sp, #(8 * 18)
+ENTRY(__psci_hyp_bp_inval_end)
+
+ENTRY(__qcom_hyp_sanitize_link_stack_start)
+ stp x29, x30, [sp, #-16]!
+ .rept 16
+ bl . + 4
+ .endr
+ ldp x29, x30, [sp], #16
+ENTRY(__qcom_hyp_sanitize_link_stack_end)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 06afd04e02c0..e857248dd980 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -29,12 +29,160 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
entry->midr_range_max);
}
+static bool __maybe_unused
+is_kryo_midr(const struct arm64_cpu_capabilities *entry)
+{
+ u32 model;
+
+ model = read_cpuid_id();
+ model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) |
+ MIDR_ARCHITECTURE_MASK;
+
+ return model == entry->midr_model;
+}
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+#ifdef CONFIG_KVM
+extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[];
+extern char __qcom_hyp_sanitize_link_stack_start[];
+extern char __qcom_hyp_sanitize_link_stack_end[];
+
+static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
+{
+ void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K);
+ int i;
+
+ for (i = 0; i < SZ_2K; i += 0x80)
+ memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
+
+ flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
+}
+
+static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
+ const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
+{
+ static int last_slot = -1;
+ static DEFINE_SPINLOCK(bp_lock);
+ int cpu, slot = -1;
+
+ spin_lock(&bp_lock);
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
+ slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
+ break;
+ }
+ }
+
+ if (slot == -1) {
+ last_slot++;
+ BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
+ / SZ_2K) <= last_slot);
+ slot = last_slot;
+ __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
+ }
+
+ __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
+ __this_cpu_write(bp_hardening_data.fn, fn);
+ spin_unlock(&bp_lock);
+}
+#else
+#define __psci_hyp_bp_inval_start NULL
+#define __psci_hyp_bp_inval_end NULL
+#define __qcom_hyp_sanitize_link_stack_start NULL
+#define __qcom_hyp_sanitize_link_stack_end NULL
+
+static void __maybe_unused __install_bp_hardening_cb(bp_hardening_cb_t fn,
+ const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
+{
+ __this_cpu_write(bp_hardening_data.fn, fn);
+}
+#endif /* CONFIG_KVM */
+
+static void __maybe_unused install_bp_hardening_cb(
+ const struct arm64_cpu_capabilities *entry,
+ bp_hardening_cb_t fn,
+ const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
+{
+ u64 pfr0;
+
+ if (!entry->matches(entry))
+ return;
+
+ pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1);
+ if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
+ return;
+
+ __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end);
+}
+
+#include <linux/psci.h>
+
+static int enable_psci_bp_hardening(void *data)
+{
+ const struct arm64_cpu_capabilities *entry = data;
+
+ if (psci_ops.get_version)
+ install_bp_hardening_cb(entry,
+ (bp_hardening_cb_t)psci_ops.get_version,
+ __psci_hyp_bp_inval_start,
+ __psci_hyp_bp_inval_end);
+
+ return 0;
+}
+
+static void __maybe_unused qcom_link_stack_sanitization(void)
+{
+ u64 tmp;
+
+ asm volatile("mov %0, x30 \n"
+ ".rept 16 \n"
+ "bl . + 4 \n"
+ ".endr \n"
+ "mov x30, %0 \n"
+ : "=&r" (tmp));
+}
+
+static void __maybe_unused qcom_bp_hardening(void)
+{
+ qcom_link_stack_sanitization();
+ if (psci_ops.get_version)
+ psci_ops.get_version();
+}
+
+static int __maybe_unused enable_qcom_bp_hardening(void *data)
+{
+ const struct arm64_cpu_capabilities *entry = data;
+
+ install_bp_hardening_cb(entry,
+ (bp_hardening_cb_t)qcom_bp_hardening,
+ __psci_hyp_bp_inval_start,
+ __psci_hyp_bp_inval_end);
+ return 0;
+}
+
+#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
+
#define MIDR_RANGE(model, min, max) \
.matches = is_affected_midr_range, \
.midr_model = model, \
.midr_range_min = min, \
.midr_range_max = max
+#define MIDR_ALL_VERSIONS(model) \
+ .matches = is_affected_midr_range, \
+ .midr_model = model, \
+ .midr_range_min = 0, \
+ .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK)
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#if defined(CONFIG_ARM64_ERRATUM_826319) || \
defined(CONFIG_ARM64_ERRATUM_827319) || \
@@ -79,6 +227,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.capability = ARM64_WORKAROUND_845719,
MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04),
},
+ {
+ /* Kryo2xx Silver rAp4 */
+ .desc = "Kryo2xx Silver erratum 845719",
+ .capability = ARM64_WORKAROUND_845719,
+ MIDR_RANGE(MIDR_KRYO2XX_SILVER, 0xA00004, 0xA00004),
+ },
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_23154
{
@@ -97,6 +251,39 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
(1 << MIDR_VARIANT_SHIFT) | 1),
},
#endif
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_KRYO2XX_GOLD),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ .midr_model = MIDR_QCOM_KRYO,
+ .matches = is_kryo_midr,
+ .enable = enable_qcom_bp_hardening,
+ },
+#endif
{
}
};
@@ -105,3 +292,8 @@ void check_local_cpu_errata(void)
{
update_cpu_capabilities(arm64_errata, "enabling workaround for");
}
+
+void __init enable_errata_workarounds(void)
+{
+ enable_cpu_capabilities(arm64_errata);
+}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index aee34290cd09..2a2bf5231f6a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -29,6 +29,7 @@
#include <asm/cpu_ops.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
+#include <asm/virt.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -91,6 +92,7 @@ static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
+ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
@@ -656,6 +658,11 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
}
+static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
+{
+ return is_kernel_in_hyp_mode();
+}
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
@@ -744,6 +751,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = cpufeature_pan_not_uao,
},
#endif /* CONFIG_ARM64_PAN */
+ {
+ .desc = "Virtualization Host Extensions",
+ .capability = ARM64_HAS_VIRT_HOST_EXTN,
+ .matches = runs_at_el2,
+ },
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
{
.capability = ARM64_UNMAP_KERNEL_AT_EL0,
@@ -867,8 +879,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
* Run through the enabled capabilities and enable() it on all active
* CPUs
*/
-static void __init
-enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
{
int i;
@@ -880,7 +891,8 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
* uses an IPI, giving us a PSTATE that disappears when
* we return.
*/
- stop_machine(caps[i].enable, NULL, cpu_online_mask);
+ stop_machine(caps[i].enable, (void *)&caps[i],
+ cpu_online_mask);
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -995,7 +1007,7 @@ void verify_local_cpu_capabilities(void)
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
fail_incapable_cpu("arm64_features", &caps[i]);
if (caps[i].enable)
- caps[i].enable(NULL);
+ caps[i].enable((void *)&caps[i]);
}
for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
@@ -1027,6 +1039,7 @@ void __init setup_cpu_features(void)
/* Set the CPU feature capabilies */
setup_feature_capabilities();
+ enable_errata_workarounds();
setup_cpu_hwcaps();
/* Advertise that we have computed the system capabilities */
@@ -1040,9 +1053,9 @@ void __init setup_cpu_features(void)
if (!cwg)
pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
cls);
- if (L1_CACHE_BYTES < cls)
- pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
- L1_CACHE_BYTES, cls);
+ if (ARCH_DMA_MINALIGN < cls)
+ pr_warn("ARCH_DMA_MINALIGN smaller than the Cache Writeback Granule (%d < %d)\n",
+ ARCH_DMA_MINALIGN, cls);
}
static bool __maybe_unused
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 95a6fae54740..4b2caefd3a8f 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -19,6 +19,7 @@
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/elf.h>
#include <linux/bitops.h>
#include <linux/bug.h>
@@ -33,6 +34,10 @@
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/delay.h>
+#include <linux/of_fdt.h>
+
+char* (*arch_read_hardware_id)(void);
+EXPORT_SYMBOL(arch_read_hardware_id);
/*
* In case the boot CPU is hotpluggable, we record its initial state and
@@ -106,7 +111,9 @@ static int c_show(struct seq_file *m, void *v)
int i, j;
bool compat = personality(current->personality) == PER_LINUX32;
- for_each_online_cpu(i) {
+ seq_printf(m, "Processor\t: AArch64 Processor rev %d (%s)\n",
+ read_cpuid_id() & 15, ELF_PLATFORM);
+ for_each_present_cpu(i) {
struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
u32 midr = cpuinfo->reg_midr;
@@ -156,6 +163,11 @@ static int c_show(struct seq_file *m, void *v)
seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
}
+ if (!arch_read_hardware_id)
+ seq_printf(m, "Hardware\t: %s\n", machine_name);
+ else
+ seq_printf(m, "Hardware\t: %s\n", arch_read_hardware_id());
+
return 0;
}
@@ -201,7 +213,7 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
if (l1ip == ICACHE_POLICY_AIVIVT)
set_bit(ICACHEF_AIVIVT, &__icache_flags);
- pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
+ pr_debug("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
}
static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
@@ -215,7 +227,12 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64dfr1 = read_cpuid(SYS_ID_AA64DFR1_EL1);
info->reg_id_aa64isar0 = read_cpuid(SYS_ID_AA64ISAR0_EL1);
info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1);
- info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1);
+ /*
+ * Explicitly mask out 16KB granule since we donot
+ * want to support it
+ */
+ info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1) &
+ (~MMFR0_EL1_16KGRAN_MASK);
info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1);
info->reg_id_aa64mmfr2 = read_cpuid(SYS_ID_AA64MMFR2_EL1);
info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 77fbcabcd9e3..50392fe170a9 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -23,6 +23,7 @@
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/ptrace.h>
+#include <linux/kprobes.h>
#include <linux/stat.h>
#include <linux/uaccess.h>
@@ -48,6 +49,7 @@ static void mdscr_write(u32 mdscr)
asm volatile("msr mdscr_el1, %0" :: "r" (mdscr));
local_dbg_restore(flags);
}
+NOKPROBE_SYMBOL(mdscr_write);
static u32 mdscr_read(void)
{
@@ -55,6 +57,7 @@ static u32 mdscr_read(void)
asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr));
return mdscr;
}
+NOKPROBE_SYMBOL(mdscr_read);
/*
* Allow root to disable self-hosted debug from userspace.
@@ -103,6 +106,7 @@ void enable_debug_monitors(enum dbg_active_el el)
mdscr_write(mdscr);
}
}
+NOKPROBE_SYMBOL(enable_debug_monitors);
void disable_debug_monitors(enum dbg_active_el el)
{
@@ -123,6 +127,7 @@ void disable_debug_monitors(enum dbg_active_el el)
mdscr_write(mdscr);
}
}
+NOKPROBE_SYMBOL(disable_debug_monitors);
/*
* OS lock clearing.
@@ -173,6 +178,7 @@ static void set_regs_spsr_ss(struct pt_regs *regs)
spsr |= DBG_SPSR_SS;
regs->pstate = spsr;
}
+NOKPROBE_SYMBOL(set_regs_spsr_ss);
static void clear_regs_spsr_ss(struct pt_regs *regs)
{
@@ -182,6 +188,7 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)
spsr &= ~DBG_SPSR_SS;
regs->pstate = spsr;
}
+NOKPROBE_SYMBOL(clear_regs_spsr_ss);
/* EL1 Single Step Handler hooks */
static LIST_HEAD(step_hook);
@@ -225,6 +232,7 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr)
return retval;
}
+NOKPROBE_SYMBOL(call_step_hook);
static int single_step_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
@@ -253,6 +261,10 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
*/
user_rewind_single_step(current);
} else {
+#ifdef CONFIG_KPROBES
+ if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ return 0;
+#endif
if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
return 0;
@@ -266,6 +278,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
return 0;
}
+NOKPROBE_SYMBOL(single_step_handler);
/*
* Breakpoint handler is re-entrant as another breakpoint can
@@ -303,6 +316,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
}
+NOKPROBE_SYMBOL(call_break_hook);
static int brk_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
@@ -318,13 +332,21 @@ static int brk_handler(unsigned long addr, unsigned int esr,
};
force_sig_info(SIGTRAP, &info, current);
- } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
- pr_warning("Unexpected kernel BRK exception at EL1\n");
+ }
+#ifdef CONFIG_KPROBES
+ else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) {
+ if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED)
+ return -EFAULT;
+ }
+#endif
+ else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
+ pr_warn("Unexpected kernel BRK exception at EL1\n");
return -EFAULT;
}
return 0;
}
+NOKPROBE_SYMBOL(brk_handler);
int aarch32_break_handler(struct pt_regs *regs)
{
@@ -369,6 +391,7 @@ int aarch32_break_handler(struct pt_regs *regs)
force_sig_info(SIGTRAP, &info, current);
return 0;
}
+NOKPROBE_SYMBOL(aarch32_break_handler);
static int __init debug_traps_init(void)
{
@@ -390,6 +413,7 @@ void user_rewind_single_step(struct task_struct *task)
if (test_tsk_thread_flag(task, TIF_SINGLESTEP))
set_regs_spsr_ss(task_pt_regs(task));
}
+NOKPROBE_SYMBOL(user_rewind_single_step);
void user_fastforward_single_step(struct task_struct *task)
{
@@ -405,6 +429,7 @@ void kernel_enable_single_step(struct pt_regs *regs)
mdscr_write(mdscr_read() | DBG_MDSCR_SS);
enable_debug_monitors(DBG_ACTIVE_EL1);
}
+NOKPROBE_SYMBOL(kernel_enable_single_step);
void kernel_disable_single_step(void)
{
@@ -412,12 +437,14 @@ void kernel_disable_single_step(void)
mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
disable_debug_monitors(DBG_ACTIVE_EL1);
}
+NOKPROBE_SYMBOL(kernel_disable_single_step);
int kernel_active_single_step(void)
{
WARN_ON(!irqs_disabled());
return mdscr_read() & DBG_MDSCR_SS;
}
+NOKPROBE_SYMBOL(kernel_active_single_step);
/* ptrace API */
void user_enable_single_step(struct task_struct *task)
@@ -427,8 +454,10 @@ void user_enable_single_step(struct task_struct *task)
if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP))
set_regs_spsr_ss(task_pt_regs(task));
}
+NOKPROBE_SYMBOL(user_enable_single_step);
void user_disable_single_step(struct task_struct *task)
{
clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
}
+NOKPROBE_SYMBOL(user_disable_single_step);
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index f82036e02485..936022f0655e 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -61,7 +61,7 @@ ENTRY(entry)
*/
mov x20, x0 // DTB address
ldr x0, [sp, #16] // relocated _text address
- movz x21, #:abs_g0:stext_offset
+ ldr w21, =stext_offset
add x21, x0, x21
/*
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index c44a82f146b1..1ffe15459c92 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -64,4 +64,20 @@ ENTRY(fpsimd_load_partial_state)
ret
ENDPROC(fpsimd_load_partial_state)
+#ifdef CONFIG_ENABLE_FP_SIMD_SETTINGS
+ENTRY(fpsimd_enable_trap)
+ mrs x0, cpacr_el1
+ bic x0, x0, #(3 << 20)
+ orr x0, x0, #(1 << 20)
+ msr cpacr_el1, x0
+ ret
+ENDPROC(fpsimd_enable_trap)
+ENTRY(fpsimd_disable_trap)
+ mrs x0, cpacr_el1
+ orr x0, x0, #(3 << 20)
+ msr cpacr_el1, x0
+ ret
+ENDPROC(fpsimd_disable_trap)
+#endif
+
#endif
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4ed652aa62fd..f0ca0eb3b077 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -376,6 +376,7 @@ tsk .req x28 // current thread_info
/*
* Exception vectors.
*/
+ .pushsection ".entry.text", "ax"
.align 11
ENTRY(vectors)
@@ -465,6 +466,9 @@ ENDPROC(el1_error_invalid)
*/
.align 6
el1_sync:
+#ifdef CONFIG_QCOM_TLB_EL2_HANDLER
+ smc #0xffff
+#endif
kernel_entry 1
mrs x1, esr_el1 // read the syndrome register
lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class
@@ -548,6 +552,7 @@ el1_irq:
bl trace_hardirqs_off
#endif
+ get_thread_info tsk
irq_handler
#ifdef CONFIG_PREEMPT
@@ -628,7 +633,7 @@ el0_sync_compat:
cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0
b.eq el0_ia
cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
- b.eq el0_fpsimd_acc
+ b.eq el0_fpsimd_acc_compat
cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception
b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
@@ -681,13 +686,15 @@ el0_ia:
* Instruction abort handling
*/
mrs x26, far_el1
- // enable interrupts before calling the main handler
- enable_dbg_and_irq
+ enable_dbg
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_off
+#endif
ct_user_exit
mov x0, x26
mov x1, x25
mov x2, sp
- bl do_mem_abort
+ bl do_el0_ia_bp_hardening
b ret_to_user
el0_fpsimd_acc:
/*
@@ -699,6 +706,17 @@ el0_fpsimd_acc:
mov x1, sp
bl do_fpsimd_acc
b ret_to_user
+el0_fpsimd_acc_compat:
+ /*
+ * Floating Point or Advanced SIMD access
+ */
+ enable_dbg
+ ct_user_exit
+ mov x0, x25
+ mov x1, sp
+ bl do_fpsimd_acc_compat
+ b ret_to_user
+
el0_fpsimd_exc:
/*
* Floating Point or Advanced SIMD exception
@@ -809,6 +827,33 @@ ENTRY(cpu_switch_to)
ldp x27, x28, [x8], #16
ldp x29, x9, [x8], #16
ldr lr, [x8]
+#ifdef CONFIG_ARM64_REG_REBALANCE_ON_CTX_SW
+ orr x13, x13, x13
+ orr x14, x14, x14
+ orr x15, x15, x15
+ orr x16, x16, x16
+ orr x17, x17, x17
+ orr x18, x18, x18
+ orr x19, x19, x19
+ orr x20, x20, x20
+ orr x21, x21, x21
+ mov v0.16b, v0.16b
+ mov v1.16b, v1.16b
+ mov v2.16b, v2.16b
+ mov v3.16b, v3.16b
+ mov v4.16b, v4.16b
+ mov v5.16b, v5.16b
+ mov v6.16b, v6.16b
+ mov v7.16b, v7.16b
+ mov v8.16b, v8.16b
+ mov v9.16b, v9.16b
+ mov v10.16b, v10.16b
+ mov v11.16b, v11.16b
+ mov v12.16b, v12.16b
+ mov v13.16b, v13.16b
+ mov v14.16b, v14.16b
+ mov v15.16b, v15.16b
+#endif
mov sp, x9
#ifdef CONFIG_THREAD_INFO_IN_TASK
msr sp_el0, x1
@@ -953,6 +998,7 @@ __ni_sys_trace:
bl do_ni_syscall
b __sys_trace_return
+ .popsection // .entry.text
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
/*
* Exception vectors trampoline.
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index f995dae1c8fd..7950df171d86 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -20,6 +20,7 @@
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/signal.h>
@@ -27,6 +28,7 @@
#include <asm/fpsimd.h>
#include <asm/cputype.h>
+#include <asm/app_api.h>
#define FPEXC_IOF (1 << 0)
#define FPEXC_DZF (1 << 1)
@@ -35,6 +37,8 @@
#define FPEXC_IXF (1 << 4)
#define FPEXC_IDF (1 << 7)
+#define FP_SIMD_BIT 31
+
/*
* In order to reduce the number of times the FPSIMD state is needlessly saved
* and restored, we need to keep track of two things:
@@ -88,14 +92,42 @@
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
*/
static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+static DEFINE_PER_CPU(int, fpsimd_stg_enable);
+
+static int fpsimd_settings = 0x1; /* default = 0x1 */
+module_param(fpsimd_settings, int, 0644);
+
+void fpsimd_settings_enable(void)
+{
+ set_app_setting_bit(FP_SIMD_BIT);
+}
+
+void fpsimd_settings_disable(void)
+{
+ clear_app_setting_bit(FP_SIMD_BIT);
+}
/*
* Trapped FP/ASIMD access.
*/
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
- /* TODO: implement lazy context saving/restoring */
- WARN_ON(1);
+ if (!fpsimd_settings)
+ return;
+
+ fpsimd_disable_trap();
+ fpsimd_settings_disable();
+ this_cpu_write(fpsimd_stg_enable, 0);
+}
+
+void do_fpsimd_acc_compat(unsigned int esr, struct pt_regs *regs)
+{
+ if (!fpsimd_settings)
+ return;
+
+ fpsimd_disable_trap();
+ fpsimd_settings_enable();
+ this_cpu_write(fpsimd_stg_enable, 1);
}
/*
@@ -135,6 +167,11 @@ void fpsimd_thread_switch(struct task_struct *next)
if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
+ if (fpsimd_settings && __this_cpu_read(fpsimd_stg_enable)) {
+ fpsimd_settings_disable();
+ this_cpu_write(fpsimd_stg_enable, 0);
+ }
+
if (next->mm) {
/*
* If we are switching to a task whose most recent userland
@@ -152,6 +189,14 @@ void fpsimd_thread_switch(struct task_struct *next)
else
set_ti_thread_flag(task_thread_info(next),
TIF_FOREIGN_FPSTATE);
+
+ if (!fpsimd_settings)
+ return;
+
+ if (test_ti_thread_flag(task_thread_info(next), TIF_32BIT))
+ fpsimd_enable_trap();
+ else
+ fpsimd_disable_trap();
}
}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 8168277277dc..87c56a9bb41c 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -51,9 +51,6 @@
#error TEXT_OFFSET must be less than 2MB
#endif
-#define KERNEL_START _text
-#define KERNEL_END _end
-
/*
* Kernel startup entry point.
* ---------------------------
@@ -102,8 +99,6 @@ _head:
#endif
#ifdef CONFIG_EFI
- .globl __efistub_stext_offset
- .set __efistub_stext_offset, stext - _head
.align 3
pe_header:
.ascii "PE"
@@ -123,11 +118,11 @@ optional_header:
.short 0x20b // PE32+ format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
- .long _end - stext // SizeOfCode
+ .long _end - efi_header_end // SizeOfCode
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
.long __efistub_entry - _head // AddressOfEntryPoint
- .long __efistub_stext_offset // BaseOfCode
+ .long efi_header_end - _head // BaseOfCode
extra_header_fields:
.quad 0 // ImageBase
@@ -144,7 +139,7 @@ extra_header_fields:
.long _end - _head // SizeOfImage
// Everything before the kernel image is considered part of the header
- .long __efistub_stext_offset // SizeOfHeaders
+ .long efi_header_end - _head // SizeOfHeaders
.long 0 // CheckSum
.short 0xa // Subsystem (EFI application)
.short 0 // DllCharacteristics
@@ -188,10 +183,10 @@ section_table:
.byte 0
.byte 0
.byte 0 // end of 0 padding of section name
- .long _end - stext // VirtualSize
- .long __efistub_stext_offset // VirtualAddress
- .long _edata - stext // SizeOfRawData
- .long __efistub_stext_offset // PointerToRawData
+ .long _end - efi_header_end // VirtualSize
+ .long efi_header_end - _head // VirtualAddress
+ .long _edata - efi_header_end // SizeOfRawData
+ .long efi_header_end - _head // PointerToRawData
.long 0 // PointerToRelocations (0 for executables)
.long 0 // PointerToLineNumbers (0 for executables)
@@ -200,15 +195,18 @@ section_table:
.long 0xe0500020 // Characteristics (section flags)
/*
- * EFI will load stext onwards at the 4k section alignment
+ * EFI will load .text onwards at the 4k section alignment
* described in the PE/COFF header. To ensure that instruction
* sequences using an adrp and a :lo12: immediate will function
- * correctly at this alignment, we must ensure that stext is
+ * correctly at this alignment, we must ensure that .text is
* placed at a 4k boundary in the Image to begin with.
*/
.align 12
+efi_header_end:
#endif
+ __INIT
+
ENTRY(stext)
bl preserve_boot_args
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
@@ -222,13 +220,11 @@ ENTRY(stext)
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
- ldr x27, 0f // address to jump to after
+ bl __cpu_setup // initialise processor
+ adr_l x27, __primary_switch // address to jump to after
// MMU has been enabled
- adr_l lr, __enable_mmu // return (PIC) address
- b __cpu_setup // initialise processor
+ b __enable_mmu
ENDPROC(stext)
- .align 3
-0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
@@ -338,7 +334,7 @@ __create_page_tables:
cmp x0, x6
b.lo 1b
- ldr x7, =SWAPPER_MM_MMUFLAGS
+ mov x7, SWAPPER_MM_MMUFLAGS
/*
* Create the identity mapping.
@@ -394,12 +390,13 @@ __create_page_tables:
* Map the kernel image (starting with PHYS_OFFSET).
*/
mov x0, x26 // swapper_pg_dir
- ldr x5, =KIMAGE_VADDR
+ mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
create_pgd_entry x0, x5, x3, x6
- ldr w6, kernel_img_size
- add x6, x6, x5
- mov x3, x24 // phys offset
+ adrp x6, _end // runtime __pa(_end)
+ adrp x3, _text // runtime __pa(_text)
+ sub x6, x6, x3 // _end - _text
+ add x6, x6, x5 // runtime __va(_end)
create_block_map x0, x7, x3, x5, x6
/*
@@ -414,16 +411,13 @@ __create_page_tables:
ret x28
ENDPROC(__create_page_tables)
-
-kernel_img_size:
- .long _end - (_head - TEXT_OFFSET)
.ltorg
/*
* The following fragment of code is executed with the MMU enabled.
*/
.set initial_sp, init_thread_union + THREAD_START_SP
-__mmap_switched:
+__primary_switched:
mov x28, lr // preserve LR
adr_l x8, vectors // load VBAR_EL1 with virtual
@@ -438,44 +432,6 @@ __mmap_switched:
bl __pi_memset
dsb ishst // Make zero page visible to PTW
-#ifdef CONFIG_RELOCATABLE
-
- /*
- * Iterate over each entry in the relocation table, and apply the
- * relocations in place.
- */
- adr_l x8, __dynsym_start // start of symbol table
- adr_l x9, __reloc_start // start of reloc table
- adr_l x10, __reloc_end // end of reloc table
-
-0: cmp x9, x10
- b.hs 2f
- ldp x11, x12, [x9], #24
- ldr x13, [x9, #-8]
- cmp w12, #R_AARCH64_RELATIVE
- b.ne 1f
- add x13, x13, x23 // relocate
- str x13, [x11, x23]
- b 0b
-
-1: cmp w12, #R_AARCH64_ABS64
- b.ne 0b
- add x12, x12, x12, lsl #1 // symtab offset: 24x top word
- add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word
- ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx
- ldr x15, [x12, #8] // Elf64_Sym::st_value
- cmp w14, #-0xf // SHN_ABS (0xfff1) ?
- add x14, x15, x23 // relocate
- csel x15, x14, x15, ne
- add x15, x13, x15
- str x15, [x11, x23]
- b 0b
-
-2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr
- dc cvac, x8 // value visible to secondaries
- dsb sy // with MMU off
-#endif
-
#ifdef CONFIG_THREAD_INFO_IN_TASK
adrp x4, init_thread_union
add sp, x4, #THREAD_SIZE
@@ -512,13 +468,13 @@ __mmap_switched:
0:
#endif
b start_kernel
-ENDPROC(__mmap_switched)
+ENDPROC(__primary_switched)
/*
* end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region
*/
- .section ".text","ax"
+ .section ".idmap.text","ax"
ENTRY(kimage_vaddr)
.quad _text - TEXT_OFFSET
@@ -628,7 +584,7 @@ ENDPROC(el2_setup)
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
* in x20. See arch/arm64/include/asm/virt.h for more info.
*/
-ENTRY(set_cpu_boot_mode_flag)
+set_cpu_boot_mode_flag:
adr_l x1, __boot_cpu_mode
cmp w20, #BOOT_CPU_MODE_EL2
b.ne 1f
@@ -640,17 +596,29 @@ ENTRY(set_cpu_boot_mode_flag)
ENDPROC(set_cpu_boot_mode_flag)
/*
+ * These values are written with the MMU off, but read with the MMU on.
+ * Writers will invalidate the corresponding address, discarding up to a
+ * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
+ * sufficient alignment that the CWG doesn't overlap another section.
+ */
+ .pushsection ".mmuoff.data.write", "aw"
+/*
* We need to find out the CPU boot mode long after boot, so we need to
* store it in a writable variable.
*
* This is not in .bss, because we set it sufficiently early that the boot-time
* zeroing of .bss would clobber it.
*/
- .pushsection .data..cacheline_aligned
- .align L1_CACHE_SHIFT
ENTRY(__boot_cpu_mode)
.long BOOT_CPU_MODE_EL2
.long BOOT_CPU_MODE_EL1
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ */
+ENTRY(__early_cpu_boot_status)
+ .long 0
+
.popsection
/*
@@ -661,7 +629,7 @@ ENTRY(secondary_holding_pen)
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
- ldr x1, =MPIDR_HWID_BITMASK
+ mov_q x1, MPIDR_HWID_BITMASK
and x0, x0, x1
adr_l x3, secondary_holding_pen_release
pen: ldr x4, [x3]
@@ -681,7 +649,7 @@ ENTRY(secondary_entry)
b secondary_startup
ENDPROC(secondary_entry)
-ENTRY(secondary_startup)
+secondary_startup:
/*
* Common entry point for secondary CPUs.
*/
@@ -689,14 +657,11 @@ ENTRY(secondary_startup)
adrp x26, swapper_pg_dir
bl __cpu_setup // initialise processor
- ldr x8, kimage_vaddr
- ldr w9, 0f
- sub x27, x8, w9, sxtw // address to jump to after enabling the MMU
+ adr_l x27, __secondary_switch // address to jump to after enabling the MMU
b __enable_mmu
ENDPROC(secondary_startup)
-0: .long (_text - TEXT_OFFSET) - __secondary_switched
-ENTRY(__secondary_switched)
+__secondary_switched:
adr_l x5, vectors
msr vbar_el1, x5
isb
@@ -727,9 +692,8 @@ ENDPROC(__secondary_switched)
* Checks if the selected granule size is supported by the CPU.
* If it isn't, park the CPU
*/
- .section ".idmap.text", "ax"
-__enable_mmu:
- mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value
+ENTRY(__enable_mmu)
+ mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value
mrs x1, ID_AA64MMFR0_EL1
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
@@ -756,7 +720,7 @@ __enable_mmu:
* to take into account by discarding the current kernel mapping and
* creating a new one.
*/
- msr sctlr_el1, x18 // disable the MMU
+ msr sctlr_el1, x22 // disable the MMU
isb
bl __create_page_tables // recreate kernel mapping
@@ -768,7 +732,6 @@ __enable_mmu:
ic iallu // flush instructions fetched
dsb nsh // via old mapping
isb
- add x27, x27, x23 // relocated __mmap_switched
#endif
br x27
ENDPROC(__enable_mmu)
@@ -777,3 +740,38 @@ __no_granule_support:
wfe
b __no_granule_support
ENDPROC(__no_granule_support)
+
+__primary_switch:
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Iterate over each entry in the relocation table, and apply the
+ * relocations in place.
+ */
+ ldr w9, =__rela_offset // offset to reloc table
+ ldr w10, =__rela_size // size of reloc table
+
+ mov_q x11, KIMAGE_VADDR // default virtual offset
+ add x11, x11, x23 // actual virtual offset
+ add x9, x9, x11 // __va(.rela)
+ add x10, x9, x10 // __va(.rela) + sizeof(.rela)
+
+0: cmp x9, x10
+ b.hs 1f
+ ldp x11, x12, [x9], #24
+ ldr x13, [x9, #-8]
+ cmp w12, #R_AARCH64_RELATIVE
+ b.ne 0b
+ add x13, x13, x23 // relocate
+ str x13, [x11, x23]
+ b 0b
+
+1:
+#endif
+ ldr x8, =__primary_switched
+ br x8
+ENDPROC(__primary_switch)
+
+__secondary_switch:
+ ldr x8, =__secondary_switched
+ br x8
+ENDPROC(__secondary_switch)
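
For reference, the R_AARCH64_RELATIVE loop in __primary_switch above behaves like the C sketch below. The struct and helper are illustrative only (the struct mirrors Elf64_Rela, and 'offset' plays the role of the KASLR displacement held in x23); they are not part of the patch.

/* Illustrative sketch -- mirrors the Elf64_Rela layout consumed above. */
struct rela_entry {
	unsigned long long r_offset;	/* link-time VA to patch */
	unsigned long long r_info;	/* relocation type (low 32 bits) */
	long long r_addend;		/* link-time value */
};

#define R_AARCH64_RELATIVE 1027

/* offset: KASLR displacement added to the link-time KIMAGE_VADDR (x23) */
static void apply_relative_relocs(struct rela_entry *rela,
				  struct rela_entry *end,
				  unsigned long long offset)
{
	for (; rela < end; rela++) {
		if ((unsigned int)rela->r_info != R_AARCH64_RELATIVE)
			continue;
		/* str x13, [x11, x23]: write (addend + offset) at (r_offset + offset) */
		*(unsigned long long *)(rela->r_offset + offset) =
				(unsigned long long)rela->r_addend + offset;
	}
}
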
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
new file mode 100644
index 000000000000..46f29b6560ec
--- /dev/null
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -0,0 +1,176 @@
+/*
+ * Hibernate low-level support
+ *
+ * Copyright (C) 2016 ARM Ltd.
+ * Author: James Morse <james.morse@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/linkage.h>
+#include <linux/errno.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/cputype.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include <asm/virt.h>
+
+/*
+ * To prevent the possibility of old and new partial table walks being visible
+ * in the TLB, switch the TTBR to a zero page when we invalidate the old
+ * records (see D4.7.1 'General TLB maintenance requirements' in
+ * ARM DDI 0487A.i). Even switching to our copied tables will cause a changed
+ * output address at each stage of the walk.
+ */
+.macro break_before_make_ttbr_switch zero_page, page_table
+ msr ttbr1_el1, \zero_page
+ isb
+ tlbi vmalle1is
+ dsb ish
+ msr ttbr1_el1, \page_table
+ isb
+.endm
+
+
+/*
+ * Resume from hibernate
+ *
+ * Loads temporary page tables then restores the memory image.
+ * Finally branches to cpu_resume() to restore the state saved by
+ * swsusp_arch_suspend().
+ *
+ * Because this code has to be copied to a 'safe' page, it can't call out to
+ * other functions by PC-relative address. Also remember that it may be
+ * mid-way through over-writing other functions. For this reason it contains
+ * code from flush_icache_range() and uses the copy_page() macro.
+ *
+ * This 'safe' page is mapped via ttbr0, and executed from there. This function
+ * switches to a copy of the linear map in ttbr1, performs the restore, then
+ * switches ttbr1 to the original kernel's swapper_pg_dir.
+ *
+ * All of memory gets written to, including code. We need to clean the kernel
+ * text to the Point of Coherence (PoC) before secondary cores can be booted.
+ * Because the kernel modules and executable pages mapped to user space are
+ * also written as data, we clean all pages we touch to the Point of
+ * Unification (PoU).
+ *
+ * x0: physical address of temporary page tables
+ * x1: physical address of swapper page tables
+ * x2: address of cpu_resume
+ * x3: linear map address of restore_pblist in the current kernel
+ * x4: physical address of __hyp_stub_vectors, or 0
+ * x5: physical address of a zero page that remains zero after resume
+ */
+.pushsection ".hibernate_exit.text", "ax"
+ENTRY(swsusp_arch_suspend_exit)
+ /*
+ * We execute from ttbr0, change ttbr1 to our copied linear map tables
+ * with a break-before-make via the zero page
+ */
+ break_before_make_ttbr_switch x5, x0
+
+ mov x21, x1
+ mov x30, x2
+ mov x24, x4
+ mov x25, x5
+
+ /* walk the restore_pblist and use copy_page() to over-write memory */
+ mov x19, x3
+
+1: ldr x10, [x19, #HIBERN_PBE_ORIG]
+ mov x0, x10
+ ldr x1, [x19, #HIBERN_PBE_ADDR]
+
+ copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
+
+ add x1, x10, #PAGE_SIZE
+ /* Clean the copied page to PoU - based on flush_icache_range() */
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x4, x10, x3
+2: dc cvau, x4 /* clean D line / unified line */
+ add x4, x4, x2
+ cmp x4, x1
+ b.lo 2b
+
+ ldr x19, [x19, #HIBERN_PBE_NEXT]
+ cbnz x19, 1b
+ dsb ish /* wait for PoU cleaning to finish */
+
+	/* switch to the restored kernel's page tables */
+ break_before_make_ttbr_switch x25, x21
+
+ ic ialluis
+ dsb ish
+ isb
+
+ cbz x24, 3f /* Do we need to re-initialise EL2? */
+ hvc #0
+3: ret
+
+ .ltorg
+ENDPROC(swsusp_arch_suspend_exit)
+
+/*
+ * Restore the hyp stub.
+ * This must be done before the hibernate page is unmapped by _cpu_resume(),
+ * but happens before any of the hyp-stub's code is cleaned to PoC.
+ *
+ * x24: The physical address of __hyp_stub_vectors
+ */
+el1_sync:
+ msr vbar_el2, x24
+ eret
+ENDPROC(el1_sync)
+
+.macro invalid_vector label
+\label:
+ b \label
+ENDPROC(\label)
+.endm
+
+ invalid_vector el2_sync_invalid
+ invalid_vector el2_irq_invalid
+ invalid_vector el2_fiq_invalid
+ invalid_vector el2_error_invalid
+ invalid_vector el1_sync_invalid
+ invalid_vector el1_irq_invalid
+ invalid_vector el1_fiq_invalid
+ invalid_vector el1_error_invalid
+
+/* el2 vectors - switch el2 here while we restore the memory image. */
+ .align 11
+ENTRY(hibernate_el2_vectors)
+ ventry el2_sync_invalid // Synchronous EL2t
+ ventry el2_irq_invalid // IRQ EL2t
+ ventry el2_fiq_invalid // FIQ EL2t
+ ventry el2_error_invalid // Error EL2t
+
+ ventry el2_sync_invalid // Synchronous EL2h
+ ventry el2_irq_invalid // IRQ EL2h
+ ventry el2_fiq_invalid // FIQ EL2h
+ ventry el2_error_invalid // Error EL2h
+
+ ventry el1_sync // Synchronous 64-bit EL1
+ ventry el1_irq_invalid // IRQ 64-bit EL1
+ ventry el1_fiq_invalid // FIQ 64-bit EL1
+ ventry el1_error_invalid // Error 64-bit EL1
+
+ ventry el1_sync_invalid // Synchronous 32-bit EL1
+ ventry el1_irq_invalid // IRQ 32-bit EL1
+ ventry el1_fiq_invalid // FIQ 32-bit EL1
+ ventry el1_error_invalid // Error 32-bit EL1
+END(hibernate_el2_vectors)
+
+.popsection
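
At C level, the restore walk performed by swsusp_arch_suspend_exit() above amounts to the sketch below. This is illustrative only: the real code must execute from the 'safe' page and therefore cannot make out-of-line calls. struct pbe is the hibernate core's list type from <linux/suspend.h>, and flush_icache_range() stands in for the inline dc cvau loop that was derived from it.

#include <linux/string.h>
#include <linux/suspend.h>
#include <asm/cacheflush.h>
#include <asm/page.h>

static void sketch_restore_image(struct pbe *pblist)
{
	struct pbe *pbe;

	for (pbe = pblist; pbe != NULL; pbe = pbe->next) {
		/* copy the saved page back over its original location */
		memcpy(pbe->orig_address, pbe->address, PAGE_SIZE);
		/* clean the restored page towards the PoU, as the asm does */
		flush_icache_range((unsigned long)pbe->orig_address,
				   (unsigned long)pbe->orig_address + PAGE_SIZE);
	}
}
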
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
new file mode 100644
index 000000000000..8761eb95ed27
--- /dev/null
+++ b/arch/arm64/kernel/hibernate.c
@@ -0,0 +1,520 @@
+/*
+ * Hibernate support specific for ARM64
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ * https://lkml.org/lkml/2010/6/18/4
+ * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ * https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#define pr_fmt(x) "hibernate: " x
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/pm.h>
+#include <linux/sched.h>
+#include <linux/suspend.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+#include <asm/irqflags.h>
+#include <asm/memory.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/sections.h>
+#include <asm/suspend.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+/*
+ * Hibernate core relies on this value being 0 on resume, and marks it
+ * __nosavedata assuming it will keep the resume kernel's '0' value. This
+ * doesn't happen when KASLR is in use, as the image and resume kernels
+ * need not be located at the same addresses.
+ *
+ * defined as "__visible int in_suspend __nosavedata" in
+ * kernel/power/hibernate.c
+ */
+extern int in_suspend;
+
+/* Find a symbol's alias in the linear map */
+#define LMADDR(x) phys_to_virt(virt_to_phys(x))
+
+/* Do we need to reset el2? */
+#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
+
+/* temporary el2 vectors in the __hibernate_exit_text section. */
+extern char hibernate_el2_vectors[];
+
+/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
+extern char __hyp_stub_vectors[];
+
+/*
+ * Values that may not change over hibernate/resume. We put the build number
+ * and date in here so that we guarantee not to resume with a different
+ * kernel.
+ */
+struct arch_hibernate_hdr_invariants {
+ char uts_version[__NEW_UTS_LEN + 1];
+};
+
+/* These values need to be known across a hibernate/restore. */
+static struct arch_hibernate_hdr {
+ struct arch_hibernate_hdr_invariants invariants;
+
+ /* These are needed to find the relocated kernel if built with kaslr */
+ phys_addr_t ttbr1_el1;
+ void (*reenter_kernel)(void);
+
+ /*
+ * We need to know where the __hyp_stub_vectors are after restore to
+ * re-configure el2.
+ */
+ phys_addr_t __hyp_stub_vectors;
+} resume_hdr;
+
+static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
+{
+ memset(i, 0, sizeof(*i));
+ memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
+}
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+ unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+ return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+ WARN_ON(num_online_cpus() != 1);
+}
+
+void notrace restore_processor_state(void)
+{
+}
+
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+ struct arch_hibernate_hdr *hdr = addr;
+
+ if (max_size < sizeof(*hdr))
+ return -EOVERFLOW;
+
+ arch_hdr_invariants(&hdr->invariants);
+ hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir);
+ hdr->reenter_kernel = _cpu_resume;
+
+ /* We can't use __hyp_get_vectors() because kvm may still be loaded */
+ if (el2_reset_needed())
+ hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors);
+ else
+ hdr->__hyp_stub_vectors = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL(arch_hibernation_header_save);
+
+int arch_hibernation_header_restore(void *addr)
+{
+ struct arch_hibernate_hdr_invariants invariants;
+ struct arch_hibernate_hdr *hdr = addr;
+
+ arch_hdr_invariants(&invariants);
+ if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
+ pr_crit("Hibernate image not generated by this kernel!\n");
+ return -EINVAL;
+ }
+
+ resume_hdr = *hdr;
+
+ return 0;
+}
+EXPORT_SYMBOL(arch_hibernation_header_restore);
+
+/*
+ * Copies length bytes, starting at src_start, into a new page,
+ * performs cache maintenance, then maps it at the specified low
+ * address as executable.
+ *
+ * This is used by hibernate to copy the code it needs to execute when
+ * overwriting the kernel text. This function generates a new set of page
+ * tables, which it loads into ttbr0.
+ *
+ * Length is provided as we probably only want 4K of data, even on a 64K
+ * page system.
+ */
+static int create_safe_exec_page(void *src_start, size_t length,
+ unsigned long dst_addr,
+ phys_addr_t *phys_dst_addr,
+ void *(*allocator)(gfp_t mask),
+ gfp_t mask)
+{
+ int rc = 0;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long dst = (unsigned long)allocator(mask);
+
+ if (!dst) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ memcpy((void *)dst, src_start, length);
+ flush_icache_range(dst, dst + length);
+
+ pgd = pgd_offset_raw(allocator(mask), dst_addr);
+ if (pgd_none(*pgd)) {
+ pud = allocator(mask);
+ if (!pud) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ pgd_populate(&init_mm, pgd, pud);
+ }
+
+ pud = pud_offset(pgd, dst_addr);
+ if (pud_none(*pud)) {
+ pmd = allocator(mask);
+ if (!pmd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ pud_populate(&init_mm, pud, pmd);
+ }
+
+ pmd = pmd_offset(pud, dst_addr);
+ if (pmd_none(*pmd)) {
+ pte = allocator(mask);
+ if (!pte) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ pmd_populate_kernel(&init_mm, pmd, pte);
+ }
+
+ pte = pte_offset_kernel(pmd, dst_addr);
+ set_pte(pte, __pte(virt_to_phys((void *)dst) |
+ pgprot_val(PAGE_KERNEL_EXEC)));
+
+ /*
+ * Load our new page tables. A strict BBM approach requires that we
+ * ensure that TLBs are free of any entries that may overlap with the
+ * global mappings we are about to install.
+ *
+ * For a real hibernate/resume cycle TTBR0 currently points to a zero
+ * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
+ * runtime services), while for a userspace-driven test_resume cycle it
+ * points to userspace page tables (and we must point it at a zero page
+ * ourselves). Elsewhere we only (un)install the idmap with preemption
+ * disabled, so T0SZ should be as required regardless.
+ */
+ cpu_set_reserved_ttbr0();
+ local_flush_tlb_all();
+ write_sysreg(virt_to_phys(pgd), ttbr0_el1);
+ isb();
+
+ *phys_dst_addr = virt_to_phys((void *)dst);
+
+out:
+ return rc;
+}
+
+#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start))
+
+int swsusp_arch_suspend(void)
+{
+ int ret = 0;
+ unsigned long flags;
+ struct sleep_stack_data state;
+
+ local_dbg_save(flags);
+
+ if (__cpu_suspend_enter(&state)) {
+ ret = swsusp_save();
+ } else {
+		/* Clean kernel core startup/idle code to PoC */
+ dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
+ dcache_clean_range(__idmap_text_start, __idmap_text_end);
+
+ /*
+ * Tell the hibernation core that we've just restored
+ * the memory
+ */
+ in_suspend = 0;
+
+ __cpu_suspend_exit();
+ }
+
+ local_dbg_restore(flags);
+
+ return ret;
+}
+
+static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+{
+ pte_t pte = *src_pte;
+
+ if (pte_valid(pte)) {
+ /*
+ * Resume will overwrite areas that may be marked
+ * read only (code, rodata). Clear the RDONLY bit from
+ * the temporary mappings we use during restore.
+ */
+ set_pte(dst_pte, pte_clear_rdonly(pte));
+ } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
+ /*
+		 * debug_pagealloc will have removed the PTE_VALID bit if
+ * the page isn't in use by the resume kernel. It may have
+ * been in use by the original kernel, in which case we need
+ * to put it back in our copy to do the restore.
+ *
+		 * Before marking this entry valid, check that the pfn is
+		 * one that should be mapped.
+ */
+ BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+ set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte)));
+ }
+}
+
+static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+ unsigned long end)
+{
+ pte_t *src_pte;
+ pte_t *dst_pte;
+ unsigned long addr = start;
+
+ dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pte)
+ return -ENOMEM;
+ pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
+ dst_pte = pte_offset_kernel(dst_pmd, start);
+
+ src_pte = pte_offset_kernel(src_pmd, start);
+ do {
+ _copy_pte(dst_pte, src_pte, addr);
+ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+
+ return 0;
+}
+
+static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+ unsigned long end)
+{
+ pmd_t *src_pmd;
+ pmd_t *dst_pmd;
+ unsigned long next;
+ unsigned long addr = start;
+
+ if (pud_none(*dst_pud)) {
+ dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pmd)
+ return -ENOMEM;
+ pud_populate(&init_mm, dst_pud, dst_pmd);
+ }
+ dst_pmd = pmd_offset(dst_pud, start);
+
+ src_pmd = pmd_offset(src_pud, start);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*src_pmd))
+ continue;
+ if (pmd_table(*src_pmd)) {
+ if (copy_pte(dst_pmd, src_pmd, addr, next))
+ return -ENOMEM;
+ } else {
+ set_pmd(dst_pmd,
+ __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+ }
+ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+ unsigned long end)
+{
+ pud_t *dst_pud;
+ pud_t *src_pud;
+ unsigned long next;
+ unsigned long addr = start;
+
+ if (pgd_none(*dst_pgd)) {
+ dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pud)
+ return -ENOMEM;
+ pgd_populate(&init_mm, dst_pgd, dst_pud);
+ }
+ dst_pud = pud_offset(dst_pgd, start);
+
+ src_pud = pud_offset(src_pgd, start);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*src_pud))
+ continue;
+ if (pud_table(*(src_pud))) {
+ if (copy_pmd(dst_pud, src_pud, addr, next))
+ return -ENOMEM;
+ } else {
+ set_pud(dst_pud,
+ __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+ }
+ } while (dst_pud++, src_pud++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+ unsigned long end)
+{
+ unsigned long next;
+ unsigned long addr = start;
+ pgd_t *src_pgd = pgd_offset_k(start);
+
+ dst_pgd = pgd_offset_raw(dst_pgd, start);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(*src_pgd))
+ continue;
+ if (copy_pud(dst_pgd, src_pgd, addr, next))
+ return -ENOMEM;
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+
+ return 0;
+}
+
+/*
+ * Set up, then resume from the hibernate image using swsusp_arch_suspend_exit().
+ *
+ * Memory allocated by get_safe_page() will be dealt with by the hibernate
+ * code, so we don't need to free it here.
+ */
+int swsusp_arch_resume(void)
+{
+ int rc = 0;
+ void *zero_page;
+ size_t exit_size;
+ pgd_t *tmp_pg_dir;
+ void *lm_restore_pblist;
+ phys_addr_t phys_hibernate_exit;
+ void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
+ void *, phys_addr_t, phys_addr_t);
+
+ /*
+ * Restoring the memory image will overwrite the ttbr1 page tables.
+ * Create a second copy of just the linear map, and use this when
+ * restoring.
+ */
+ tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!tmp_pg_dir) {
+ pr_err("Failed to allocate memory for temporary page tables.");
+ rc = -ENOMEM;
+ goto out;
+ }
+ rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
+ if (rc)
+ goto out;
+
+ /*
+ * Since we only copied the linear map, we need to find restore_pblist's
+ * linear map address.
+ */
+ lm_restore_pblist = LMADDR(restore_pblist);
+
+ /*
+ * We need a zero page that is zero before & after resume in order to
+	 * break-before-make on the ttbr1 page tables.
+ */
+ zero_page = (void *)get_safe_page(GFP_ATOMIC);
+ if (!zero_page) {
+ pr_err("Failed to allocate zero page.");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Locate the exit code in the bottom-but-one page, so that *NULL
+	 * still has disastrous effects.
+ */
+ hibernate_exit = (void *)PAGE_SIZE;
+ exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
+ /*
+ * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
+ * a new set of ttbr0 page tables and load them.
+ */
+ rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
+ (unsigned long)hibernate_exit,
+ &phys_hibernate_exit,
+ (void *)get_safe_page, GFP_ATOMIC);
+ if (rc) {
+ pr_err("Failed to create safe executable page for hibernate_exit code.");
+ goto out;
+ }
+
+ /*
+	 * The hibernate exit text contains a set of EL2 vectors that will
+	 * be executed at EL2 with the MMU off in order to reload the hyp-stub.
+ */
+ __flush_dcache_area(hibernate_exit, exit_size);
+
+ /*
+ * KASLR will cause the el2 vectors to be in a different location in
+ * the resumed kernel. Load hibernate's temporary copy into el2.
+ *
+ * We can skip this step if we booted at EL1, or are running with VHE.
+ */
+ if (el2_reset_needed()) {
+ phys_addr_t el2_vectors = phys_hibernate_exit; /* base */
+ el2_vectors += hibernate_el2_vectors -
+ __hibernate_exit_text_start; /* offset */
+
+ __hyp_set_vectors(el2_vectors);
+ }
+
+ hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
+ resume_hdr.reenter_kernel, lm_restore_pblist,
+ resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
+
+out:
+ return rc;
+}
+
+static int check_boot_cpu_online_pm_callback(struct notifier_block *nb,
+ unsigned long action, void *ptr)
+{
+ if (action == PM_HIBERNATION_PREPARE &&
+ cpumask_first(cpu_online_mask) != 0) {
+ pr_warn("CPU0 is offline.\n");
+ return notifier_from_errno(-ENODEV);
+ }
+
+ return NOTIFY_OK;
+}
+
+static int __init check_boot_cpu_online_init(void)
+{
+ /*
+ * Set this pm_notifier callback with a lower priority than
+ * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be
+ * called earlier to disable cpu hotplug before the cpu online check.
+ */
+ pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX);
+
+ return 0;
+}
+core_initcall(check_boot_cpu_online_init);
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 9e7228bda4a1..bef4b659d816 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -24,6 +24,7 @@
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/hw_breakpoint.h>
+#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/smp.h>
@@ -128,6 +129,7 @@ static u64 read_wb_reg(int reg, int n)
return val;
}
+NOKPROBE_SYMBOL(read_wb_reg);
static void write_wb_reg(int reg, int n, u64 val)
{
@@ -141,6 +143,7 @@ static void write_wb_reg(int reg, int n, u64 val)
}
isb();
}
+NOKPROBE_SYMBOL(write_wb_reg);
/*
* Convert a breakpoint privilege level to the corresponding exception
@@ -158,6 +161,7 @@ static enum dbg_active_el debug_exception_level(int privilege)
return -EINVAL;
}
}
+NOKPROBE_SYMBOL(debug_exception_level);
enum hw_breakpoint_ops {
HW_BREAKPOINT_INSTALL,
@@ -616,6 +620,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable)
write_wb_reg(reg, i, ctrl);
}
}
+NOKPROBE_SYMBOL(toggle_bp_registers);
/*
* Debug exception handlers.
@@ -695,6 +700,7 @@ unlock:
return 0;
}
+NOKPROBE_SYMBOL(breakpoint_handler);
/*
* Arm64 hardware does not always report a watchpoint hit address that matches
@@ -835,6 +841,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
return 0;
}
+NOKPROBE_SYMBOL(watchpoint_handler);
/*
* Handle single-step exception.
@@ -892,6 +899,7 @@ int reinstall_suspended_bps(struct pt_regs *regs)
return !handled_exception;
}
+NOKPROBE_SYMBOL(reinstall_suspended_bps);
/*
* Context-switcher for restoring suspended breakpoints.
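
The NOKPROBE_SYMBOL() annotations added above stop kprobes from placing probes in functions that run as part of the debug-exception path, where a recursive debug exception would be fatal. A minimal sketch of the pattern (the handler name and its role are made up):

#include <linux/kprobes.h>

struct pt_regs;

/* hypothetical handler that runs in debug-exception context */
static int example_debug_handler(unsigned long addr, unsigned int esr,
				 struct pt_regs *regs)
{
	/* ...work that must never itself hit a breakpoint... */
	return 0;
}
NOKPROBE_SYMBOL(example_debug_handler);
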
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 096e957aecb0..ba0127e31b1a 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -22,6 +22,8 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
#include <asm/ptrace.h>
#include <asm/virt.h>
@@ -55,15 +57,26 @@ ENDPROC(__hyp_stub_vectors)
.align 11
el1_sync:
- mrs x1, esr_el2
- lsr x1, x1, #26
- cmp x1, #0x16
- b.ne 2f // Not an HVC trap
- cbz x0, 1f
- msr vbar_el2, x0 // Set vbar_el2
- b 2f
-1: mrs x0, vbar_el2 // Return vbar_el2
-2: eret
+ mrs x30, esr_el2
+ lsr x30, x30, #ESR_ELx_EC_SHIFT
+
+ cmp x30, #ESR_ELx_EC_HVC64
+ b.ne 9f // Not an HVC trap
+
+ cmp x0, #HVC_GET_VECTORS
+ b.ne 1f
+ mrs x0, vbar_el2
+ b 9f
+
+1: cmp x0, #HVC_SET_VECTORS
+ b.ne 2f
+ msr vbar_el2, x1
+ b 9f
+
+ /* Someone called kvm_call_hyp() against the hyp-stub... */
+2: mov x0, #ARM_EXCEPTION_HYP_GONE
+
+9: eret
ENDPROC(el1_sync)
.macro invalid_vector label
@@ -103,10 +116,18 @@ ENDPROC(\label)
*/
ENTRY(__hyp_get_vectors)
- mov x0, xzr
- // fall through
-ENTRY(__hyp_set_vectors)
+ str lr, [sp, #-16]!
+ mov x0, #HVC_GET_VECTORS
hvc #0
+ ldr lr, [sp], #16
ret
ENDPROC(__hyp_get_vectors)
+
+ENTRY(__hyp_set_vectors)
+ str lr, [sp, #-16]!
+ mov x1, x0
+ mov x0, #HVC_SET_VECTORS
+ hvc #0
+ ldr lr, [sp], #16
+ ret
ENDPROC(__hyp_set_vectors)
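
With the reworked stub above, EL2 is driven by a small HVC protocol: x0 selects the operation (HVC_GET_VECTORS or HVC_SET_VECTORS) and, for the 'set' case, x1 carries the new vector base. C code is expected to go through the two wrappers patched above; a minimal sketch, assuming the prototypes exported from asm/virt.h ('phys_vectors' is a made-up argument):

#include <linux/bug.h>
#include <linux/types.h>
#include <asm/virt.h>

/* Illustrative only: swap in a new EL2 vector table via the hyp-stub. */
static void sketch_swap_el2_vectors(phys_addr_t phys_vectors)
{
	phys_addr_t old = __hyp_get_vectors();	/* HVC_GET_VECTORS in x0 */

	__hyp_set_vectors(phys_vectors);	/* HVC_SET_VECTORS, base in x1 */

	/* sanity check the round trip */
	WARN_ON(__hyp_get_vectors() != phys_vectors);
	(void)old;
}
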
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 5e360ce88f10..86d444f9c2c1 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -73,6 +73,8 @@
#ifdef CONFIG_EFI
+__efistub_stext_offset = stext - _text;
+
/*
* Prevent the symbol aliases below from being emitted into the kallsyms
* table, by forcing them to be absolute symbols (which are conveniently
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 67d3100369c6..a3f8f8bbfc92 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -2,7 +2,7 @@
* Copyright (C) 2013 Huawei Ltd.
* Author: Jiang Liu <liuj97@gmail.com>
*
- * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com>
+ * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -30,6 +30,7 @@
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/fixmap.h>
+#include <asm/opcodes.h>
#include <asm/insn.h>
#define AARCH64_INSN_SF_BIT BIT(31)
@@ -95,7 +96,8 @@ static void __kprobes *patch_map(void *addr, int fixmap)
if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
page = vmalloc_to_page(addr);
- else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
+ else if (!module && (IS_ENABLED(CONFIG_DEBUG_RODATA)
+ || IS_ENABLED(CONFIG_KERNEL_TEXT_RDONLY)))
page = phys_to_page(__pa_symbol(addr));
else
return addr;
@@ -162,6 +164,32 @@ static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
aarch64_insn_is_nop(insn);
}
+bool __kprobes aarch64_insn_uses_literal(u32 insn)
+{
+ /* ldr/ldrsw (literal), prfm */
+
+ return aarch64_insn_is_ldr_lit(insn) ||
+ aarch64_insn_is_ldrsw_lit(insn) ||
+ aarch64_insn_is_adr_adrp(insn) ||
+ aarch64_insn_is_prfm_lit(insn);
+}
+
+bool __kprobes aarch64_insn_is_branch(u32 insn)
+{
+ /* b, bl, cb*, tb*, b.cond, br, blr */
+
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn) ||
+ aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn) ||
+ aarch64_insn_is_ret(insn) ||
+ aarch64_insn_is_br(insn) ||
+ aarch64_insn_is_blr(insn) ||
+ aarch64_insn_is_bcond(insn);
+}
+
/*
* ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
* Section B2.6.5 "Concurrent modification and execution of instructions":
@@ -363,6 +391,9 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
u32 immlo, immhi, mask;
int shift;
+ if (insn == AARCH64_BREAK_FAULT)
+ return AARCH64_BREAK_FAULT;
+
switch (type) {
case AARCH64_INSN_IMM_ADR:
shift = 0;
@@ -377,7 +408,7 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
type);
- return 0;
+ return AARCH64_BREAK_FAULT;
}
}
@@ -394,9 +425,12 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
{
int shift;
+ if (insn == AARCH64_BREAK_FAULT)
+ return AARCH64_BREAK_FAULT;
+
if (reg < AARCH64_INSN_REG_0 || reg > AARCH64_INSN_REG_SP) {
pr_err("%s: unknown register encoding %d\n", __func__, reg);
- return 0;
+ return AARCH64_BREAK_FAULT;
}
switch (type) {
@@ -417,7 +451,7 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
default:
pr_err("%s: unknown register type encoding %d\n", __func__,
type);
- return 0;
+ return AARCH64_BREAK_FAULT;
}
insn &= ~(GENMASK(4, 0) << shift);
@@ -446,7 +480,7 @@ static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
break;
default:
pr_err("%s: unknown size encoding %d\n", __func__, type);
- return 0;
+ return AARCH64_BREAK_FAULT;
}
insn &= ~GENMASK(31, 30);
@@ -460,14 +494,17 @@ static inline long branch_imm_common(unsigned long pc, unsigned long addr,
{
long offset;
- /*
- * PC: A 64-bit Program Counter holding the address of the current
- * instruction. A64 instructions must be word-aligned.
- */
- BUG_ON((pc & 0x3) || (addr & 0x3));
+ if ((pc & 0x3) || (addr & 0x3)) {
+ pr_err("%s: A64 instructions must be word aligned\n", __func__);
+ return range;
+ }
offset = ((long)addr - (long)pc);
- BUG_ON(offset < -range || offset >= range);
+
+ if (offset < -range || offset >= range) {
+ pr_err("%s: offset out of range\n", __func__);
+ return range;
+ }
return offset;
}
@@ -484,6 +521,8 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
* texts are within +/-128M.
*/
offset = branch_imm_common(pc, addr, SZ_128M);
+ if (offset >= SZ_128M)
+ return AARCH64_BREAK_FAULT;
switch (type) {
case AARCH64_INSN_BRANCH_LINK:
@@ -493,7 +532,7 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
insn = aarch64_insn_get_b_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown branch encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
@@ -510,6 +549,8 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
long offset;
offset = branch_imm_common(pc, addr, SZ_1M);
+ if (offset >= SZ_1M)
+ return AARCH64_BREAK_FAULT;
switch (type) {
case AARCH64_INSN_BRANCH_COMP_ZERO:
@@ -519,7 +560,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
insn = aarch64_insn_get_cbnz_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown branch encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
@@ -530,7 +571,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
insn |= AARCH64_INSN_SF_BIT;
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
@@ -550,7 +591,10 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
insn = aarch64_insn_get_bcond_value();
- BUG_ON(cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL);
+ if (cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL) {
+ pr_err("%s: unknown condition encoding %d\n", __func__, cond);
+ return AARCH64_BREAK_FAULT;
+ }
insn |= cond;
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
@@ -583,7 +627,7 @@ u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
insn = aarch64_insn_get_ret_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown branch encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
@@ -606,7 +650,7 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
insn = aarch64_insn_get_str_reg_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown load/store encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
@@ -645,26 +689,30 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
insn = aarch64_insn_get_stp_post_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown load/store encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
switch (variant) {
case AARCH64_INSN_VARIANT_32BIT:
- /* offset must be multiples of 4 in the range [-256, 252] */
- BUG_ON(offset & 0x3);
- BUG_ON(offset < -256 || offset > 252);
+ if ((offset & 0x3) || (offset < -256) || (offset > 252)) {
+ pr_err("%s: offset must be multiples of 4 in the range of [-256, 252] %d\n",
+ __func__, offset);
+ return AARCH64_BREAK_FAULT;
+ }
shift = 2;
break;
case AARCH64_INSN_VARIANT_64BIT:
- /* offset must be multiples of 8 in the range [-512, 504] */
- BUG_ON(offset & 0x7);
- BUG_ON(offset < -512 || offset > 504);
+ if ((offset & 0x7) || (offset < -512) || (offset > 504)) {
+ pr_err("%s: offset must be multiples of 8 in the range of [-512, 504] %d\n",
+ __func__, offset);
+ return AARCH64_BREAK_FAULT;
+ }
shift = 3;
insn |= AARCH64_INSN_SF_BIT;
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
@@ -702,7 +750,7 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
insn = aarch64_insn_get_subs_imm_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown add/sub encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
@@ -713,11 +761,14 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
insn |= AARCH64_INSN_SF_BIT;
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
- BUG_ON(imm & ~(SZ_4K - 1));
+ if (imm & ~(SZ_4K - 1)) {
+ pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
+ return AARCH64_BREAK_FAULT;
+ }
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
@@ -746,7 +797,7 @@ u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
insn = aarch64_insn_get_sbfm_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown bitfield encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
@@ -759,12 +810,18 @@ u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
mask = GENMASK(5, 0);
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
- BUG_ON(immr & ~mask);
- BUG_ON(imms & ~mask);
+ if (immr & ~mask) {
+ pr_err("%s: invalid immr encoding %d\n", __func__, immr);
+ return AARCH64_BREAK_FAULT;
+ }
+ if (imms & ~mask) {
+ pr_err("%s: invalid imms encoding %d\n", __func__, imms);
+ return AARCH64_BREAK_FAULT;
+ }
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
@@ -793,23 +850,33 @@ u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
insn = aarch64_insn_get_movn_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown movewide encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
- BUG_ON(imm & ~(SZ_64K - 1));
+ if (imm & ~(SZ_64K - 1)) {
+ pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
+ return AARCH64_BREAK_FAULT;
+ }
switch (variant) {
case AARCH64_INSN_VARIANT_32BIT:
- BUG_ON(shift != 0 && shift != 16);
+ if (shift != 0 && shift != 16) {
+ pr_err("%s: invalid shift encoding %d\n", __func__,
+ shift);
+ return AARCH64_BREAK_FAULT;
+ }
break;
case AARCH64_INSN_VARIANT_64BIT:
insn |= AARCH64_INSN_SF_BIT;
- BUG_ON(shift != 0 && shift != 16 && shift != 32 &&
- shift != 48);
+ if (shift != 0 && shift != 16 && shift != 32 && shift != 48) {
+ pr_err("%s: invalid shift encoding %d\n", __func__,
+ shift);
+ return AARCH64_BREAK_FAULT;
+ }
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
@@ -843,20 +910,28 @@ u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
insn = aarch64_insn_get_subs_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown add/sub encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
switch (variant) {
case AARCH64_INSN_VARIANT_32BIT:
- BUG_ON(shift & ~(SZ_32 - 1));
+ if (shift & ~(SZ_32 - 1)) {
+ pr_err("%s: invalid shift encoding %d\n", __func__,
+ shift);
+ return AARCH64_BREAK_FAULT;
+ }
break;
case AARCH64_INSN_VARIANT_64BIT:
insn |= AARCH64_INSN_SF_BIT;
- BUG_ON(shift & ~(SZ_64 - 1));
+ if (shift & ~(SZ_64 - 1)) {
+ pr_err("%s: invalid shift encoding %d\n", __func__,
+ shift);
+ return AARCH64_BREAK_FAULT;
+ }
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
@@ -885,11 +960,15 @@ u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
insn = aarch64_insn_get_rev32_value();
break;
case AARCH64_INSN_DATA1_REVERSE_64:
- BUG_ON(variant != AARCH64_INSN_VARIANT_64BIT);
+ if (variant != AARCH64_INSN_VARIANT_64BIT) {
+ pr_err("%s: invalid variant for reverse64 %d\n",
+ __func__, variant);
+ return AARCH64_BREAK_FAULT;
+ }
insn = aarch64_insn_get_rev64_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown data1 encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
@@ -900,7 +979,7 @@ u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
insn |= AARCH64_INSN_SF_BIT;
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
@@ -937,7 +1016,7 @@ u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
insn = aarch64_insn_get_rorv_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown data2 encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
@@ -948,7 +1027,7 @@ u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
insn |= AARCH64_INSN_SF_BIT;
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
@@ -976,7 +1055,7 @@ u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
insn = aarch64_insn_get_msub_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown data3 encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
@@ -987,7 +1066,7 @@ u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
insn |= AARCH64_INSN_SF_BIT;
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
@@ -1037,20 +1116,28 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
insn = aarch64_insn_get_bics_value();
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown logical encoding %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
switch (variant) {
case AARCH64_INSN_VARIANT_32BIT:
- BUG_ON(shift & ~(SZ_32 - 1));
+ if (shift & ~(SZ_32 - 1)) {
+ pr_err("%s: invalid shift encoding %d\n", __func__,
+ shift);
+ return AARCH64_BREAK_FAULT;
+ }
break;
case AARCH64_INSN_VARIANT_64BIT:
insn |= AARCH64_INSN_SF_BIT;
- BUG_ON(shift & ~(SZ_64 - 1));
+ if (shift & ~(SZ_64 - 1)) {
+ pr_err("%s: invalid shift encoding %d\n", __func__,
+ shift);
+ return AARCH64_BREAK_FAULT;
+ }
break;
default:
- BUG_ON(1);
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
return AARCH64_BREAK_FAULT;
}
@@ -1116,6 +1203,14 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset)
BUG();
}
+/*
+ * Extract the Op/CR data from a msr/mrs instruction.
+ */
+u32 aarch64_insn_extract_system_reg(u32 insn)
+{
+ return (insn & 0x1FFFE0) >> 5;
+}
+
bool aarch32_insn_is_wide(u32 insn)
{
return insn >= 0xe800;
@@ -1141,3 +1236,101 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn)
{
return insn & CRM_MASK;
}
+
+static bool __kprobes __check_eq(unsigned long pstate)
+{
+ return (pstate & PSR_Z_BIT) != 0;
+}
+
+static bool __kprobes __check_ne(unsigned long pstate)
+{
+ return (pstate & PSR_Z_BIT) == 0;
+}
+
+static bool __kprobes __check_cs(unsigned long pstate)
+{
+ return (pstate & PSR_C_BIT) != 0;
+}
+
+static bool __kprobes __check_cc(unsigned long pstate)
+{
+ return (pstate & PSR_C_BIT) == 0;
+}
+
+static bool __kprobes __check_mi(unsigned long pstate)
+{
+ return (pstate & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_pl(unsigned long pstate)
+{
+ return (pstate & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_vs(unsigned long pstate)
+{
+ return (pstate & PSR_V_BIT) != 0;
+}
+
+static bool __kprobes __check_vc(unsigned long pstate)
+{
+ return (pstate & PSR_V_BIT) == 0;
+}
+
+static bool __kprobes __check_hi(unsigned long pstate)
+{
+ pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+ return (pstate & PSR_C_BIT) != 0;
+}
+
+static bool __kprobes __check_ls(unsigned long pstate)
+{
+ pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */
+ return (pstate & PSR_C_BIT) == 0;
+}
+
+static bool __kprobes __check_ge(unsigned long pstate)
+{
+ pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+ return (pstate & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_lt(unsigned long pstate)
+{
+ pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */
+ return (pstate & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_gt(unsigned long pstate)
+{
+ /*PSR_N_BIT ^= PSR_V_BIT */
+ unsigned long temp = pstate ^ (pstate << 3);
+
+ temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
+ return (temp & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_le(unsigned long pstate)
+{
+ /*PSR_N_BIT ^= PSR_V_BIT */
+ unsigned long temp = pstate ^ (pstate << 3);
+
+ temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */
+ return (temp & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_al(unsigned long pstate)
+{
+ return true;
+}
+
+/*
+ * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
+ * it behaves identically to 0b1110 ("al").
+ */
+pstate_check_t * const aarch32_opcode_cond_checks[16] = {
+ __check_eq, __check_ne, __check_cs, __check_cc,
+ __check_mi, __check_pl, __check_vs, __check_vc,
+ __check_hi, __check_ls, __check_ge, __check_lt,
+ __check_gt, __check_le, __check_al, __check_al
+};
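
The __check_* helpers above rely on the PSTATE flag layout (N = bit 31, Z = bit 30, C = bit 29, V = bit 28): shifting right by 1 lines Z up with C, and shifting left by 3 lines V up with N, so a single mask test decides the condition. A small self-checking user-space sketch of the 'HI' (C && !Z) and 'GE' (N == V) cases, assuming an LP64 unsigned long as on arm64:

#include <assert.h>

#define N_BIT 0x80000000UL	/* bit 31 */
#define Z_BIT 0x40000000UL	/* bit 30 */
#define C_BIT 0x20000000UL	/* bit 29 */
#define V_BIT 0x10000000UL	/* bit 28 */

static int check_hi(unsigned long pstate)
{
	pstate &= ~(pstate >> 1);	/* clear C whenever Z is set */
	return (pstate & C_BIT) != 0;	/* HI: C set and Z clear */
}

static int check_ge(unsigned long pstate)
{
	pstate ^= (pstate << 3);	/* fold V into the N position */
	return (pstate & N_BIT) == 0;	/* GE: N == V */
}

int main(void)
{
	assert(check_hi(C_BIT));
	assert(!check_hi(C_BIT | Z_BIT) && !check_hi(0));
	assert(check_ge(0) && check_ge(N_BIT | V_BIT));
	assert(!check_ge(N_BIT) && !check_ge(V_BIT));
	return 0;
}
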
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index 79b17384effa..d43ea93dc68d 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -19,6 +19,7 @@
#include <linux/export.h>
#include <linux/types.h>
#include <linux/io.h>
+#include <linux/msm_rtb.h>
/*
* Copy data from IO memory space to "real" memory space.
@@ -26,21 +27,21 @@
void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
{
while (count && !IS_ALIGNED((unsigned long)from, 8)) {
- *(u8 *)to = __raw_readb(from);
+ *(u8 *)to = __raw_readb_no_log(from);
from++;
to++;
count--;
}
while (count >= 8) {
- *(u64 *)to = __raw_readq(from);
+ *(u64 *)to = __raw_readq_no_log(from);
from += 8;
to += 8;
count -= 8;
}
while (count) {
- *(u8 *)to = __raw_readb(from);
+ *(u8 *)to = __raw_readb_no_log(from);
from++;
to++;
count--;
@@ -54,21 +55,21 @@ EXPORT_SYMBOL(__memcpy_fromio);
void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
{
while (count && !IS_ALIGNED((unsigned long)to, 8)) {
- __raw_writeb(*(u8 *)from, to);
+ __raw_writeb_no_log(*(u8 *)from, to);
from++;
to++;
count--;
}
while (count >= 8) {
- __raw_writeq(*(u64 *)from, to);
+ __raw_writeq_no_log(*(u64 *)from, to);
from += 8;
to += 8;
count -= 8;
}
while (count) {
- __raw_writeb(*(u8 *)from, to);
+ __raw_writeb_no_log(*(u8 *)from, to);
from++;
to++;
count--;
@@ -88,19 +89,19 @@ void __memset_io(volatile void __iomem *dst, int c, size_t count)
qc |= qc << 32;
while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
- __raw_writeb(c, dst);
+ __raw_writeb_no_log(c, dst);
dst++;
count--;
}
while (count >= 8) {
- __raw_writeq(qc, dst);
+ __raw_writeq_no_log(qc, dst);
dst += 8;
count -= 8;
}
while (count) {
- __raw_writeb(c, dst);
+ __raw_writeb_no_log(c, dst);
dst++;
count--;
}
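
Drivers normally reach these routines through the generic memcpy_fromio()/memcpy_toio() wrappers, so with this change bulk MMIO copies go through the _no_log accessors, presumably to avoid logging every individual access of a large copy. A minimal usage sketch (the function and its arguments are made up):

#include <linux/io.h>
#include <linux/types.h>

/* Illustrative only: bulk-read a device FIFO through memcpy_fromio(). */
static void sketch_read_fifo(void __iomem *regs, void *buf, size_t len)
{
	/* ends up in __memcpy_fromio() above */
	memcpy_fromio(buf, regs, len);
}
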
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 606c21760f23..49e6de3812d9 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -22,6 +22,7 @@
#include <linux/irq.h>
#include <linux/kdebug.h>
#include <linux/kgdb.h>
+#include <linux/kprobes.h>
#include <asm/traps.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
@@ -221,6 +222,7 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
+NOKPROBE_SYMBOL(kgdb_brk_fn)
static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
{
@@ -232,6 +234,7 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
return DBG_HOOK_HANDLED;
}
+NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
{
@@ -241,6 +244,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
kgdb_handle_exception(0, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
+NOKPROBE_SYMBOL(kgdb_step_brk_fn);
static struct break_hook kgdb_brkpt_hook = {
.esr_mask = 0xffffffff,
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 997e6b27ff6a..d15b5c2935b3 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -20,16 +20,13 @@
*
* AArch32 user helpers.
*
- * Each segment is 32-byte aligned and will be moved to the top of the high
- * vector page. New segments (if ever needed) must be added in front of
- * existing ones. This mechanism should be used only for things that are
- * really small and justified, and not be abused freely.
+ * These helpers are provided for compatibility with AArch32 binaries that
+ * still need them. They are installed at a fixed address by
+ * aarch32_setup_additional_pages().
*
* See Documentation/arm/kernel_user_helpers.txt for formal definitions.
*/
-#include <asm/unistd.h>
-
.align 5
.globl __kuser_helper_start
__kuser_helper_start:
@@ -77,42 +74,3 @@ __kuser_helper_version: // 0xffff0ffc
.word ((__kuser_helper_end - __kuser_helper_start) >> 5)
.globl __kuser_helper_end
__kuser_helper_end:
-
-/*
- * AArch32 sigreturn code
- *
- * For ARM syscalls, the syscall number has to be loaded into r7.
- * We do not support an OABI userspace.
- *
- * For Thumb syscalls, we also pass the syscall number via r7. We therefore
- * need two 16-bit instructions.
- */
- .globl __aarch32_sigret_code_start
-__aarch32_sigret_code_start:
-
- /*
- * ARM Code
- */
- .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn
-
- /*
- * Thumb code
- */
- .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn
-
- /*
- * ARM code
- */
- .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn
-
- /*
- * Thumb code
- */
- .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn
-
- .globl __aarch32_sigret_code_end
-__aarch32_sigret_code_end:
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 7f316982ce00..093c13541efb 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -32,11 +32,16 @@
void *module_alloc(unsigned long size)
{
+ gfp_t gfp_mask = GFP_KERNEL;
void *p;
+ /* Silence the initial allocation */
+ if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ gfp_mask |= __GFP_NOWARN;
+
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
module_alloc_base + MODULES_VSIZE,
- GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+ gfp_mask, PAGE_KERNEL_EXEC, 0,
NUMA_NO_NODE, __builtin_return_address(0));
if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
diff --git a/arch/arm64/kernel/perf_debug.c b/arch/arm64/kernel/perf_debug.c
new file mode 100644
index 000000000000..ef3313fd16c6
--- /dev/null
+++ b/arch/arm64/kernel/perf_debug.c
@@ -0,0 +1,73 @@
+/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+
+/*
+ * Subsequent patches should add an entry to the end of this string.
+ * Format is incrementing sequence number followed by text of
+ * patch commit title with newline.
+ * Note trailing ';' is on its own line to simplify addition of
+ * future strings.
+ */
+static char *descriptions =
+ " 0 arm64: perf: add debug patch logging framework\n"
+ " 1 Perf: arm64: Add L1 counters to tracepoints\n"
+ " 5 Perf: arm64: add perf user-mode permissions\n"
+ " 6 Perf: arm64: Add debugfs node to clear PMU\n"
+ " 7 Perf: arm64: Update PMU force reset\n"
+ "10 Perf: arm64: tracectr: initialize counts after hotplug\n"
+ "11 Perf: arm64: Refine disable/enable in tracecounters\n"
+ "15 Perf: arm64: make debug dir handle exportable\n"
+ "16 Perf: arm64: add perf trace user\n"
+ "17 Perf: arm64: add support for kryo pmu\n"
+;
+
+static ssize_t desc_read(struct file *fp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ return simple_read_from_buffer(buf, count, pos, descriptions,
+ strlen(descriptions));
+}
+
+static const struct file_operations perf_debug_desc_fops = {
+ .read = desc_read,
+};
+
+static int perf_debugfs_init(void)
+{
+ int ret = 0;
+ struct dentry *dir;
+ struct dentry *file;
+
+ dir = debugfs_create_dir("msm-perf-patches", NULL);
+ if (IS_ERR_OR_NULL(dir)) {
+ pr_err("failed to create msm-perf-patches dir in debugfs\n");
+ ret = PTR_ERR(dir);
+ goto init_exit;
+ }
+
+ file = debugfs_create_file("descriptions", 0444, dir, NULL,
+ &perf_debug_desc_fops);
+ if (IS_ERR_OR_NULL(file)) {
+ debugfs_remove(dir);
+ pr_err("failed to create descriptions file for msm-perf-patches\n");
+ ret = PTR_ERR(file);
+ goto init_exit;
+ }
+
+init_exit:
+ return ret;
+}
+late_initcall(perf_debugfs_init);
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
deleted file mode 100644
index e99a0ed7e66b..000000000000
--- a/arch/arm64/kernel/perf_event.c
+++ /dev/null
@@ -1,682 +0,0 @@
-/*
- * PMU support
- *
- * Copyright (C) 2012 ARM Limited
- * Author: Will Deacon <will.deacon@arm.com>
- *
- * This code is based heavily on the ARMv7 perf event code.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <asm/irq_regs.h>
-
-#include <linux/of.h>
-#include <linux/perf/arm_pmu.h>
-#include <linux/platform_device.h>
-
-/*
- * ARMv8 PMUv3 Performance Events handling code.
- * Common event types.
- */
-enum armv8_pmuv3_perf_types {
- /* Required events. */
- ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR = 0x00,
- ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL = 0x03,
- ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS = 0x04,
- ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED = 0x10,
- ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES = 0x11,
- ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED = 0x12,
-
- /* At least one of the following is required. */
- ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED = 0x08,
- ARMV8_PMUV3_PERFCTR_OP_SPEC = 0x1B,
-
- /* Common architectural events. */
- ARMV8_PMUV3_PERFCTR_MEM_READ = 0x06,
- ARMV8_PMUV3_PERFCTR_MEM_WRITE = 0x07,
- ARMV8_PMUV3_PERFCTR_EXC_TAKEN = 0x09,
- ARMV8_PMUV3_PERFCTR_EXC_EXECUTED = 0x0A,
- ARMV8_PMUV3_PERFCTR_CID_WRITE = 0x0B,
- ARMV8_PMUV3_PERFCTR_PC_WRITE = 0x0C,
- ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH = 0x0D,
- ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN = 0x0E,
- ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F,
- ARMV8_PMUV3_PERFCTR_TTBR_WRITE = 0x1C,
-
- /* Common microarchitectural events. */
- ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL = 0x01,
- ARMV8_PMUV3_PERFCTR_ITLB_REFILL = 0x02,
- ARMV8_PMUV3_PERFCTR_DTLB_REFILL = 0x05,
- ARMV8_PMUV3_PERFCTR_MEM_ACCESS = 0x13,
- ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS = 0x14,
- ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB = 0x15,
- ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS = 0x16,
- ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL = 0x17,
- ARMV8_PMUV3_PERFCTR_L2_CACHE_WB = 0x18,
- ARMV8_PMUV3_PERFCTR_BUS_ACCESS = 0x19,
- ARMV8_PMUV3_PERFCTR_MEM_ERROR = 0x1A,
- ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D,
-};
-
-/* ARMv8 Cortex-A53 specific event types. */
-enum armv8_a53_pmu_perf_types {
- ARMV8_A53_PERFCTR_PREFETCH_LINEFILL = 0xC2,
-};
-
-/* ARMv8 Cortex-A57 specific event types. */
-enum armv8_a57_perf_types {
- ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD = 0x40,
- ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST = 0x41,
- ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD = 0x42,
- ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST = 0x43,
- ARMV8_A57_PERFCTR_DTLB_REFILL_LD = 0x4c,
- ARMV8_A57_PERFCTR_DTLB_REFILL_ST = 0x4d,
-};
-
-/* PMUv3 HW events mapping. */
-static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-/* ARM Cortex-A53 HW events mapping. */
-static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
-};
-
-static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
- PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
- [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
-};
-
-static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL,
-
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- PERF_CACHE_MAP_ALL_UNSUPPORTED,
-
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
-
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
-
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
-
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
-
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-};
-
-
-/*
- * Perf Events' indices
- */
-#define ARMV8_IDX_CYCLE_COUNTER 0
-#define ARMV8_IDX_COUNTER0 1
-#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
- (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
-
-#define ARMV8_MAX_COUNTERS 32
-#define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1)
-
-/*
- * ARMv8 low level PMU access
- */
-
-/*
- * Perf Event to low level counters mapping
- */
-#define ARMV8_IDX_TO_COUNTER(x) \
- (((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK)
-
-/*
- * Per-CPU PMCR: config reg
- */
-#define ARMV8_PMCR_E (1 << 0) /* Enable all counters */
-#define ARMV8_PMCR_P (1 << 1) /* Reset all counters */
-#define ARMV8_PMCR_C (1 << 2) /* Cycle counter reset */
-#define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
-#define ARMV8_PMCR_X (1 << 4) /* Export to ETM */
-#define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
-#define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */
-#define ARMV8_PMCR_N_MASK 0x1f
-#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */
-
-/*
- * PMOVSR: counters overflow flag status reg
- */
-#define ARMV8_OVSR_MASK 0xffffffff /* Mask for writable bits */
-#define ARMV8_OVERFLOWED_MASK ARMV8_OVSR_MASK
-
-/*
- * PMXEVTYPER: Event selection reg
- */
-#define ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */
-#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */
-
-/*
- * Event filters for PMUv3
- */
-#define ARMV8_EXCLUDE_EL1 (1 << 31)
-#define ARMV8_EXCLUDE_EL0 (1 << 30)
-#define ARMV8_INCLUDE_EL2 (1 << 27)
-
-static inline u32 armv8pmu_pmcr_read(void)
-{
- u32 val;
- asm volatile("mrs %0, pmcr_el0" : "=r" (val));
- return val;
-}
-
-static inline void armv8pmu_pmcr_write(u32 val)
-{
- val &= ARMV8_PMCR_MASK;
- isb();
- asm volatile("msr pmcr_el0, %0" :: "r" (val));
-}
-
-static inline int armv8pmu_has_overflowed(u32 pmovsr)
-{
- return pmovsr & ARMV8_OVERFLOWED_MASK;
-}
-
-static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
-{
- return idx >= ARMV8_IDX_CYCLE_COUNTER &&
- idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu);
-}
-
-static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
-{
- return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
-}
-
-static inline int armv8pmu_select_counter(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmselr_el0, %0" :: "r" (counter));
- isb();
-
- return idx;
-}
-
-static inline u32 armv8pmu_read_counter(struct perf_event *event)
-{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
- u32 value = 0;
-
- if (!armv8pmu_counter_valid(cpu_pmu, idx))
- pr_err("CPU%u reading wrong counter %d\n",
- smp_processor_id(), idx);
- else if (idx == ARMV8_IDX_CYCLE_COUNTER)
- asm volatile("mrs %0, pmccntr_el0" : "=r" (value));
- else if (armv8pmu_select_counter(idx) == idx)
- asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value));
-
- return value;
-}
-
-static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
-{
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
-
- if (!armv8pmu_counter_valid(cpu_pmu, idx))
- pr_err("CPU%u writing wrong counter %d\n",
- smp_processor_id(), idx);
- else if (idx == ARMV8_IDX_CYCLE_COUNTER)
- asm volatile("msr pmccntr_el0, %0" :: "r" (value));
- else if (armv8pmu_select_counter(idx) == idx)
- asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
-}
-
-static inline void armv8pmu_write_evtype(int idx, u32 val)
-{
- if (armv8pmu_select_counter(idx) == idx) {
- val &= ARMV8_EVTYPE_MASK;
- asm volatile("msr pmxevtyper_el0, %0" :: "r" (val));
- }
-}
-
-static inline int armv8pmu_enable_counter(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
- return idx;
-}
-
-static inline int armv8pmu_disable_counter(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
- return idx;
-}
-
-static inline int armv8pmu_enable_intens(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
- return idx;
-}
-
-static inline int armv8pmu_disable_intens(int idx)
-{
- u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
- isb();
- /* Clear the overflow flag in case an interrupt is pending. */
- asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
- isb();
-
- return idx;
-}
-
-static inline u32 armv8pmu_getreset_flags(void)
-{
- u32 value;
-
- /* Read */
- asm volatile("mrs %0, pmovsclr_el0" : "=r" (value));
-
- /* Write to clear flags */
- value &= ARMV8_OVSR_MASK;
- asm volatile("msr pmovsclr_el0, %0" :: "r" (value));
-
- return value;
-}
-
-static void armv8pmu_enable_event(struct perf_event *event)
-{
- unsigned long flags;
- struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
- int idx = hwc->idx;
-
- /*
- * Enable counter and interrupt, and set the counter to count
- * the event that we're interested in.
- */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
- /*
- * Disable counter
- */
- armv8pmu_disable_counter(idx);
-
- /*
- * Set event (if destined for PMNx counters).
- */
- armv8pmu_write_evtype(idx, hwc->config_base);
-
- /*
- * Enable interrupt for this counter
- */
- armv8pmu_enable_intens(idx);
-
- /*
- * Enable counter
- */
- armv8pmu_enable_counter(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static void armv8pmu_disable_event(struct perf_event *event)
-{
- unsigned long flags;
- struct hw_perf_event *hwc = &event->hw;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
- int idx = hwc->idx;
-
- /*
- * Disable counter and interrupt
- */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
- /*
- * Disable counter
- */
- armv8pmu_disable_counter(idx);
-
- /*
- * Disable interrupt for this counter
- */
- armv8pmu_disable_intens(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
-{
- u32 pmovsr;
- struct perf_sample_data data;
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
- struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
- struct pt_regs *regs;
- int idx;
-
- /*
- * Get and reset the IRQ flags
- */
- pmovsr = armv8pmu_getreset_flags();
-
- /*
- * Did an overflow occur?
- */
- if (!armv8pmu_has_overflowed(pmovsr))
- return IRQ_NONE;
-
- /*
- * Handle the counter(s) overflow(s)
- */
- regs = get_irq_regs();
-
- for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
- struct perf_event *event = cpuc->events[idx];
- struct hw_perf_event *hwc;
-
- /* Ignore if we don't have an event. */
- if (!event)
- continue;
-
- /*
- * We have a single interrupt for all counters. Check that
- * each counter has overflowed before we process it.
- */
- if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
- continue;
-
- hwc = &event->hw;
- armpmu_event_update(event);
- perf_sample_data_init(&data, 0, hwc->last_period);
- if (!armpmu_event_set_period(event))
- continue;
-
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
- }
-
- /*
- * Handle the pending perf events.
- *
- * Note: this call *must* be run with interrupts disabled. For
- * platforms that can have the PMU interrupts raised as an NMI, this
- * will not work.
- */
- irq_work_run();
-
- return IRQ_HANDLED;
-}
-
-static void armv8pmu_start(struct arm_pmu *cpu_pmu)
-{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
- /* Enable all counters */
- armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
-{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
- /* Disable all counters */
- armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
-}
-
-static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
- struct perf_event *event)
-{
- int idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT;
-
- /* Always place a cycle counter into the cycle counter. */
- if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
- if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
- return -EAGAIN;
-
- return ARMV8_IDX_CYCLE_COUNTER;
- }
-
- /*
- * For anything other than a cycle counter, try and use
- * the events counters
- */
- for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- }
-
- /* The counters are all in use. */
- return -EAGAIN;
-}
-
-/*
- * Add an event filter to a given event. This will only work for PMUv2 PMUs.
- */
-static int armv8pmu_set_event_filter(struct hw_perf_event *event,
- struct perf_event_attr *attr)
-{
- unsigned long config_base = 0;
-
- if (attr->exclude_idle)
- return -EPERM;
- if (attr->exclude_user)
- config_base |= ARMV8_EXCLUDE_EL0;
- if (attr->exclude_kernel)
- config_base |= ARMV8_EXCLUDE_EL1;
- if (!attr->exclude_hv)
- config_base |= ARMV8_INCLUDE_EL2;
-
- /*
- * Install the filter into config_base as this is used to
- * construct the event type.
- */
- event->config_base = config_base;
-
- return 0;
-}
-
-static void armv8pmu_reset(void *info)
-{
- struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
- u32 idx, nb_cnt = cpu_pmu->num_events;
-
- /* The counter and interrupt enable registers are unknown at reset. */
- for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
- armv8pmu_disable_counter(idx);
- armv8pmu_disable_intens(idx);
- }
-
- /* Initialize & Reset PMNC: C and P bits. */
- armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
-}
-
-static int armv8_pmuv3_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv8_pmuv3_perf_map,
- &armv8_pmuv3_perf_cache_map,
- ARMV8_EVTYPE_EVENT);
-}
-
-static int armv8_a53_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv8_a53_perf_map,
- &armv8_a53_perf_cache_map,
- ARMV8_EVTYPE_EVENT);
-}
-
-static int armv8_a57_map_event(struct perf_event *event)
-{
- return armpmu_map_event(event, &armv8_a57_perf_map,
- &armv8_a57_perf_cache_map,
- ARMV8_EVTYPE_EVENT);
-}
-
-static void armv8pmu_read_num_pmnc_events(void *info)
-{
- int *nb_cnt = info;
-
- /* Read the nb of CNTx counters supported from PMNC */
- *nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
-
- /* Add the CPU cycles counter */
- *nb_cnt += 1;
-}
-
-static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu)
-{
- return smp_call_function_any(&arm_pmu->supported_cpus,
- armv8pmu_read_num_pmnc_events,
- &arm_pmu->num_events, 1);
-}
-
-static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
-{
- cpu_pmu->handle_irq = armv8pmu_handle_irq,
- cpu_pmu->enable = armv8pmu_enable_event,
- cpu_pmu->disable = armv8pmu_disable_event,
- cpu_pmu->read_counter = armv8pmu_read_counter,
- cpu_pmu->write_counter = armv8pmu_write_counter,
- cpu_pmu->get_event_idx = armv8pmu_get_event_idx,
- cpu_pmu->start = armv8pmu_start,
- cpu_pmu->stop = armv8pmu_stop,
- cpu_pmu->reset = armv8pmu_reset,
- cpu_pmu->max_period = (1LLU << 32) - 1,
- cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
-}
-
-static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
-{
- armv8_pmu_init(cpu_pmu);
- cpu_pmu->name = "armv8_pmuv3";
- cpu_pmu->map_event = armv8_pmuv3_map_event;
- return armv8pmu_probe_num_events(cpu_pmu);
-}
-
-static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv8_pmu_init(cpu_pmu);
- cpu_pmu->name = "armv8_cortex_a53";
- cpu_pmu->map_event = armv8_a53_map_event;
- return armv8pmu_probe_num_events(cpu_pmu);
-}
-
-static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
-{
- armv8_pmu_init(cpu_pmu);
- cpu_pmu->name = "armv8_cortex_a57";
- cpu_pmu->map_event = armv8_a57_map_event;
- return armv8pmu_probe_num_events(cpu_pmu);
-}
-
-static const struct of_device_id armv8_pmu_of_device_ids[] = {
- {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
- {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
- {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
- {},
-};
-
-static int armv8_pmu_device_probe(struct platform_device *pdev)
-{
- return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
-}
-
-static struct platform_driver armv8_pmu_driver = {
- .driver = {
- .name = "armv8-pmu",
- .of_match_table = armv8_pmu_of_device_ids,
- .suppress_bind_attrs = true,
- },
- .probe = armv8_pmu_device_probe,
-};
-
-static int __init register_armv8_pmu_driver(void)
-{
- return platform_driver_register(&armv8_pmu_driver);
-}
-device_initcall(register_armv8_pmu_driver);
diff --git a/arch/arm64/kernel/perf_trace_counters.c b/arch/arm64/kernel/perf_trace_counters.c
new file mode 100644
index 000000000000..7b852e36eaa2
--- /dev/null
+++ b/arch/arm64/kernel/perf_trace_counters.c
@@ -0,0 +1,180 @@
+/* Copyright (c) 2013-2014, 2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/cpu.h>
+#include <linux/tracepoint.h>
+#include <trace/events/sched.h>
+#define CREATE_TRACE_POINTS
+#include "perf_trace_counters.h"
+
+static unsigned int tp_pid_state;
+
+DEFINE_PER_CPU(u32, cntenset_val);
+DEFINE_PER_CPU(u32, previous_ccnt);
+DEFINE_PER_CPU(u32[NUM_L1_CTRS], previous_l1_cnts);
+DEFINE_PER_CPU(u32, old_pid);
+DEFINE_PER_CPU(u32, hotplug_flag);
+
+static int tracectr_cpu_hotplug_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned long cpu = (unsigned long)hcpu;
+
+ if ((action & (~CPU_TASKS_FROZEN)) == CPU_STARTING)
+ per_cpu(hotplug_flag, cpu) = 1;
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block tracectr_cpu_hotplug_notifier_block = {
+ .notifier_call = tracectr_cpu_hotplug_notifier,
+};
+
+static void setup_prev_cnts(u32 cpu, u32 cnten_val)
+{
+ int i;
+
+ if (cnten_val & CC)
+ asm volatile("mrs %0, pmccntr_el0"
+ : "=r"(per_cpu(previous_ccnt, cpu)));
+
+ for (i = 0; i < NUM_L1_CTRS; i++) {
+ if (cnten_val & (1 << i)) {
+ /* Select */
+ asm volatile("msr pmselr_el0, %0" : : "r"(i));
+ isb();
+ /* Read value */
+ asm volatile("mrs %0, pmxevcntr_el0"
+ : "=r"(per_cpu(previous_l1_cnts[i], cpu)));
+ }
+ }
+}
+
+void tracectr_notifier(void *ignore, bool preempt,
+ struct task_struct *prev, struct task_struct *next)
+{
+ u32 cnten_val;
+ int current_pid;
+ u32 cpu = task_cpu(next);
+
+ if (tp_pid_state != 1)
+ return;
+ current_pid = next->pid;
+ if (per_cpu(old_pid, cpu) != -1) {
+ asm volatile("mrs %0, pmcntenset_el0" : "=r" (cnten_val));
+ per_cpu(cntenset_val, cpu) = cnten_val;
+ /* Disable all the counters that were enabled */
+ asm volatile("msr pmcntenclr_el0, %0" : : "r" (cnten_val));
+
+ if (per_cpu(hotplug_flag, cpu) == 1) {
+ per_cpu(hotplug_flag, cpu) = 0;
+ setup_prev_cnts(cpu, cnten_val);
+ } else {
+ trace_sched_switch_with_ctrs(per_cpu(old_pid, cpu),
+ current_pid);
+ }
+
+ /* Enable all the counters that were disabled */
+ asm volatile("msr pmcntenset_el0, %0" : : "r" (cnten_val));
+ }
+ per_cpu(old_pid, cpu) = current_pid;
+}
+
+static void enable_tp_pid(void)
+{
+ if (tp_pid_state == 0) {
+ tp_pid_state = 1;
+ register_trace_sched_switch(tracectr_notifier, NULL);
+ }
+}
+
+static void disable_tp_pid(void)
+{
+ if (tp_pid_state == 1) {
+ tp_pid_state = 0;
+ unregister_trace_sched_switch(tracectr_notifier, NULL);
+ }
+}
+
+static ssize_t read_enabled_perftp_file_bool(struct file *file,
+ char __user *user_buf, size_t count, loff_t *ppos)
+{
+ char buf[2];
+ buf[1] = '\n';
+ if (tp_pid_state == 0)
+ buf[0] = '0';
+ else
+ buf[0] = '1';
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t write_enabled_perftp_file_bool(struct file *file,
+ const char __user *user_buf, size_t count, loff_t *ppos)
+{
+ char buf[32];
+ size_t buf_size;
+
+ buf[0] = 0;
+ buf_size = min(count, (sizeof(buf)-1));
+ if (copy_from_user(buf, user_buf, buf_size))
+ return -EFAULT;
+ switch (buf[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ enable_tp_pid();
+ break;
+ case 'n':
+ case 'N':
+ case '0':
+ disable_tp_pid();
+ break;
+ }
+
+ return count;
+}
+
+static const struct file_operations fops_perftp = {
+ .read = read_enabled_perftp_file_bool,
+ .write = write_enabled_perftp_file_bool,
+ .llseek = default_llseek,
+};
+
+int __init init_tracecounters(void)
+{
+ struct dentry *dir;
+ struct dentry *file;
+ unsigned int value = 1;
+ int cpu;
+
+ dir = debugfs_create_dir("perf_debug_tp", NULL);
+ if (!dir)
+ return -ENOMEM;
+ file = debugfs_create_file("enabled", 0660, dir,
+ &value, &fops_perftp);
+ if (!file) {
+ debugfs_remove(dir);
+ return -ENOMEM;
+ }
+ for_each_possible_cpu(cpu)
+ per_cpu(old_pid, cpu) = -1;
+ register_cpu_notifier(&tracectr_cpu_hotplug_notifier_block);
+ return 0;
+}
+
+int __exit exit_tracecounters(void)
+{
+ unregister_cpu_notifier(&tracectr_cpu_hotplug_notifier_block);
+ return 0;
+}
+late_initcall(init_tracecounters);
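+/*
+ * Usage note (illustrative only, not part of the code above): with debugfs
+ * mounted at the usual /sys/kernel/debug, the tracepoint defined here is
+ * switched on and off through the "enabled" file created above, e.g.:
+ *
+ *	echo 1 > /sys/kernel/debug/perf_debug_tp/enabled
+ *	echo 0 > /sys/kernel/debug/perf_debug_tp/enabled
+ *
+ * While enabled, every sched_switch emits a sched_switch_with_ctrs event
+ * carrying the per-CPU counter deltas defined in perf_trace_counters.h.
+ */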
diff --git a/arch/arm64/kernel/perf_trace_counters.h b/arch/arm64/kernel/perf_trace_counters.h
new file mode 100644
index 000000000000..ff3bd371791d
--- /dev/null
+++ b/arch/arm64/kernel/perf_trace_counters.h
@@ -0,0 +1,111 @@
+/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM perf_trace_counters
+
+#if !defined(_PERF_TRACE_COUNTERS_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _PERF_TRACE_COUNTERS_H_
+
+/* Ctr index for PMCNTENSET/CLR */
+#define CC 0x80000000
+#define C0 0x1
+#define C1 0x2
+#define C2 0x4
+#define C3 0x8
+#define C4 0x10
+#define C5 0x20
+#define C_ALL (CC | C0 | C1 | C2 | C3 | C4 | C5)
+#define NUM_L1_CTRS 6
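+
+/*
+ * Illustrative note (not used by the code below): these masks mirror the
+ * layout of PMCNTENSET_EL0/PMCNTENCLR_EL0, where bit 31 (CC) enables the
+ * cycle counter PMCCNTR_EL0 and bits 0..5 (C0..C5) enable event counters
+ * 0..5. A hypothetical helper enabling the cycle counter plus event
+ * counter 0 would write (CC | C0) to the set register:
+ *
+ *	u32 mask = CC | C0;
+ *	asm volatile("msr pmcntenset_el0, %0" : : "r" (mask));
+ */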
+
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/tracepoint.h>
+
+DECLARE_PER_CPU(u32, cntenset_val);
+DECLARE_PER_CPU(u32, previous_ccnt);
+DECLARE_PER_CPU(u32[NUM_L1_CTRS], previous_l1_cnts);
+TRACE_EVENT(sched_switch_with_ctrs,
+
+ TP_PROTO(pid_t prev, pid_t next),
+
+ TP_ARGS(prev, next),
+
+ TP_STRUCT__entry(
+ __field(pid_t, old_pid)
+ __field(pid_t, new_pid)
+ __field(u32, cctr)
+ __field(u32, ctr0)
+ __field(u32, ctr1)
+ __field(u32, ctr2)
+ __field(u32, ctr3)
+ __field(u32, ctr4)
+ __field(u32, ctr5)
+ ),
+
+ TP_fast_assign(
+ u32 cpu = smp_processor_id();
+ u32 i;
+ u32 cnten_val;
+ u32 total_ccnt = 0;
+ u32 total_cnt = 0;
+ u32 delta_l1_cnts[NUM_L1_CTRS];
+ __entry->old_pid = prev;
+ __entry->new_pid = next;
+
+ cnten_val = per_cpu(cntenset_val, cpu);
+
+ if (cnten_val & CC) {
+ asm volatile("mrs %0, pmccntr_el0"
+ : "=r" (total_ccnt));
+			/* Delta since the last switch on this CPU */
+ __entry->cctr = total_ccnt -
+ per_cpu(previous_ccnt, cpu);
+ per_cpu(previous_ccnt, cpu) = total_ccnt;
+ }
+ for (i = 0; i < NUM_L1_CTRS; i++) {
+ if (cnten_val & (1 << i)) {
+ /* Select */
+ asm volatile("msr pmselr_el0, %0"
+ : : "r" (i));
+ isb();
+ asm volatile("mrs %0, pmxevcntr_el0"
+ : "=r" (total_cnt));
+				/* Delta since the last switch */
+ delta_l1_cnts[i] = total_cnt -
+ per_cpu(previous_l1_cnts[i], cpu);
+ per_cpu(previous_l1_cnts[i], cpu) =
+ total_cnt;
+ } else
+ delta_l1_cnts[i] = 0;
+ }
+
+ __entry->ctr0 = delta_l1_cnts[0];
+ __entry->ctr1 = delta_l1_cnts[1];
+ __entry->ctr2 = delta_l1_cnts[2];
+ __entry->ctr3 = delta_l1_cnts[3];
+ __entry->ctr4 = delta_l1_cnts[4];
+ __entry->ctr5 = delta_l1_cnts[5];
+ ),
+
+ TP_printk("prev_pid=%d, next_pid=%d, CCNTR: %u, CTR0: %u, CTR1: %u, CTR2: %u, CTR3: %u, CTR4: %u, CTR5: %u",
+ __entry->old_pid, __entry->new_pid,
+ __entry->cctr, __entry->ctr0, __entry->ctr1,
+ __entry->ctr2, __entry->ctr3,
+ __entry->ctr4, __entry->ctr5)
+);
+
+#endif
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/arm64/kernel
+#define TRACE_INCLUDE_FILE perf_trace_counters
+#include <trace/define_trace.h>
diff --git a/arch/arm64/kernel/perf_trace_user.c b/arch/arm64/kernel/perf_trace_user.c
new file mode 100644
index 000000000000..98bbb2045265
--- /dev/null
+++ b/arch/arm64/kernel/perf_trace_user.c
@@ -0,0 +1,96 @@
+/* Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/preempt.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+
+#define CREATE_TRACE_POINTS
+#include "perf_trace_user.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM perf_trace_counters
+
+#define TRACE_USER_MAX_BUF_SIZE 100
+
+static ssize_t perf_trace_write(struct file *file,
+ const char __user *user_string_in,
+ size_t len, loff_t *ppos)
+{
+ u32 cnten_val;
+ int rc;
+ char buf[TRACE_USER_MAX_BUF_SIZE + 1];
+ ssize_t length;
+
+ if (len == 0)
+ return 0;
+
+ length = len > TRACE_USER_MAX_BUF_SIZE ? TRACE_USER_MAX_BUF_SIZE : len;
+
+ rc = copy_from_user(buf, user_string_in, length);
+ if (rc) {
+ pr_err("%s copy_from_user failed, rc=%d\n", __func__, rc);
+ return length;
+ }
+
+ /* Remove any trailing newline and make sure string is terminated */
+ if (buf[length - 1] == '\n')
+ buf[length - 1] = '\0';
+ else
+ buf[length] = '\0';
+
+ /*
+ * Disable preemption to ensure that all the performance counter
+ * accesses happen on the same cpu
+ */
+ preempt_disable();
+ /* stop counters, call the trace function, restart them */
+
+ asm volatile("mrs %0, pmcntenset_el0" : "=r" (cnten_val));
+ /* Disable all the counters that were enabled */
+ asm volatile("msr pmcntenclr_el0, %0" : : "r" (cnten_val));
+
+ trace_perf_trace_user(buf, cnten_val);
+
+ /* Enable all the counters that were disabled */
+ asm volatile("msr pmcntenset_el0, %0" : : "r" (cnten_val));
+ preempt_enable();
+
+ return length;
+}
+
+static const struct file_operations perf_trace_fops = {
+ .write = perf_trace_write
+};
+
+static int __init init_perf_trace(void)
+{
+ struct dentry *dir;
+ struct dentry *file;
+ unsigned int value = 1;
+
+ dir = perf_create_debug_dir();
+ if (!dir)
+ return -ENOMEM;
+ file = debugfs_create_file("trace_marker", S_IWUSR | S_IWGRP, dir,
+ &value, &perf_trace_fops);
+ if (!file)
+ return -ENOMEM;
+
+ return 0;
+}
+
+late_initcall(init_perf_trace);
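+/*
+ * Usage note (illustrative only): the "trace_marker" file is created under
+ * the directory returned by perf_create_debug_dir() (implemented in
+ * perf_debug.c; the exact debugfs path is therefore an assumption here).
+ * Writing a string to that file emits a perf_trace_user event tagged with
+ * the current counter values, e.g.:
+ *
+ *	echo "start of test phase" > <perf debug dir>/trace_marker
+ */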
diff --git a/arch/arm64/kernel/perf_trace_user.h b/arch/arm64/kernel/perf_trace_user.h
new file mode 100644
index 000000000000..e5f7336029af
--- /dev/null
+++ b/arch/arm64/kernel/perf_trace_user.h
@@ -0,0 +1,85 @@
+/* Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#if !defined(_PERF_TRACE_USER_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _PERF_TRACE_USER_H_
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM perf_trace_counters
+
+#include <linux/tracepoint.h>
+
+#define CNTENSET_CC 0x80000000
+#define NUM_L1_CTRS 4
+
+TRACE_EVENT(perf_trace_user,
+ TP_PROTO(char *string, u32 cnten_val),
+ TP_ARGS(string, cnten_val),
+
+ TP_STRUCT__entry(
+ __field(u32, cctr)
+ __field(u32, ctr0)
+ __field(u32, ctr1)
+ __field(u32, ctr2)
+ __field(u32, ctr3)
+ __field(u32, lctr0)
+ __field(u32, lctr1)
+ __string(user_string, string)
+ ),
+
+ TP_fast_assign(
+ u32 cnt;
+ u32 l1_cnts[NUM_L1_CTRS];
+ int i;
+
+ if (cnten_val & CNTENSET_CC) {
+ /* Read value */
+ asm volatile("mrs %0, pmccntr_el0" : "=r" (cnt));
+ __entry->cctr = cnt;
+ } else
+ __entry->cctr = 0;
+ for (i = 0; i < NUM_L1_CTRS; i++) {
+ if (cnten_val & (1 << i)) {
+ /* Select */
+ asm volatile("msr pmselr_el0, %0"
+ : : "r" (i));
+ isb();
+ /* Read value */
+ asm volatile("mrs %0, pmxevcntr_el0"
+ : "=r" (cnt));
+ l1_cnts[i] = cnt;
+ } else {
+ l1_cnts[i] = 0;
+ }
+ }
+
+ __entry->ctr0 = l1_cnts[0];
+ __entry->ctr1 = l1_cnts[1];
+ __entry->ctr2 = l1_cnts[2];
+ __entry->ctr3 = l1_cnts[3];
+ __entry->lctr0 = 0;
+ __entry->lctr1 = 0;
+ __assign_str(user_string, string);
+ ),
+
+ TP_printk("CCNTR: %u, CTR0: %u, CTR1: %u, CTR2: %u, CTR3: %u, L2CTR0: %u, L2CTR1: %u, MSG=%s",
+ __entry->cctr, __entry->ctr0, __entry->ctr1,
+ __entry->ctr2, __entry->ctr3,
+ __entry->lctr0, __entry->lctr1,
+ __get_str(user_string)
+ )
+ );
+
+#endif
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/arm64/kernel
+#define TRACE_INCLUDE_FILE perf_trace_user
+#include <trace/define_trace.h>
diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile
new file mode 100644
index 000000000000..ce06312e3d34
--- /dev/null
+++ b/arch/arm64/kernel/probes/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \
+ kprobes_trampoline.o \
+ simulate-insn.o
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
new file mode 100644
index 000000000000..f7931d900bca
--- /dev/null
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -0,0 +1,174 @@
+/*
+ * arch/arm64/kernel/probes/decode-insn.c
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <asm/kprobes.h>
+#include <asm/insn.h>
+#include <asm/sections.h>
+
+#include "decode-insn.h"
+#include "simulate-insn.h"
+
+static bool __kprobes aarch64_insn_is_steppable(u32 insn)
+{
+ /*
+ * Branch instructions will write a new value into the PC which is
+ * likely to be relative to the XOL address and therefore invalid.
+ * Deliberate generation of an exception during stepping is also not
+ * currently safe. Lastly, MSR instructions can do any number of nasty
+ * things we can't handle during single-stepping.
+ */
+ if (aarch64_get_insn_class(insn) == AARCH64_INSN_CLS_BR_SYS) {
+ if (aarch64_insn_is_branch(insn) ||
+ aarch64_insn_is_msr_imm(insn) ||
+ aarch64_insn_is_msr_reg(insn) ||
+ aarch64_insn_is_exception(insn) ||
+ aarch64_insn_is_eret(insn))
+ return false;
+
+ /*
+ * The MRS instruction may not return a correct value when
+ * executing in the single-stepping environment. We do make one
+ * exception, for reading the DAIF bits.
+ */
+ if (aarch64_insn_is_mrs(insn))
+ return aarch64_insn_extract_system_reg(insn)
+ != AARCH64_INSN_SPCLREG_DAIF;
+
+ /*
+		 * The HINT instruction is problematic when single-stepping,
+ * except for the NOP case.
+ */
+ if (aarch64_insn_is_hint(insn))
+ return aarch64_insn_is_nop(insn);
+
+ return true;
+ }
+
+ /*
+ * Instructions which load PC relative literals are not going to work
+ * when executed from an XOL slot. Instructions doing an exclusive
+ * load/store are not going to complete successfully when single-step
+ * exception handling happens in the middle of the sequence.
+ */
+ if (aarch64_insn_uses_literal(insn) ||
+ aarch64_insn_is_exclusive(insn))
+ return false;
+
+ return true;
+}
+
+/* Return:
+ * INSN_REJECTED     If the instruction is not allowed to be kprobed,
+ * INSN_GOOD         If the instruction is supported and uses its slot,
+ * INSN_GOOD_NO_SLOT If the instruction is supported but doesn't use its slot.
+ */
+static enum kprobe_insn __kprobes
+arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+ /*
+ * Instructions reading or modifying the PC won't work from the XOL
+ * slot.
+ */
+ if (aarch64_insn_is_steppable(insn))
+ return INSN_GOOD;
+
+ if (aarch64_insn_is_bcond(insn)) {
+ asi->handler = simulate_b_cond;
+ } else if (aarch64_insn_is_cbz(insn) ||
+ aarch64_insn_is_cbnz(insn)) {
+ asi->handler = simulate_cbz_cbnz;
+ } else if (aarch64_insn_is_tbz(insn) ||
+ aarch64_insn_is_tbnz(insn)) {
+ asi->handler = simulate_tbz_tbnz;
+ } else if (aarch64_insn_is_adr_adrp(insn)) {
+ asi->handler = simulate_adr_adrp;
+ } else if (aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn)) {
+ asi->handler = simulate_b_bl;
+ } else if (aarch64_insn_is_br(insn) ||
+ aarch64_insn_is_blr(insn) ||
+ aarch64_insn_is_ret(insn)) {
+ asi->handler = simulate_br_blr_ret;
+ } else if (aarch64_insn_is_ldr_lit(insn)) {
+ asi->handler = simulate_ldr_literal;
+ } else if (aarch64_insn_is_ldrsw_lit(insn)) {
+ asi->handler = simulate_ldrsw_literal;
+ } else {
+ /*
+ * Instruction cannot be stepped out-of-line and we don't
+ * (yet) simulate it.
+ */
+ return INSN_REJECTED;
+ }
+
+ return INSN_GOOD_NO_SLOT;
+}
+
+static bool __kprobes
+is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
+{
+ while (scan_start > scan_end) {
+ /*
+		 * An atomic region starts with an exclusive load and ends
+		 * with an exclusive store.
+ */
+ if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start)))
+ return false;
+ else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start)))
+ return true;
+ scan_start--;
+ }
+
+ return false;
+}
+
+enum kprobe_insn __kprobes
+arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
+{
+ enum kprobe_insn decoded;
+ kprobe_opcode_t insn = le32_to_cpu(*addr);
+ kprobe_opcode_t *scan_start = addr - 1;
+ kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+ struct module *mod;
+#endif
+
+ if (addr >= (kprobe_opcode_t *)_text &&
+ scan_end < (kprobe_opcode_t *)_text)
+ scan_end = (kprobe_opcode_t *)_text;
+#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+ else {
+ preempt_disable();
+ mod = __module_address((unsigned long)addr);
+ if (mod && within_module_init((unsigned long)addr, mod) &&
+ !within_module_init((unsigned long)scan_end, mod))
+ scan_end = (kprobe_opcode_t *)mod->module_init;
+ else if (mod && within_module_core((unsigned long)addr, mod) &&
+ !within_module_core((unsigned long)scan_end, mod))
+ scan_end = (kprobe_opcode_t *)mod->module_core;
+ preempt_enable();
+ }
+#endif
+ decoded = arm_probe_decode_insn(insn, asi);
+
+ if (decoded == INSN_REJECTED ||
+ is_probed_address_atomic(scan_start, scan_end))
+ return INSN_REJECTED;
+
+ return decoded;
+}
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
new file mode 100644
index 000000000000..d438289646a6
--- /dev/null
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -0,0 +1,35 @@
+/*
+ * arch/arm64/kernel/probes/decode-insn.h
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KERNEL_KPROBES_ARM64_H
+#define _ARM_KERNEL_KPROBES_ARM64_H
+
+/*
+ * ARM strongly recommends a limit of 128 bytes between LoadExcl and
+ * StoreExcl instructions in a single thread of execution. So keep the
+ * max atomic context size at 32 instructions (128 bytes / 4 bytes per
+ * AArch64 opcode).
+ */
+#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t))
+
+enum kprobe_insn {
+ INSN_REJECTED,
+ INSN_GOOD_NO_SLOT,
+ INSN_GOOD,
+};
+
+enum kprobe_insn __kprobes
+arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi);
+
+#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
new file mode 100644
index 000000000000..4ea8433011d0
--- /dev/null
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -0,0 +1,657 @@
+/*
+ * arch/arm64/kernel/probes/kprobes.c
+ *
+ * Kprobes support for ARM64
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+#include <linux/stringify.h>
+#include <asm/traps.h>
+#include <asm/ptrace.h>
+#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
+#include <asm/system_misc.h>
+#include <asm/insn.h>
+#include <asm/uaccess.h>
+#include <asm/irq.h>
+#include <asm/sections.h>
+
+#include "decode-insn.h"
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static void __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
+
+static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
+{
+ /* prepare insn slot */
+ p->ainsn.insn[0] = cpu_to_le32(p->opcode);
+
+ flush_icache_range((uintptr_t) (p->ainsn.insn),
+ (uintptr_t) (p->ainsn.insn) +
+ MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+
+ /*
+	 * The PC needs to be restored to the instruction after the probe
+	 * once the out-of-line (XOL) step completes.
+ */
+ p->ainsn.restore = (unsigned long) p->addr +
+ sizeof(kprobe_opcode_t);
+}
+
+static void __kprobes arch_prepare_simulate(struct kprobe *p)
+{
+	/* This instruction is not executed out of line. No need to adjust the PC */
+ p->ainsn.restore = 0;
+}
+
+static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (p->ainsn.handler)
+ p->ainsn.handler((u32)p->opcode, (long)p->addr, regs);
+
+ /* single step simulated, now go for post processing */
+ post_kprobe_handler(kcb, regs);
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ unsigned long probe_addr = (unsigned long)p->addr;
+ extern char __start_rodata[];
+ extern char __end_rodata[];
+
+ if (probe_addr & 0x3)
+ return -EINVAL;
+
+ /* copy instruction */
+ p->opcode = le32_to_cpu(*p->addr);
+
+ if (in_exception_text(probe_addr))
+ return -EINVAL;
+ if (probe_addr >= (unsigned long) __start_rodata &&
+ probe_addr <= (unsigned long) __end_rodata)
+ return -EINVAL;
+
+ /* decode instruction */
+ switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) {
+ case INSN_REJECTED: /* insn not supported */
+ return -EINVAL;
+
+ case INSN_GOOD_NO_SLOT: /* insn need simulation */
+ p->ainsn.insn = NULL;
+ break;
+
+ case INSN_GOOD: /* instruction uses slot */
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
+ return -ENOMEM;
+ break;
+	}
+
+ /* prepare the instruction */
+ if (p->ainsn.insn)
+ arch_prepare_ss_slot(p);
+ else
+ arch_prepare_simulate(p);
+
+ return 0;
+}
+
+static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+{
+ void *addrs[1];
+ u32 insns[1];
+
+ addrs[0] = (void *)addr;
+ insns[0] = (u32)opcode;
+
+ return aarch64_insn_patch_text(addrs, insns, 1);
+}
+
+/* arm kprobe: install breakpoint in text */
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ patch_text(p->addr, BRK64_OPCODE_KPROBES);
+}
+
+/* disarm kprobe: remove breakpoint from text */
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ patch_text(p->addr, p->opcode);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ if (p->ainsn.insn) {
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+ }
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+ __this_cpu_write(current_kprobe, p);
+}
+
+/*
+ * The D-flag (Debug mask) is set (masked) upon debug exception entry.
+ * Kprobes needs to clear (unmask) the D-flag ONLY in the case of a
+ * recursive probe, i.e. when a probe is hit from kprobe handler context
+ * while executing the pre/post handlers. In this case we return with
+ * the D-flag clear so that single-stepping can be carried out.
+ *
+ * Leave D-flag set in all other cases.
+ */
+static void __kprobes
+spsr_set_debug_flag(struct pt_regs *regs, int mask)
+{
+ unsigned long spsr = regs->pstate;
+
+ if (mask)
+ spsr |= PSR_D_BIT;
+ else
+ spsr &= ~PSR_D_BIT;
+
+ regs->pstate = spsr;
+}
+
+/*
+ * Interrupts need to be disabled before single-step mode is set, and not
+ * reenabled until after single-step mode ends.
+ * Without disabling interrupts on the local CPU, an interrupt could be
+ * taken between the exception return and the start of the out-of-line
+ * single-step, which would result in wrongly single-stepping into the
+ * interrupt handler.
+ */
+static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs)
+{
+ kcb->saved_irqflag = regs->pstate;
+ regs->pstate |= PSR_I_BIT;
+}
+
+static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs)
+{
+ if (kcb->saved_irqflag & PSR_I_BIT)
+ regs->pstate |= PSR_I_BIT;
+ else
+ regs->pstate &= ~PSR_I_BIT;
+}
+
+static void __kprobes
+set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr)
+{
+ kcb->ss_ctx.ss_pending = true;
+ kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t);
+}
+
+static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
+{
+ kcb->ss_ctx.ss_pending = false;
+ kcb->ss_ctx.match_addr = 0;
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb, int reenter)
+{
+ unsigned long slot;
+
+ if (reenter) {
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ } else {
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ }
+
+ if (p->ainsn.insn) {
+ /* prepare for single stepping */
+ slot = (unsigned long)p->ainsn.insn;
+
+ set_ss_context(kcb, slot); /* mark pending ss */
+
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ spsr_set_debug_flag(regs, 0);
+ else
+ WARN_ON(regs->pstate & PSR_D_BIT);
+
+ /* IRQs and single stepping do not mix well. */
+ kprobes_save_local_irqflag(kcb, regs);
+ kernel_enable_single_step(regs);
+ instruction_pointer_set(regs, slot);
+ } else {
+ /* insn simulation */
+ arch_simulate_insn(p, regs);
+ }
+}
+
+static int __kprobes reenter_kprobe(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SSDONE:
+ case KPROBE_HIT_ACTIVE:
+ kprobes_inc_nmissed_count(p);
+ setup_singlestep(p, regs, kcb, 1);
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr);
+ dump_kprobe(p);
+ BUG();
+ break;
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+
+ if (!cur)
+ return;
+
+ /* return addr restore if non-branching insn */
+ if (cur->ainsn.restore != 0)
+ instruction_pointer_set(regs, cur->ainsn.restore);
+
+ /* restore back original saved kprobe variables and continue */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ return;
+ }
+ /* call post handler */
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ if (cur->post_handler) {
+		/*
+		 * The post_handler can hit a breakpoint and single step
+		 * again, so we enable the D-flag for the recursive
+		 * exception.
+		 */
+ cur->post_handler(cur, regs, 0);
+ }
+
+ reset_current_kprobe();
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the ip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ instruction_pointer_set(regs, (unsigned long) cur->addr);
+ if (!instruction_pointer(regs))
+ BUG();
+
+ kernel_disable_single_step();
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ spsr_set_debug_flag(regs, 1);
+
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+		 * We increment the nmissed count for accounting; the
+		 * npre/npostfault counts could also be used to account
+		 * for these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(cur);
+
+ /*
+ * We come here because instructions in the pre/post
+		 * handler caused the page fault. This could happen
+		 * if the handler tries to access user space via
+		 * copy_from_user(), get_user() etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (cur->fault_handler && cur->fault_handler(cur, regs, fsr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ if (fixup_exception(regs))
+ return 1;
+ }
+ return 0;
+}
+
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ return NOTIFY_DONE;
+}
+
+static void __kprobes kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *p, *cur_kprobe;
+ struct kprobe_ctlblk *kcb;
+ unsigned long addr = instruction_pointer(regs);
+
+ kcb = get_kprobe_ctlblk();
+ cur_kprobe = kprobe_running();
+
+ p = get_kprobe((kprobe_opcode_t *) addr);
+
+ if (p) {
+ if (cur_kprobe) {
+ if (reenter_kprobe(p, regs, kcb))
+ return;
+ } else {
+ /* Probe hit */
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with normal processing. If we have a
+ * pre-handler and it returned non-zero, it prepped
+ * for calling the break_handler below on re-entry,
+ * so get out doing nothing more here.
+ *
+			 * The pre_handler can itself hit a breakpoint and
+			 * single-step before returning, so keep the PSTATE
+			 * D-flag enabled until the pre_handler returns.
+ */
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
+ setup_singlestep(p, regs, kcb, 0);
+ return;
+ }
+ }
+ } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) ==
+ BRK64_OPCODE_KPROBES) && cur_kprobe) {
+ /* We probably hit a jprobe. Call its break handler. */
+ if (cur_kprobe->break_handler &&
+ cur_kprobe->break_handler(cur_kprobe, regs)) {
+ setup_singlestep(cur_kprobe, regs, kcb, 0);
+ return;
+ }
+ }
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ * Return back to original instruction, and continue.
+ */
+}
+
+static int __kprobes
+kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr)
+{
+ if ((kcb->ss_ctx.ss_pending)
+ && (kcb->ss_ctx.match_addr == addr)) {
+ clear_ss_context(kcb); /* clear pending ss */
+ return DBG_HOOK_HANDLED;
+ }
+ /* not ours, kprobes should ignore it */
+ return DBG_HOOK_ERROR;
+}
+
+int __kprobes
+kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ int retval;
+
+ /* return error if this is not our step */
+ retval = kprobe_ss_hit(kcb, instruction_pointer(regs));
+
+ if (retval == DBG_HOOK_HANDLED) {
+ kprobes_restore_local_irqflag(kcb, regs);
+ kernel_disable_single_step();
+
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ spsr_set_debug_flag(regs, 1);
+
+ post_kprobe_handler(kcb, regs);
+ }
+
+ return retval;
+}
+
+int __kprobes
+kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr)
+{
+ kprobe_handler(regs);
+ return DBG_HOOK_HANDLED;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ kcb->jprobe_saved_regs = *regs;
+ /*
+ * Since we can't be sure where in the stack frame "stacked"
+ * pass-by-value arguments are stored we just don't try to
+ * duplicate any of the stack. Do not use jprobes on functions that
+ * use more than 64 bytes (after padding each to an 8 byte boundary)
+ * of arguments, or pass individual arguments larger than 16 bytes.
+ */
+
+ instruction_pointer_set(regs, (unsigned long) jp->entry);
+ preempt_disable();
+ pause_graph_tracing();
+ return 1;
+}
+
+void __kprobes jprobe_return(void)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ /*
+	 * The jprobe handler returns by raising a break exception that is
+	 * encoded the same way as a kprobe break, but with two differences:
+	 * - a special PC to distinguish it from other kprobes, and
+	 * - the stack pointer restored to the value saved in pt_regs.
+ */
+ asm volatile(" mov sp, %0 \n"
+ "jprobe_return_break: brk %1 \n"
+ :
+ : "r" (kcb->jprobe_saved_regs.sp),
+ "I" (BRK64_ESR_KPROBES)
+ : "memory");
+
+ unreachable();
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ long stack_addr = kcb->jprobe_saved_regs.sp;
+ long orig_sp = kernel_stack_pointer(regs);
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ extern const char jprobe_return_break[];
+
+ if (instruction_pointer(regs) != (u64) jprobe_return_break)
+ return 0;
+
+ if (orig_sp != stack_addr) {
+ struct pt_regs *saved_regs =
+ (struct pt_regs *)kcb->jprobe_saved_regs.sp;
+ pr_err("current sp %lx does not match saved sp %lx\n",
+ orig_sp, stack_addr);
+ pr_err("Saved registers for jprobe %p\n", jp);
+ show_regs(saved_regs);
+ pr_err("Current registers\n");
+ show_regs(regs);
+ BUG();
+ }
+ unpause_graph_tracing();
+ *regs = kcb->jprobe_saved_regs;
+ preempt_enable_no_resched();
+ return 1;
+}
+
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+ extern char __idmap_text_start[], __idmap_text_end[];
+
+ if ((addr >= (unsigned long)__kprobes_text_start &&
+ addr < (unsigned long)__kprobes_text_end) ||
+ (addr >= (unsigned long)__entry_text_start &&
+ addr < (unsigned long)__entry_text_end) ||
+ (addr >= (unsigned long)__idmap_text_start &&
+ addr < (unsigned long)__idmap_text_end) ||
+ !!search_exception_tables(addr))
+ return true;
+
+ return false;
+}
+
+void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address =
+ (unsigned long)&kretprobe_trampoline;
+ kprobe_opcode_t *correct_ret_addr = NULL;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because multiple functions in the call path have
+ * return probes installed on them, and/or more than one
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always pushed into the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the (chronologically) first instance's ret_addr
+ * will be the real return address, and all the rest will
+ * point to kretprobe_trampoline.
+ */
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ if (ri->rp && ri->rp->handler) {
+ __this_cpu_write(current_kprobe, &ri->rp->kp);
+ get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ ri->ret_addr = correct_ret_addr;
+ ri->rp->handler(ri, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ }
+
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_hash_unlock(current, &flags);
+
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ return (void *)orig_ret_address;
+}
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *)regs->regs[30];
+
+ /* replace return addr (x30) with trampoline */
+ regs->regs[30] = (long)&kretprobe_trampoline;
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ return 0;
+}
+
+int __init arch_init_kprobes(void)
+{
+ return 0;
+}
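+
+/*
+ * Minimal usage sketch (illustrative only, not part of this file): the
+ * arch hooks above are exercised through the generic kprobes API. The
+ * symbol name below is a placeholder; a pre_handler returning 0 lets the
+ * probed instruction be stepped out of line or simulated, as decided by
+ * arch_prepare_kprobe().
+ *
+ *	static int example_pre(struct kprobe *p, struct pt_regs *regs)
+ *	{
+ *		pr_info("hit %pS, pc=%llx\n", p->addr, regs->pc);
+ *		return 0;
+ *	}
+ *
+ *	static struct kprobe example_kp = {
+ *		.symbol_name	= "some_kernel_function",
+ *		.pre_handler	= example_pre,
+ *	};
+ *
+ * register_kprobe(&example_kp) arms the probe; unregister_kprobe() removes
+ * it again.
+ */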
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
new file mode 100644
index 000000000000..5d6e7f14638c
--- /dev/null
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -0,0 +1,81 @@
+/*
+ * trampoline entry and return code for kretprobes.
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+
+ .text
+
+ .macro save_all_base_regs
+ stp x0, x1, [sp, #S_X0]
+ stp x2, x3, [sp, #S_X2]
+ stp x4, x5, [sp, #S_X4]
+ stp x6, x7, [sp, #S_X6]
+ stp x8, x9, [sp, #S_X8]
+ stp x10, x11, [sp, #S_X10]
+ stp x12, x13, [sp, #S_X12]
+ stp x14, x15, [sp, #S_X14]
+ stp x16, x17, [sp, #S_X16]
+ stp x18, x19, [sp, #S_X18]
+ stp x20, x21, [sp, #S_X20]
+ stp x22, x23, [sp, #S_X22]
+ stp x24, x25, [sp, #S_X24]
+ stp x26, x27, [sp, #S_X26]
+ stp x28, x29, [sp, #S_X28]
+ add x0, sp, #S_FRAME_SIZE
+ stp lr, x0, [sp, #S_LR]
+ /*
+ * Construct a useful saved PSTATE
+ */
+ mrs x0, nzcv
+ mrs x1, daif
+ orr x0, x0, x1
+ mrs x1, CurrentEL
+ orr x0, x0, x1
+ mrs x1, SPSel
+ orr x0, x0, x1
+ stp xzr, x0, [sp, #S_PC]
+ .endm
+
+ .macro restore_all_base_regs
+ ldr x0, [sp, #S_PSTATE]
+ and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT)
+ msr nzcv, x0
+ ldp x0, x1, [sp, #S_X0]
+ ldp x2, x3, [sp, #S_X2]
+ ldp x4, x5, [sp, #S_X4]
+ ldp x6, x7, [sp, #S_X6]
+ ldp x8, x9, [sp, #S_X8]
+ ldp x10, x11, [sp, #S_X10]
+ ldp x12, x13, [sp, #S_X12]
+ ldp x14, x15, [sp, #S_X14]
+ ldp x16, x17, [sp, #S_X16]
+ ldp x18, x19, [sp, #S_X18]
+ ldp x20, x21, [sp, #S_X20]
+ ldp x22, x23, [sp, #S_X22]
+ ldp x24, x25, [sp, #S_X24]
+ ldp x26, x27, [sp, #S_X26]
+ ldp x28, x29, [sp, #S_X28]
+ .endm
+
+ENTRY(kretprobe_trampoline)
+ sub sp, sp, #S_FRAME_SIZE
+
+ save_all_base_regs
+
+ mov x0, sp
+ bl trampoline_probe_handler
+ /*
+ * Replace trampoline address in lr with actual orig_ret_addr return
+ * address.
+ */
+ mov lr, x0
+
+ restore_all_base_regs
+
+ add sp, sp, #S_FRAME_SIZE
+ ret
+
+ENDPROC(kretprobe_trampoline)
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
new file mode 100644
index 000000000000..8977ce9d009d
--- /dev/null
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -0,0 +1,217 @@
+/*
+ * arch/arm64/kernel/probes/simulate-insn.c
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+
+#include "simulate-insn.h"
+
+#define sign_extend(x, signbit) \
+ ((x) | (0 - ((x) & (1 << (signbit)))))
+
+#define bbl_displacement(insn) \
+ sign_extend(((insn) & 0x3ffffff) << 2, 27)
+
+#define bcond_displacement(insn) \
+ sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+
+#define cbz_displacement(insn) \
+ sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+
+#define tbz_displacement(insn) \
+ sign_extend(((insn >> 5) & 0x3fff) << 2, 15)
+
+#define ldr_displacement(insn) \
+ sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
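+
+/*
+ * Worked example (illustrative only): an unconditional "b" is encoded as
+ * 0b000101 followed by imm26, so "b ." is 0x14000000 and bbl_displacement()
+ * yields 0. For 0x17ffffff (imm26 = 0x3ffffff) the field is shifted left
+ * by 2 and sign-extended from bit 27, giving -4, i.e. a branch back to the
+ * previous instruction.
+ */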
+
+static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val)
+{
+ if (reg < 31)
+ regs->regs[reg] = val;
+}
+
+static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val)
+{
+ if (reg < 31)
+ regs->regs[reg] = lower_32_bits(val);
+}
+
+static inline u64 get_x_reg(struct pt_regs *regs, int reg)
+{
+ if (reg < 31)
+ return regs->regs[reg];
+ else
+ return 0;
+}
+
+static inline u32 get_w_reg(struct pt_regs *regs, int reg)
+{
+ if (reg < 31)
+ return lower_32_bits(regs->regs[reg]);
+ else
+ return 0;
+}
+
+static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)
+{
+ int xn = opcode & 0x1f;
+
+ return (opcode & (1 << 31)) ?
+ (get_x_reg(regs, xn) == 0) : (get_w_reg(regs, xn) == 0);
+}
+
+static bool __kprobes check_cbnz(u32 opcode, struct pt_regs *regs)
+{
+ int xn = opcode & 0x1f;
+
+ return (opcode & (1 << 31)) ?
+ (get_x_reg(regs, xn) != 0) : (get_w_reg(regs, xn) != 0);
+}
+
+static bool __kprobes check_tbz(u32 opcode, struct pt_regs *regs)
+{
+ int xn = opcode & 0x1f;
+ int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f);
+
+ return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) == 0;
+}
+
+static bool __kprobes check_tbnz(u32 opcode, struct pt_regs *regs)
+{
+ int xn = opcode & 0x1f;
+ int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f);
+
+ return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) != 0;
+}
+
+/*
+ * instruction simulation functions
+ */
+void __kprobes
+simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs)
+{
+ long imm, xn, val;
+
+ xn = opcode & 0x1f;
+ imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3);
+ imm = sign_extend(imm, 20);
+ if (opcode & 0x80000000)
+ val = (imm<<12) + (addr & 0xfffffffffffff000);
+ else
+ val = imm + addr;
+
+ set_x_reg(regs, xn, val);
+
+ instruction_pointer_set(regs, instruction_pointer(regs) + 4);
+}
+
+void __kprobes
+simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int disp = bbl_displacement(opcode);
+
+ /* Link register is x30 */
+ if (opcode & (1 << 31))
+ set_x_reg(regs, 30, addr + 4);
+
+ instruction_pointer_set(regs, addr + disp);
+}
+
+void __kprobes
+simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int disp = 4;
+
+ if (aarch32_opcode_cond_checks[opcode & 0xf](regs->pstate & 0xffffffff))
+ disp = bcond_displacement(opcode);
+
+ instruction_pointer_set(regs, addr + disp);
+}
+
+void __kprobes
+simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int xn = (opcode >> 5) & 0x1f;
+
+ /* update pc first in case we're doing a "blr lr" */
+ instruction_pointer_set(regs, get_x_reg(regs, xn));
+
+ /* Link register is x30 */
+ if (((opcode >> 21) & 0x3) == 1)
+ set_x_reg(regs, 30, addr + 4);
+}
+
+void __kprobes
+simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int disp = 4;
+
+ if (opcode & (1 << 24)) {
+ if (check_cbnz(opcode, regs))
+ disp = cbz_displacement(opcode);
+ } else {
+ if (check_cbz(opcode, regs))
+ disp = cbz_displacement(opcode);
+ }
+ instruction_pointer_set(regs, addr + disp);
+}
+
+void __kprobes
+simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs)
+{
+ int disp = 4;
+
+ if (opcode & (1 << 24)) {
+ if (check_tbnz(opcode, regs))
+ disp = tbz_displacement(opcode);
+ } else {
+ if (check_tbz(opcode, regs))
+ disp = tbz_displacement(opcode);
+ }
+ instruction_pointer_set(regs, addr + disp);
+}
+
+void __kprobes
+simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs)
+{
+ u64 *load_addr;
+ int xn = opcode & 0x1f;
+ int disp;
+
+ disp = ldr_displacement(opcode);
+ load_addr = (u64 *) (addr + disp);
+
+ if (opcode & (1 << 30)) /* x0-x30 */
+ set_x_reg(regs, xn, *load_addr);
+ else /* w0-w30 */
+ set_w_reg(regs, xn, *load_addr);
+
+ instruction_pointer_set(regs, instruction_pointer(regs) + 4);
+}
+
+void __kprobes
+simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs)
+{
+ s32 *load_addr;
+ int xn = opcode & 0x1f;
+ int disp;
+
+ disp = ldr_displacement(opcode);
+ load_addr = (s32 *) (addr + disp);
+
+ set_x_reg(regs, xn, *load_addr);
+
+ instruction_pointer_set(regs, instruction_pointer(regs) + 4);
+}
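
The displacement macros above recover the PC-relative offset from the fixed immediate field of each A64 encoding. A stand-alone user-space sketch (not part of the patch) runs the same arithmetic on a concrete B opcode; 0x17ffffff encodes imm26 = -1, i.e. a branch back by one instruction:

#include <stdio.h>
#include <stdint.h>

/* Same helpers as simulate-insn.c, repeated here for a self-contained demo. */
#define sign_extend(x, signbit) \
	((x) | (0 - ((x) & (1 << (signbit)))))
#define bbl_displacement(insn) \
	sign_extend(((insn) & 0x3ffffff) << 2, 27)

int main(void)
{
	uint32_t insn = 0x17ffffff;		/* B with imm26 = 0x3ffffff (-1) */
	int is_bl = (insn & (1u << 31)) != 0;	/* bit 31 selects BL over B */
	int disp = bbl_displacement(insn);

	printf("%s, displacement %d bytes\n", is_bl ? "bl" : "b", disp);
	/* prints: b, displacement -4 bytes */
	return 0;
}
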
diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h
new file mode 100644
index 000000000000..050bde683c2d
--- /dev/null
+++ b/arch/arm64/kernel/probes/simulate-insn.h
@@ -0,0 +1,28 @@
+/*
+ * arch/arm64/kernel/probes/simulate-insn.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef _ARM_KERNEL_KPROBES_SIMULATE_INSN_H
+#define _ARM_KERNEL_KPROBES_SIMULATE_INSN_H
+
+void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs);
+void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs);
+
+#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 236120fc4dbc..5dd9b572259f 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -87,6 +87,16 @@ void arch_cpu_idle(void)
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
+void arch_cpu_idle_enter(void)
+{
+ idle_notifier_call_chain(IDLE_START);
+}
+
+void arch_cpu_idle_exit(void)
+{
+ idle_notifier_call_chain(IDLE_END);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
{
@@ -182,7 +192,7 @@ static void show_data(unsigned long addr, int nbytes, const char *name)
* don't attempt to dump non-kernel addresses or
* values that are probably just small negative numbers
*/
- if (addr < PAGE_OFFSET || addr > -256UL)
+ if (addr < KIMAGE_VADDR || addr > -256UL)
return;
printk("\n%s: %#lx:\n", name, addr);
@@ -207,29 +217,23 @@ static void show_data(unsigned long addr, int nbytes, const char *name)
if (probe_kernel_address(p, data)) {
printk(" ********");
} else {
- printk(" %08x", data);
+ pr_cont(" %08x", data);
}
++p;
}
- printk("\n");
+ pr_cont("\n");
}
}
static void show_extra_register_data(struct pt_regs *regs, int nbytes)
{
mm_segment_t fs;
- unsigned int i;
fs = get_fs();
set_fs(KERNEL_DS);
show_data(regs->pc - nbytes, nbytes * 2, "PC");
show_data(regs->regs[30] - nbytes, nbytes * 2, "LR");
show_data(regs->sp - nbytes, nbytes * 2, "SP");
- for (i = 0; i < 30; i++) {
- char name[4];
- snprintf(name, sizeof(name), "X%u", i);
- show_data(regs->regs[i] - nbytes, nbytes * 2, name);
- }
set_fs(fs);
}
@@ -260,7 +264,7 @@ void __show_regs(struct pt_regs *regs)
printk("\n");
}
if (!user_mode(regs))
- show_extra_register_data(regs, 128);
+ show_extra_register_data(regs, 64);
printk("\n");
}
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index dbff1ab8e92f..2f8094ab1cef 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -21,7 +21,6 @@
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/psci.h>
-#include <linux/slab.h>
#include <uapi/linux/psci.h>
@@ -29,76 +28,10 @@
#include <asm/cpu_ops.h>
#include <asm/errno.h>
#include <asm/smp_plat.h>
-#include <asm/suspend.h>
-
-static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state);
-
-static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu)
-{
- int i, ret, count = 0;
- u32 *psci_states;
- struct device_node *state_node, *cpu_node;
-
- cpu_node = of_get_cpu_node(cpu, NULL);
- if (!cpu_node)
- return -ENODEV;
-
- /*
- * If the PSCI cpu_suspend function hook has not been initialized
- * idle states must not be enabled, so bail out
- */
- if (!psci_ops.cpu_suspend)
- return -EOPNOTSUPP;
-
- /* Count idle states */
- while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
- count))) {
- count++;
- of_node_put(state_node);
- }
-
- if (!count)
- return -ENODEV;
-
- psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
- if (!psci_states)
- return -ENOMEM;
-
- for (i = 0; i < count; i++) {
- u32 state;
-
- state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
-
- ret = of_property_read_u32(state_node,
- "arm,psci-suspend-param",
- &state);
- if (ret) {
- pr_warn(" * %s missing arm,psci-suspend-param property\n",
- state_node->full_name);
- of_node_put(state_node);
- goto free_mem;
- }
-
- of_node_put(state_node);
- pr_debug("psci-power-state %#x index %d\n", state, i);
- if (!psci_power_state_is_valid(state)) {
- pr_warn("Invalid PSCI power state %#x\n", state);
- ret = -EINVAL;
- goto free_mem;
- }
- psci_states[i] = state;
- }
- /* Idle states parsed correctly, initialize per-cpu pointer */
- per_cpu(psci_power_state, cpu) = psci_states;
- return 0;
-
-free_mem:
- kfree(psci_states);
- return ret;
-}
static int __init cpu_psci_cpu_init(unsigned int cpu)
{
+ pr_info("Initializing psci_cpu_init\n");
return 0;
}
@@ -166,7 +99,7 @@ static int cpu_psci_cpu_kill(unsigned int cpu)
do {
err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
- pr_info("CPU%d killed (polled %d ms)\n", cpu,
+ pr_debug("CPU%d killed (polled %d ms)\n", cpu,
jiffies_to_msecs(jiffies - start));
return 0;
}
@@ -180,38 +113,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu)
}
#endif
-static int psci_suspend_finisher(unsigned long index)
-{
- u32 *state = __this_cpu_read(psci_power_state);
-
- return psci_ops.cpu_suspend(state[index - 1],
- virt_to_phys(cpu_resume));
-}
-
-static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index)
-{
- int ret;
- u32 *state = __this_cpu_read(psci_power_state);
- /*
- * idle state index 0 corresponds to wfi, should never be called
- * from the cpu_suspend operations
- */
- if (WARN_ON_ONCE(!index))
- return -EINVAL;
-
- if (!psci_power_state_loses_context(state[index - 1]))
- ret = psci_ops.cpu_suspend(state[index - 1], 0);
- else
- ret = cpu_suspend(index, psci_suspend_finisher);
-
- return ret;
-}
-
const struct cpu_operations cpu_psci_ops = {
.name = "psci",
#ifdef CONFIG_CPU_IDLE
- .cpu_init_idle = cpu_psci_cpu_init_idle,
- .cpu_suspend = cpu_psci_cpu_suspend,
+ .cpu_init_idle = psci_cpu_init_idle,
+ .cpu_suspend = psci_cpu_suspend_enter,
#endif
.cpu_init = cpu_psci_cpu_init,
.cpu_prepare = cpu_psci_cpu_prepare,
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index beff0fb11b6b..c67bd311e815 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -49,6 +49,106 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+#define GPR_OFFSET_NAME(r) \
+ {.name = "x" #r, .offset = offsetof(struct pt_regs, regs[r])}
+
+static const struct pt_regs_offset regoffset_table[] = {
+ GPR_OFFSET_NAME(0),
+ GPR_OFFSET_NAME(1),
+ GPR_OFFSET_NAME(2),
+ GPR_OFFSET_NAME(3),
+ GPR_OFFSET_NAME(4),
+ GPR_OFFSET_NAME(5),
+ GPR_OFFSET_NAME(6),
+ GPR_OFFSET_NAME(7),
+ GPR_OFFSET_NAME(8),
+ GPR_OFFSET_NAME(9),
+ GPR_OFFSET_NAME(10),
+ GPR_OFFSET_NAME(11),
+ GPR_OFFSET_NAME(12),
+ GPR_OFFSET_NAME(13),
+ GPR_OFFSET_NAME(14),
+ GPR_OFFSET_NAME(15),
+ GPR_OFFSET_NAME(16),
+ GPR_OFFSET_NAME(17),
+ GPR_OFFSET_NAME(18),
+ GPR_OFFSET_NAME(19),
+ GPR_OFFSET_NAME(20),
+ GPR_OFFSET_NAME(21),
+ GPR_OFFSET_NAME(22),
+ GPR_OFFSET_NAME(23),
+ GPR_OFFSET_NAME(24),
+ GPR_OFFSET_NAME(25),
+ GPR_OFFSET_NAME(26),
+ GPR_OFFSET_NAME(27),
+ GPR_OFFSET_NAME(28),
+ GPR_OFFSET_NAME(29),
+ GPR_OFFSET_NAME(30),
+ {.name = "lr", .offset = offsetof(struct pt_regs, regs[30])},
+ REG_OFFSET_NAME(sp),
+ REG_OFFSET_NAME(pc),
+ REG_OFFSET_NAME(pstate),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL.
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @addr: address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ return ((addr & ~(THREAD_SIZE - 1)) ==
+ (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+ addr += n;
+ if (regs_within_kernel_stack(regs, (unsigned long)addr))
+ return *addr;
+ else
+ return 0;
+}
+
/*
* TODO: does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
@@ -1254,13 +1354,13 @@ static void tracehook_report_syscall(struct pt_regs *regs,
asmlinkage int syscall_trace_enter(struct pt_regs *regs)
{
- /* Do the secure computing check first; failures should be fast. */
- if (secure_computing() == -1)
- return -1;
-
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+ /* Do the secure computing after ptrace; failures should be fast. */
+ if (secure_computing(NULL) == -1)
+ return -1;
+
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, regs->syscallno);
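
The new register-offset and stack accessors are the arm64 side of HAVE_REGS_AND_STACK_ACCESS_API, which kprobe-based event tracing uses to fetch probe arguments by name. A hedged sketch of how a caller might combine them; regs_get_register() is assumed to be provided by asm/ptrace.h as part of the same series, and fetch_probe_arg() is a hypothetical helper:

#include <linux/ptrace.h>

/* Resolve "x0".."x30", "sp", "pc", "pstate", or fall back to a stack slot. */
static unsigned long fetch_probe_arg(struct pt_regs *regs,
				     const char *name, unsigned int slot)
{
	int off = regs_query_register_offset(name);

	if (off >= 0)
		return regs_get_register(regs, off);

	/* Not a register name: read the Nth word above the kernel SP. */
	return regs_get_kernel_stack_nth(regs, slot);
}
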
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 4bc5bc9463b8..01f259ec5700 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -44,6 +44,7 @@
#include <linux/of_platform.h>
#include <linux/efi.h>
#include <linux/psci.h>
+#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <asm/acpi.h>
@@ -65,8 +66,15 @@
#include <asm/xen/hypervisor.h>
#include <asm/mmu_context.h>
+unsigned int boot_reason;
+EXPORT_SYMBOL(boot_reason);
+
+unsigned int cold_boot;
+EXPORT_SYMBOL(cold_boot);
+
phys_addr_t __fdt_pointer __initdata;
+const char *machine_name;
/*
* Standard memory resources
*/
@@ -176,7 +184,6 @@ static void __init smp_build_mpidr_hash(void)
*/
if (mpidr_hash_size() > 4 * num_possible_cpus())
pr_warn("Large number of MPIDR hash buckets detected\n");
- __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
}
static void __init setup_machine_fdt(phys_addr_t dt_phys)
@@ -194,7 +201,11 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
cpu_relax();
}
- dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name());
+ machine_name = of_flat_dt_get_machine_name();
+ if (machine_name) {
+ dump_stack_set_arch_desc("%s (DT)", machine_name);
+ pr_info("Machine: %s\n", machine_name);
+ }
}
static void __init request_standard_resources(void)
@@ -290,6 +301,8 @@ static inline void __init relocate_initrd(void)
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
+void __init __weak init_random_pool(void) { }
+
void __init setup_arch(char **cmdline_p)
{
pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
@@ -368,6 +381,7 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
+ init_random_pool();
if (boot_args[1] || boot_args[2] || boot_args[3]) {
pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
"\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
@@ -401,7 +415,13 @@ static int __init topology_init(void)
return 0;
}
-subsys_initcall(topology_init);
+postcore_initcall(topology_init);
+
+void arch_setup_pdev_archdata(struct platform_device *pdev)
+{
+ pdev->archdata.dma_mask = DMA_BIT_MASK(32);
+ pdev->dev.dma_mask = &pdev->archdata.dma_mask;
+}
/*
* Dump out kernel offset information on panic.
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index a8eafdbc7cb8..0bed9a899850 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -25,6 +25,7 @@
#include <linux/uaccess.h>
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
+#include <linux/syscalls.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
@@ -402,6 +403,9 @@ static void do_signal(struct pt_regs *regs)
asmlinkage void do_notify_resume(struct pt_regs *regs,
unsigned int thread_flags)
{
+ /* Check valid user FS if needed */
+ addr_limit_user_check();
+
if (thread_flags & _TIF_SIGPENDING)
do_signal(regs);
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 107335637390..666363d127e5 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -28,42 +28,7 @@
#include <asm/signal32.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
-
-struct compat_sigcontext {
- /* We always set these two fields to 0 */
- compat_ulong_t trap_no;
- compat_ulong_t error_code;
-
- compat_ulong_t oldmask;
- compat_ulong_t arm_r0;
- compat_ulong_t arm_r1;
- compat_ulong_t arm_r2;
- compat_ulong_t arm_r3;
- compat_ulong_t arm_r4;
- compat_ulong_t arm_r5;
- compat_ulong_t arm_r6;
- compat_ulong_t arm_r7;
- compat_ulong_t arm_r8;
- compat_ulong_t arm_r9;
- compat_ulong_t arm_r10;
- compat_ulong_t arm_fp;
- compat_ulong_t arm_ip;
- compat_ulong_t arm_sp;
- compat_ulong_t arm_lr;
- compat_ulong_t arm_pc;
- compat_ulong_t arm_cpsr;
- compat_ulong_t fault_address;
-};
-
-struct compat_ucontext {
- compat_ulong_t uc_flags;
- compat_uptr_t uc_link;
- compat_stack_t uc_stack;
- struct compat_sigcontext uc_mcontext;
- compat_sigset_t uc_sigmask;
- int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))];
- compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8)));
-};
+#include <asm/vdso.h>
struct compat_vfp_sigframe {
compat_ulong_t magic;
@@ -91,16 +56,6 @@ struct compat_aux_sigframe {
unsigned long end_magic;
} __attribute__((__aligned__(8)));
-struct compat_sigframe {
- struct compat_ucontext uc;
- compat_ulong_t retcode[2];
-};
-
-struct compat_rt_sigframe {
- struct compat_siginfo info;
- struct compat_sigframe sig;
-};
-
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
@@ -484,14 +439,27 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
retcode = ptr_to_compat(ka->sa.sa_restorer);
} else {
/* Set up sigreturn pointer */
+#ifdef CONFIG_VDSO32
+ void *vdso_base = current->mm->context.vdso;
+ void *trampoline =
+ (ka->sa.sa_flags & SA_SIGINFO
+ ? (thumb
+ ? VDSO_SYMBOL(vdso_base, compat_rt_sigreturn_thumb)
+ : VDSO_SYMBOL(vdso_base, compat_rt_sigreturn_arm))
+ : (thumb
+ ? VDSO_SYMBOL(vdso_base, compat_sigreturn_thumb)
+ : VDSO_SYMBOL(vdso_base, compat_sigreturn_arm)));
+
+ retcode = ptr_to_compat(trampoline) + thumb;
+#else
+ void *sigreturn_base = current->mm->context.vdso;
unsigned int idx = thumb << 1;
if (ka->sa.sa_flags & SA_SIGINFO)
idx += 3;
- retcode = AARCH32_VECTORS_BASE +
- AARCH32_KERN_SIGRET_CODE_OFFSET +
- (idx << 2) + thumb;
+ retcode = ptr_to_compat(sigreturn_base) + (idx << 2) + thumb;
+#endif
}
regs->regs[0] = usig;
diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S
new file mode 100644
index 000000000000..6ecda4d84cd5
--- /dev/null
+++ b/arch/arm64/kernel/sigreturn32.S
@@ -0,0 +1,67 @@
+/*
+ * sigreturn trampolines for AArch32.
+ *
+ * Copyright (C) 2005-2011 Nicolas Pitre <nico@fluxnic.net>
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * AArch32 sigreturn code
+ *
+ * For ARM syscalls, the syscall number has to be loaded into r7.
+ * We do not support an OABI userspace.
+ *
+ * For Thumb syscalls, we also pass the syscall number via r7. We therefore
+ * need two 16-bit instructions.
+ */
+
+#include <asm/unistd.h>
+
+ .globl __aarch32_sigret_code_start
+__aarch32_sigret_code_start:
+
+ /*
+ * ARM Code
+ */
+ // mov r7, #__NR_compat_sigreturn
+ .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3
+ // svc #__NR_compat_sigreturn
+ .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef
+
+ /*
+ * Thumb code
+ */
+ // mov r7, #__NR_compat_sigreturn
+ .byte __NR_compat_sigreturn, 0x27
+ // svc #__NR_compat_sigreturn
+ .byte __NR_compat_sigreturn, 0xdf
+
+ /*
+ * ARM code
+ */
+ // mov r7, #__NR_compat_rt_sigreturn
+ .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3
+ // svc #__NR_compat_rt_sigreturn
+ .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef
+
+ /*
+ * Thumb code
+ */
+ // mov r7, #__NR_compat_rt_sigreturn
+ .byte __NR_compat_rt_sigreturn, 0x27
+ // svc #__NR_compat_rt_sigreturn
+ .byte __NR_compat_rt_sigreturn, 0xdf
+
+ .globl __aarch32_sigret_code_end
+__aarch32_sigret_code_end:
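
Each trampoline embeds the syscall number as the low byte of a little-endian A32 or T32 encoding, which is why every .byte line starts with the __NR_compat_* constant. A stand-alone sketch of the resulting instruction words, using 119 as the AArch32 sigreturn number purely for illustration:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint8_t nr = 119;	/* illustrative __NR_compat_sigreturn */
	uint8_t a32_mov[4] = { nr, 0x70, 0xa0, 0xe3 };	/* mov r7, #nr  */
	uint8_t t32_mov[2] = { nr, 0x27 };		/* movs r7, #nr */
	uint8_t t32_svc[2] = { nr, 0xdf };		/* svc #nr      */
	uint32_t w;
	uint16_t m, s;

	memcpy(&w, a32_mov, sizeof(w));
	memcpy(&m, t32_mov, sizeof(m));
	memcpy(&s, t32_svc, sizeof(s));
	/* On little-endian: 0xe3a07077, 0x2777, 0xdf77 */
	printf("A32 mov: %#010x  T32 mov: %#06x  T32 svc: %#06x\n", w, m, s);
	return 0;
}
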
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index f586f7c875e2..100f92f13113 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -49,39 +49,32 @@
orr \dst, \dst, \mask // dst|=(aff3>>rs3)
.endm
/*
- * Save CPU state for a suspend and execute the suspend finisher.
- * On success it will return 0 through cpu_resume - ie through a CPU
- * soft/hard reboot from the reset vector.
- * On failure it returns the suspend finisher return value or force
- * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher
- * is not allowed to return, if it does this must be considered failure).
- * It saves callee registers, and allocates space on the kernel stack
- * to save the CPU specific registers + some other data for resume.
+ * Save CPU state in the provided sleep_stack_data area, and publish its
+ * location for cpu_resume()'s use in sleep_save_stash.
*
- * x0 = suspend finisher argument
- * x1 = suspend finisher function pointer
+ * cpu_resume() will restore this saved state, and return. Because the
+ * link-register is saved and restored, it will appear to return from this
+ * function. So that the caller can tell the suspend/resume paths apart,
+ * __cpu_suspend_enter() will always return a non-zero value, whereas the
+ * path through cpu_resume() will return 0.
+ *
+ * x0 = struct sleep_stack_data area
*/
ENTRY(__cpu_suspend_enter)
- stp x29, lr, [sp, #-96]!
- stp x19, x20, [sp,#16]
- stp x21, x22, [sp,#32]
- stp x23, x24, [sp,#48]
- stp x25, x26, [sp,#64]
- stp x27, x28, [sp,#80]
- /*
- * Stash suspend finisher and its argument in x20 and x19
- */
- mov x19, x0
- mov x20, x1
+ stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS]
+ stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16]
+ stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32]
+ stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48]
+ stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64]
+ stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80]
+
+ /* save the sp in cpu_suspend_ctx */
mov x2, sp
- sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx
- mov x0, sp
- /*
- * x0 now points to struct cpu_suspend_ctx allocated on the stack
- */
- str x2, [x0, #CPU_CTX_SP]
- ldr x1, =sleep_save_sp
- ldr x1, [x1, #SLEEP_SAVE_SP_VIRT]
+ str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
+
+ /* find the mpidr_hash */
+ ldr x1, =sleep_save_stash
+ ldr x1, [x1]
mrs x7, mpidr_el1
ldr x9, =mpidr_hash
ldr x10, [x9, #MPIDR_HASH_MASK]
@@ -93,70 +86,36 @@ ENTRY(__cpu_suspend_enter)
ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
add x1, x1, x8, lsl #3
- bl __cpu_suspend_save
- /*
- * Grab suspend finisher in x20 and its argument in x19
- */
- mov x0, x19
- mov x1, x20
- /*
- * We are ready for power down, fire off the suspend finisher
- * in x1, with argument in x0
- */
- blr x1
- /*
- * Never gets here, unless suspend finisher fails.
- * Successful cpu_suspend should return from cpu_resume, returning
- * through this code path is considered an error
- * If the return value is set to 0 force x0 = -EOPNOTSUPP
- * to make sure a proper error condition is propagated
- */
- cmp x0, #0
- mov x3, #-EOPNOTSUPP
- csel x0, x3, x0, eq
- add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer
- ldp x19, x20, [sp, #16]
- ldp x21, x22, [sp, #32]
- ldp x23, x24, [sp, #48]
- ldp x25, x26, [sp, #64]
- ldp x27, x28, [sp, #80]
- ldp x29, lr, [sp], #96
+
+ str x0, [x1]
+ add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
+ stp x29, lr, [sp, #-16]!
+ bl cpu_do_suspend
+ ldp x29, lr, [sp], #16
+ mov x0, #1
ret
ENDPROC(__cpu_suspend_enter)
.ltorg
-/*
- * x0 must contain the sctlr value retrieved from restored context
- */
- .pushsection ".idmap.text", "ax"
-ENTRY(cpu_resume_mmu)
- ldr x3, =cpu_resume_after_mmu
- msr sctlr_el1, x0 // restore sctlr_el1
- isb
- /*
- * Invalidate the local I-cache so that any instructions fetched
- * speculatively from the PoC are discarded, since they may have
- * been dynamically patched at the PoU.
- */
- ic iallu
- dsb nsh
- isb
- br x3 // global jump to virtual address
-ENDPROC(cpu_resume_mmu)
- .popsection
-cpu_resume_after_mmu:
- mov x0, #0 // return zero on success
- ldp x19, x20, [sp, #16]
- ldp x21, x22, [sp, #32]
- ldp x23, x24, [sp, #48]
- ldp x25, x26, [sp, #64]
- ldp x27, x28, [sp, #80]
- ldp x29, lr, [sp], #96
- ret
-ENDPROC(cpu_resume_after_mmu)
-
+ .pushsection ".idmap.text", "ax"
ENTRY(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
+ /* enable the MMU early - so we can access sleep_save_stash by va */
+ adr_l lr, __enable_mmu /* __cpu_setup will return here */
+ adr_l x27, _resume_switched /* __enable_mmu will branch here */
+ adrp x25, idmap_pg_dir
+ adrp x26, swapper_pg_dir
+ b __cpu_setup
+ENDPROC(cpu_resume)
+
+_resume_switched:
+ ldr x8, =_cpu_resume
+ br x8
+ENDPROC(_resume_switched)
+ .ltorg
+ .popsection
+
+ENTRY(_cpu_resume)
mrs x1, mpidr_el1
adrp x8, mpidr_hash
add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
@@ -166,17 +125,29 @@ ENTRY(cpu_resume)
ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
/* x7 contains hash index, let's use it to grab context pointer */
- ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
+ ldr_l x0, sleep_save_stash
ldr x0, [x0, x7, lsl #3]
+ add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
+ add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
- /* load physical address of identity map page table in x1 */
- adrp x1, idmap_pg_dir
mov sp, x2
/*
- * cpu_do_resume expects x0 to contain context physical address
- * pointer and x1 to contain physical address of 1:1 page tables
+ * cpu_do_resume expects x0 to contain context address pointer
*/
- bl cpu_do_resume // PC relative jump, MMU off
- b cpu_resume_mmu // Resume MMU, never returns
-ENDPROC(cpu_resume)
+ bl cpu_do_resume
+
+#ifdef CONFIG_KASAN
+ mov x0, sp
+ bl kasan_unpoison_task_stack_below
+#endif
+
+ ldp x19, x20, [x29, #16]
+ ldp x21, x22, [x29, #32]
+ ldp x23, x24, [x29, #48]
+ ldp x25, x26, [x29, #64]
+ ldp x27, x28, [x29, #80]
+ ldp x29, lr, [x29]
+ mov x0, #0
+ ret
+ENDPROC(_cpu_resume)
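
For reference, the save area the new __cpu_suspend_enter()/_cpu_resume pair operates on has roughly the shape below. The authoritative definition lives in asm/suspend.h, which is not part of this diff, so treat the field names and ordering here as an assumption reconstructed from the SLEEP_STACK_DATA_* offsets used above:

/* Reconstructed sketch, not the authoritative definition. */
struct cpu_suspend_ctx {
	u64	ctx_regs[NR_CTX_REGS];	/* filled by cpu_do_suspend() */
	u64	sp;			/* CPU_CTX_SP */
} __aligned(16);

struct sleep_stack_data {
	struct cpu_suspend_ctx	system_regs;		/* SLEEP_STACK_DATA_SYSTEM_REGS */
	unsigned long		callee_saved_regs[12];	/* x29, lr, x19..x28 */
};
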
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index a90c1f184792..16f97cdaaeae 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -53,6 +53,8 @@
#include <asm/tlbflush.h>
#include <asm/ptrace.h>
#include <asm/virt.h>
+#include <asm/edac.h>
+#include <soc/qcom/minidump.h>
#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
@@ -73,7 +75,8 @@ enum ipi_msg_type {
IPI_CPU_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
- IPI_WAKEUP
+ IPI_WAKEUP,
+ IPI_CPU_BACKTRACE,
};
/*
@@ -150,6 +153,8 @@ asmlinkage notrace void secondary_start_kernel(void)
cpu = task_cpu(current);
set_my_cpu_offset(per_cpu_offset(cpu));
+ pr_debug("CPU%u: Booted secondary processor\n", cpu);
+
/*
* All kernel threads share the same mm context; grab a
* reference and switch to it.
@@ -184,16 +189,16 @@ asmlinkage notrace void secondary_start_kernel(void)
/*
* Enable GIC and timers.
*/
- notify_cpu_starting(cpu);
-
smp_store_cpu_info(cpu);
+ notify_cpu_starting(cpu);
+
/*
* OK, now it's safe to let the boot CPU continue. Wait for
* the CPU migration code to notice that the CPU is online
* before we continue.
*/
- pr_info("CPU%u: Booted secondary processor [%08x]\n",
+ pr_debug("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -278,7 +283,7 @@ void __cpu_die(unsigned int cpu)
pr_crit("CPU%u: cpu didn't die\n", cpu);
return;
}
- pr_notice("CPU%u: shutdown\n", cpu);
+ pr_debug("CPU%u: shutdown\n", cpu);
/*
* Now that the dying CPU is beyond the point of no return w.r.t.
@@ -300,7 +305,7 @@ void __cpu_die(unsigned int cpu)
* of the other hotplug-cpu capable cores, so presumably coming
* out of idle fixes this.
*/
-void cpu_die(void)
+void __ref cpu_die(void)
{
unsigned int cpu = smp_processor_id();
@@ -318,7 +323,16 @@ void cpu_die(void)
*/
cpu_ops[cpu]->cpu_die(cpu);
- BUG();
+ /*
+ * Do not return to the idle loop - jump back to the secondary
+ * cpu initialisation. There's some initialisation which needs
+ * to be repeated to undo the effects of taking the CPU offline.
+ */
+
+ asm volatile("mov sp, %0\n"
+ "mov x29, #0\n"
+ "b secondary_start_kernel"
+ : : "r" (task_stack_page(current) + THREAD_START_SP));
}
#endif
@@ -487,6 +501,18 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
#else
#define acpi_table_parse_madt(...) do { } while (0)
#endif
+void (*__smp_cross_call)(const struct cpumask *, unsigned int);
+DEFINE_PER_CPU(bool, pending_ipi);
+
+void smp_cross_call_common(const struct cpumask *cpumask, unsigned int func)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, cpumask)
+ per_cpu(pending_ipi, cpu) = true;
+
+ __smp_cross_call(cpumask, func);
+}
/*
* Enumerate the possible CPU set from the device tree and build the
@@ -635,8 +661,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
}
}
-void (*__smp_cross_call)(const struct cpumask *, unsigned int);
-
void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
{
__smp_cross_call = fn;
@@ -649,11 +673,17 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
S(IPI_IRQ_WORK, "IRQ work interrupts"),
- S(IPI_WAKEUP, "CPU wake-up interrupts"),
+ S(IPI_WAKEUP, "CPU wakeup interrupts"),
+ S(IPI_CPU_BACKTRACE, "CPU backtrace"),
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, target)
+ per_cpu(pending_ipi, cpu) = true;
+
trace_ipi_raise(target, ipi_types[ipinr]);
__smp_cross_call(target, ipinr);
}
@@ -685,12 +715,12 @@ u64 smp_irq_stat_cpu(unsigned int cpu)
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
- smp_cross_call(mask, IPI_CALL_FUNC);
+ smp_cross_call_common(mask, IPI_CALL_FUNC);
}
void arch_send_call_function_single_ipi(int cpu)
{
- smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
+ smp_cross_call_common(cpumask_of(cpu), IPI_CALL_FUNC);
}
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
@@ -710,27 +740,100 @@ void arch_irq_work_raise(void)
static DEFINE_RAW_SPINLOCK(stop_lock);
+DEFINE_PER_CPU(struct pt_regs, regs_before_stop);
+
/*
* ipi_cpu_stop - handle IPI from smp_send_stop()
*/
-static void ipi_cpu_stop(unsigned int cpu)
+static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs)
{
if (system_state == SYSTEM_BOOTING ||
system_state == SYSTEM_RUNNING) {
+ per_cpu(regs_before_stop, cpu) = *regs;
raw_spin_lock(&stop_lock);
pr_crit("CPU%u: stopping\n", cpu);
+ show_regs(regs);
dump_stack();
+ dump_stack_minidump(regs->sp);
+ arm64_check_cache_ecc(NULL);
raw_spin_unlock(&stop_lock);
}
set_cpu_online(cpu, false);
+ flush_cache_all();
local_irq_disable();
while (1)
cpu_relax();
}
+static cpumask_t backtrace_mask;
+static DEFINE_RAW_SPINLOCK(backtrace_lock);
+
+/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+static unsigned long backtrace_flag;
+
+static void smp_send_all_cpu_backtrace(void)
+{
+ unsigned int this_cpu = smp_processor_id();
+ int i;
+
+ if (test_and_set_bit(0, &backtrace_flag))
+ /*
+ * If there is already a trigger_all_cpu_backtrace() in progress
+ * (backtrace_flag == 1), don't print a duplicate set of CPU dumps.
+ */
+ return;
+
+ cpumask_copy(&backtrace_mask, cpu_online_mask);
+ cpumask_clear_cpu(this_cpu, &backtrace_mask);
+
+ pr_info("Backtrace for cpu %d (current):\n", this_cpu);
+ dump_stack();
+
+ pr_info("\nsending IPI to all other CPUs:\n");
+ if (!cpumask_empty(&backtrace_mask))
+ smp_cross_call_common(&backtrace_mask, IPI_CPU_BACKTRACE);
+
+ /* Wait for up to 10 seconds for all other CPUs to do the backtrace */
+ for (i = 0; i < 10 * 1000; i++) {
+ if (cpumask_empty(&backtrace_mask))
+ break;
+ mdelay(1);
+ }
+
+ clear_bit(0, &backtrace_flag);
+ smp_mb__after_atomic();
+}
+
+/*
+ * ipi_cpu_backtrace - handle IPI from smp_send_all_cpu_backtrace()
+ */
+static void ipi_cpu_backtrace(unsigned int cpu, struct pt_regs *regs)
+{
+ if (cpumask_test_cpu(cpu, &backtrace_mask)) {
+ raw_spin_lock(&backtrace_lock);
+ pr_warn("IPI backtrace for cpu %d\n", cpu);
+ show_regs(regs);
+ raw_spin_unlock(&backtrace_lock);
+ cpumask_clear_cpu(cpu, &backtrace_mask);
+ }
+}
+
+#ifdef CONFIG_SMP
+void arch_trigger_all_cpu_backtrace(void)
+{
+ smp_send_all_cpu_backtrace();
+}
+#else
+void arch_trigger_all_cpu_backtrace(void)
+{
+ dump_stack();
+}
+#endif
+
+
/*
* Main handler for inter-processor interrupts
*/
@@ -757,7 +860,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
case IPI_CPU_STOP:
irq_enter();
- ipi_cpu_stop(cpu);
+ ipi_cpu_stop(cpu, regs);
irq_exit();
break;
@@ -777,6 +880,10 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
#endif
+ case IPI_CPU_BACKTRACE:
+ ipi_cpu_backtrace(cpu, regs);
+ break;
+
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
case IPI_WAKEUP:
WARN_ONCE(!acpi_parking_protocol_valid(cpu),
@@ -792,18 +899,21 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
if ((unsigned)ipinr < NR_IPI)
trace_ipi_exit_rcuidle(ipi_types[ipinr]);
+
+ per_cpu(pending_ipi, cpu) = false;
set_irq_regs(old_regs);
}
void smp_send_reschedule(int cpu)
{
- smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
+ BUG_ON(cpu_is_offline(cpu));
+ smp_cross_call_common(cpumask_of(cpu), IPI_RESCHEDULE);
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
- smp_cross_call(mask, IPI_TIMER);
+ smp_cross_call_common(mask, IPI_TIMER);
}
#endif
@@ -828,7 +938,7 @@ void smp_send_stop(void)
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
- smp_cross_call(&mask, IPI_CPU_STOP);
+ smp_cross_call_common(&mask, IPI_CPU_STOP);
}
/* Wait up to one second for other CPUs to stop */
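
The new IPI_CPU_BACKTRACE plumbing gives the arch an arch_trigger_all_cpu_backtrace() that dumps the current CPU and then IPIs every other online CPU, waiting up to ten seconds for them to respond. A hedged usage sketch; the stall-detector framing and the example_* name are hypothetical:

static void example_report_stall(const char *why)
{
	pr_err("stall detected: %s\n", why);
	/* Dumps this CPU, then sends IPI_CPU_BACKTRACE to all others. */
	arch_trigger_all_cpu_backtrace();
}
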
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 2ccb883353d9..303d571702ea 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -30,7 +30,8 @@
#include <asm/smp_plat.h>
extern void secondary_holding_pen(void);
-volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
+volatile unsigned long __section(".mmuoff.data.read")
+secondary_holding_pen_release = INVALID_HWID;
static phys_addr_t cpu_release_addr[NR_CPUS];
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 75a856568813..0edcd34b45d2 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -15,6 +15,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/ftrace.h>
@@ -63,10 +64,14 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
if (fp < low || fp > high || fp & 0xf)
return -EINVAL;
+ kasan_disable_current();
+
frame->sp = fp + 0x10;
frame->fp = *(unsigned long *)(fp);
frame->pc = *(unsigned long *)(fp + 8);
+ kasan_enable_current();
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk && tsk->ret_stack &&
(frame->pc == (unsigned long)return_to_handler)) {
@@ -157,7 +162,8 @@ static int save_trace(struct stackframe *frame, void *d)
return trace->nr_entries >= trace->max_entries;
}
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+static noinline void __save_stack_trace(struct task_struct *tsk,
+ struct stack_trace *trace, unsigned int nosched)
{
struct stack_trace_data data;
struct stackframe frame;
@@ -167,17 +173,18 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
data.trace = trace;
data.skip = trace->skip;
+ data.no_sched_functions = nosched;
if (tsk != current) {
- data.no_sched_functions = 1;
frame.fp = thread_saved_fp(tsk);
frame.sp = thread_saved_sp(tsk);
frame.pc = thread_saved_pc(tsk);
} else {
- data.no_sched_functions = 0;
+ /* We don't want this function nor the caller */
+ data.skip += 2;
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_stack_pointer;
- frame.pc = (unsigned long)save_stack_trace_tsk;
+ frame.pc = (unsigned long)__save_stack_trace;
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = tsk->curr_ret_stack;
@@ -189,10 +196,17 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
put_task_stack(tsk);
}
+EXPORT_SYMBOL(save_stack_trace_tsk);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+ __save_stack_trace(tsk, trace, 1);
+}
void save_stack_trace(struct stack_trace *trace)
{
- save_stack_trace_tsk(current, trace);
+ __save_stack_trace(current, trace, 0);
}
+
EXPORT_SYMBOL_GPL(save_stack_trace);
#endif
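
After this rework, save_stack_trace() skips the unwinder's own frames (data.skip += 2), so the first recorded entry is its direct caller rather than the trace machinery. A minimal usage sketch against the struct stack_trace API of this kernel generation:

#include <linux/stacktrace.h>

static void example_dump_current_stack(void)
{
	unsigned long entries[16];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= ARRAY_SIZE(entries),
		.skip		= 0,
	};

	save_stack_trace(&trace);
	print_stack_trace(&trace, 0);	/* second argument: indent spaces */
}
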
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index e7a96462ca2d..468b939f3471 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -13,30 +13,11 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
-extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long));
/*
- * This is called by __cpu_suspend_enter() to save the state, and do whatever
- * flushing is required to ensure that when the CPU goes to sleep we have
- * the necessary data available when the caches are not searched.
- *
- * ptr: CPU context virtual address
- * save_ptr: address of the location where the context physical address
- * must be saved
+ * This is allocated by cpu_suspend_init(), and used to store a pointer to
+ * the 'struct sleep_stack_data' that contains a particular CPU's state.
*/
-void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr,
- phys_addr_t *save_ptr)
-{
- *save_ptr = virt_to_phys(ptr);
-
- cpu_do_suspend(ptr);
- /*
- * Only flush the context that must be retrieved with the MMU
- * off. VA primitives ensure the flush is applied to all
- * cache levels so context is pushed to DRAM.
- */
- __flush_dcache_area(ptr, sizeof(*ptr));
- __flush_dcache_area(save_ptr, sizeof(*save_ptr));
-}
+unsigned long *sleep_save_stash;
/*
* This hook is provided so that cpu_suspend code can restore HW
@@ -54,6 +35,24 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
hw_breakpoint_restore = hw_bp_restore;
}
+void notrace __cpu_suspend_exit(void)
+{
+ /*
+ * We are resuming from reset with the idmap active in TTBR0_EL1.
+ * We must uninstall the idmap and restore the expected MMU
+ * state before we can possibly return to userspace.
+ */
+ cpu_uninstall_idmap();
+
+ /*
+ * Restore HW breakpoint registers to sane values
+ * before debug exceptions are possibly reenabled
+ * through local_dbg_restore.
+ */
+ if (hw_breakpoint_restore)
+ hw_breakpoint_restore(NULL);
+}
+
/*
* cpu_suspend
*
@@ -63,8 +62,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
*/
int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
- int ret;
+ int ret = 0;
unsigned long flags;
+ struct sleep_stack_data state;
/*
* From this point debug exceptions are disabled to prevent
@@ -80,20 +80,9 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
*/
pause_graph_tracing();
- /*
- * mm context saved on the stack, it will be restored when
- * the cpu comes out of reset through the identity mapped
- * page tables, so that the thread address space is properly
- * set-up on function return.
- */
- ret = __cpu_suspend_enter(arg, fn);
- if (ret == 0) {
- /*
- * We are resuming from reset with the idmap active in TTBR0_EL1.
- * We must uninstall the idmap and restore the expected MMU
- * state before we can possibly return to userspace.
- */
- cpu_uninstall_idmap();
+ if (__cpu_suspend_enter(&state)) {
+ /* Call the suspend finisher */
+ ret = fn(arg);
/*
* PSTATE was not saved over suspend/resume, re-enable any
@@ -108,8 +97,10 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
* before debug exceptions are possibly reenabled
* through local_dbg_restore.
*/
- if (hw_breakpoint_restore)
- hw_breakpoint_restore(NULL);
+ if (!ret)
+ ret = -EOPNOTSUPP;
+ } else {
+ __cpu_suspend_exit();
}
unpause_graph_tracing();
@@ -124,22 +115,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
return ret;
}
-struct sleep_save_sp sleep_save_sp;
-
static int __init cpu_suspend_init(void)
{
- void *ctx_ptr;
-
/* ctx_ptr is an array of physical addresses */
- ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL);
+ sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash),
+ GFP_KERNEL);
- if (WARN_ON(!ctx_ptr))
+ if (WARN_ON(!sleep_save_stash))
return -ENOMEM;
- sleep_save_sp.save_ptr_stash = ctx_ptr;
- sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
- __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
-
return 0;
}
early_initcall(cpu_suspend_init);
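
With __cpu_suspend_enter() now returning non-zero on the way down and zero on the way back up through cpu_resume(), cpu_suspend() runs the finisher itself. A hedged sketch of a typical caller, loosely modelled on the PSCI idle path this series reshuffles; the example_* names are hypothetical and virt_to_phys(cpu_resume) mirrors the code removed from psci.c above:

#include <linux/psci.h>
#include <asm/suspend.h>

/* Must not return on success: the core powers down and later re-enters the
 * kernel through cpu_resume(), which makes cpu_suspend() itself return 0. */
static int example_suspend_finisher(unsigned long power_state)
{
	return psci_ops.cpu_suspend(power_state, virt_to_phys(cpu_resume));
}

static int example_enter_idle(u32 power_state)
{
	int ret = cpu_suspend(power_state, example_suspend_finisher);

	/* ret == 0: resumed via cpu_resume(); ret < 0: firmware refused. */
	return ret;
}
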
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 7758f7ff131b..d0e5fe5fbf22 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -19,14 +19,36 @@
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched_energy.h>
#include <asm/cputype.h>
#include <asm/topology.h>
+/*
+ * cpu power table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heterogeneous system, cores don't have the same computation capacity
+ * and we reflect that difference in the cpu_power field so the scheduler can
+ * take this difference into account during load balance. A per cpu structure
+ * is preferred because each CPU updates its own cpu_power field during the
+ * load balance except for idle cores. One idle core is selected to run the
+ * rebalance_domains for all idle cores and the cpu_power can be updated
+ * during this sequence.
+ */
static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_power_scale(unsigned int cpu, unsigned long power)
+{
+ per_cpu(cpu_scale, cpu) = power;
+}
+
unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
#ifdef CONFIG_CPU_FREQ
@@ -181,6 +203,46 @@ static int __init parse_cluster(struct device_node *cluster, int depth)
return 0;
}
+struct cpu_efficiency {
+ const char *compatible;
+ unsigned long efficiency;
+};
+
+/*
+ * Table of the relative efficiency of each processor.
+ * The efficiency value must fit in 20 bits and the final
+ * cpu_scale value must be in the range
+ * 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table,
+ * use the default SCHED_CAPACITY_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+ { NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu) __cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+
+static DEFINE_PER_CPU(unsigned long, cpu_efficiency) = SCHED_CAPACITY_SCALE;
+
+unsigned long arch_get_cpu_efficiency(int cpu)
+{
+ return per_cpu(cpu_efficiency, cpu);
+}
+EXPORT_SYMBOL(arch_get_cpu_efficiency);
+
+/*
+ * Iterate over all CPUs' descriptors in the DT and compute the efficiency
+ * (as per table_efficiency). Also calculate a middle efficiency
+ * as close as possible to (max{eff_i} - min{eff_i}) / 2
+ * This is later used to scale the cpu_power field such that an
+ * 'average' CPU is of middle power. Also see the comments near
+ * table_efficiency[] and update_cpu_power().
+ */
static int __init parse_dt_topology(void)
{
struct device_node *cn, *map;
@@ -220,6 +282,107 @@ out:
return ret;
}
+static void __init parse_dt_cpu_power(void)
+{
+ const struct cpu_efficiency *cpu_eff;
+ struct device_node *cn;
+ unsigned long min_capacity = ULONG_MAX;
+ unsigned long max_capacity = 0;
+ unsigned long capacity = 0;
+ int cpu;
+
+ __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+ GFP_NOWAIT);
+
+ for_each_possible_cpu(cpu) {
+ const u32 *rate;
+ int len;
+ u32 efficiency;
+
+ /* Too early to use cpu->of_node */
+ cn = of_get_cpu_node(cpu, NULL);
+ if (!cn) {
+ pr_err("Missing device node for CPU %d\n", cpu);
+ continue;
+ }
+
+ /*
+ * The CPU efficiency value passed from the device tree
+ * overrides the value defined in the table_efficiency[]
+ */
+ if (of_property_read_u32(cn, "efficiency", &efficiency) < 0) {
+
+ for (cpu_eff = table_efficiency;
+ cpu_eff->compatible; cpu_eff++)
+
+ if (of_device_is_compatible(cn,
+ cpu_eff->compatible))
+ break;
+
+ if (cpu_eff->compatible == NULL) {
+ pr_warn("%s: Unknown CPU type\n",
+ cn->full_name);
+ continue;
+ }
+
+ efficiency = cpu_eff->efficiency;
+ }
+
+ per_cpu(cpu_efficiency, cpu) = efficiency;
+
+ rate = of_get_property(cn, "clock-frequency", &len);
+ if (!rate || len != 4) {
+ pr_err("%s: Missing clock-frequency property\n",
+ cn->full_name);
+ continue;
+ }
+
+ capacity = ((be32_to_cpup(rate)) >> 20) * efficiency;
+
+ /* Save min capacity of the system */
+ if (capacity < min_capacity)
+ min_capacity = capacity;
+
+ /* Save max capacity of the system */
+ if (capacity > max_capacity)
+ max_capacity = capacity;
+
+ cpu_capacity(cpu) = capacity;
+ }
+
+ /* If min and max capacities are equal we bypass the update of the
+ * cpu_scale because all CPUs have the same capacity. Otherwise, we
+ * compute a middle_capacity factor that will ensure that the capacity
+ * of an 'average' CPU of the system will be as close as possible to
+ * SCHED_CAPACITY_SCALE, which is the default value, but with the
+ * constraint explained near table_efficiency[].
+ */
+ if (min_capacity == max_capacity)
+ return;
+ else if (4 * max_capacity < (3 * (max_capacity + min_capacity)))
+ middle_capacity = (min_capacity + max_capacity)
+ >> (SCHED_CAPACITY_SHIFT+1);
+ else
+ middle_capacity = ((max_capacity / 3)
+ >> (SCHED_CAPACITY_SHIFT-1)) + 1;
+}
+
+/*
+ * Look for a custom capacity for a CPU in the cpu_topo_data table during
+ * boot. The update of all CPUs is O(n^2) for a heterogeneous system, but the
+ * function returns directly for an SMP system.
+ */
+static void update_cpu_power(unsigned int cpu)
+{
+ if (!cpu_capacity(cpu))
+ return;
+
+ set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+
+ pr_info("CPU%u: update cpu_power %lu\n",
+ cpu, arch_scale_freq_power(NULL, cpu));
+}
+
/*
* cpu topology table
*/
@@ -281,14 +444,14 @@ static void update_cpu_capacity(unsigned int cpu)
{
unsigned long capacity = SCHED_CAPACITY_SCALE;
- if (cpu_core_energy(cpu)) {
+ if (sched_energy_aware && cpu_core_energy(cpu)) {
int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1;
capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap;
}
set_capacity_scale(cpu, capacity);
- pr_info("CPU%d: update cpu_capacity %lu\n",
+ pr_debug("CPU%d: update cpu_capacity %lu\n",
cpu, arch_scale_cpu_capacity(NULL, cpu));
}
@@ -353,6 +516,7 @@ void store_cpu_topology(unsigned int cpuid)
topology_populated:
update_siblings_masks(cpuid);
+ update_cpu_power(cpuid);
update_cpu_capacity(cpuid);
}
@@ -374,18 +538,33 @@ static void __init reset_cpu_topology(void)
}
}
+static void __init reset_cpu_power(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu)
+ set_power_scale(cpu, SCHED_CAPACITY_SCALE);
+}
+
void __init init_cpu_topology(void)
{
+ int cpu;
+
reset_cpu_topology();
/*
* Discard anything that was parsed if we hit an error so we
* don't use partial information.
*/
- if (of_have_populated_dt() && parse_dt_topology())
+ if (of_have_populated_dt() && parse_dt_topology()) {
reset_cpu_topology();
- else
+ } else {
set_sched_topology(arm64_topology);
+ for_each_possible_cpu(cpu)
+ update_siblings_masks(cpu);
+ }
+ reset_cpu_power();
+ parse_dt_cpu_power();
init_sched_energy_costs();
}
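
To make the scaling above concrete, a stand-alone sketch that reruns parse_dt_cpu_power()'s arithmetic with illustrative DT values (1.8 GHz with efficiency 3891 versus 1.2 GHz with efficiency 2048; not taken from any real board):

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10

int main(void)
{
	unsigned long big    = (1800000000UL >> 20) * 3891;	/* 1716 * 3891 = 6676956 */
	unsigned long little = (1200000000UL >> 20) * 2048;	/* 1144 * 2048 = 2342912 */
	unsigned long min = little, max = big, middle;

	if (4 * max < 3 * (max + min))		/* true for these numbers */
		middle = (min + max) >> (SCHED_CAPACITY_SHIFT + 1);
	else
		middle = ((max / 3) >> (SCHED_CAPACITY_SHIFT - 1)) + 1;

	/* cpu_scale ends up ~1516 for big and ~531 for little, so an
	 * "average" CPU lands near SCHED_CAPACITY_SCALE (1024). */
	printf("middle=%lu big=%lu little=%lu\n", middle, big / middle, little / middle);
	return 0;
}
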
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 47454ee143df..416aea2c6719 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -43,6 +43,10 @@
#include <asm/stacktrace.h>
#include <asm/exception.h>
#include <asm/system_misc.h>
+#include <asm/esr.h>
+#include <asm/edac.h>
+
+#include <trace/events/exception.h>
static const char *handler[]= {
"Synchronous Abort",
@@ -151,7 +155,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
unsigned long irq_stack_ptr;
int skip;
- pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+ pr_debug("%s(regs = %pK tsk = %pK)\n", __func__, regs, tsk);
if (!tsk)
tsk = current;
@@ -267,9 +271,6 @@ static int __die(const char *str, int err, struct pt_regs *regs)
end_of_stack(tsk));
if (!user_mode(regs) || in_interrupt()) {
- dump_mem(KERN_EMERG, "Stack: ", regs->sp,
- THREAD_SIZE + (unsigned long)task_stack_page(tsk),
- compat_user_mode(regs));
dump_backtrace(regs, tsk);
dump_instr(KERN_EMERG, regs);
}
@@ -277,40 +278,73 @@ static int __die(const char *str, int err, struct pt_regs *regs)
return ret;
}
-static DEFINE_RAW_SPINLOCK(die_lock);
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
-/*
- * This function is protected against re-entrancy.
- */
-void die(const char *str, struct pt_regs *regs, int err)
+static unsigned long oops_begin(void)
{
- int ret;
+ int cpu;
unsigned long flags;
- raw_spin_lock_irqsave(&die_lock, flags);
-
oops_enter();
+ /* racy, but better than risking deadlock. */
+ raw_local_irq_save(flags);
+ cpu = smp_processor_id();
+ if (!arch_spin_trylock(&die_lock)) {
+ if (cpu == die_owner)
+ /* nested oops. should stop eventually */;
+ else
+ arch_spin_lock(&die_lock);
+ }
+ die_nest_count++;
+ die_owner = cpu;
console_verbose();
bust_spinlocks(1);
- ret = __die(str, err, regs);
+ return flags;
+}
+static void oops_end(unsigned long flags, struct pt_regs *regs, int notify)
+{
if (regs && kexec_should_crash(current))
crash_kexec(regs);
bust_spinlocks(0);
+ die_owner = -1;
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ die_nest_count--;
+ if (!die_nest_count)
+ /* Nest count reaches zero, release the lock. */
+ arch_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
oops_exit();
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
+ if (notify != NOTIFY_STOP)
+ do_exit(SIGSEGV);
+}
- raw_spin_unlock_irqrestore(&die_lock, flags);
+/*
+ * This function is protected against re-entrancy.
+ */
+void die(const char *str, struct pt_regs *regs, int err)
+{
+ enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE;
+ unsigned long flags = oops_begin();
+ int ret;
- if (ret != NOTIFY_STOP)
- do_exit(SIGSEGV);
+ if (!user_mode(regs))
+ bug_type = report_bug(regs->pc, regs);
+ if (bug_type != BUG_TRAP_TYPE_NONE)
+ str = "Oops - BUG";
+
+ ret = __die(str, err, regs);
+
+ oops_end(flags, regs, ret);
}
void arm64_notify_die(const char *str, struct pt_regs *regs,
@@ -400,6 +434,8 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
if (call_undef_hook(regs) == 0)
return;
+ trace_undef_instr(regs, (void *)pc);
+
if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) {
pr_info("%s[%d]: undefined instruction: pc=%p\n",
current->comm, task_pid_nr(current), pc);
@@ -518,6 +554,12 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
pr_crit("Bad mode in %s handler detected, code 0x%08x -- %s\n",
handler[reason], esr, esr_get_class_string(esr));
+ if (esr >> ESR_ELx_EC_SHIFT == ESR_ELx_EC_SERROR) {
+ pr_crit("System error detected. ESR.ISS = %08x\n",
+ esr & 0xffffff);
+ arm64_check_cache_ecc(NULL);
+ }
+
local_irq_disable();
panic("bad mode");
}
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a6f1df69c0c3..91f541a1bdb1 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -1,5 +1,7 @@
/*
- * VDSO implementation for AArch64 and vector page setup for AArch32.
+ * Additional userspace pages setup for AArch64 and AArch32.
+ * - AArch64: vDSO pages setup, vDSO data page update.
+ * - AArch32: sigreturn and kuser helpers pages setup.
*
* Copyright (C) 2012 ARM Limited
*
@@ -36,9 +38,11 @@
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
-extern char vdso_start[], vdso_end[];
-static unsigned long vdso_pages;
-static struct page **vdso_pagelist;
+struct vdso_mappings {
+ unsigned long num_code_pages;
+ struct vm_special_mapping data_mapping;
+ struct vm_special_mapping code_mapping;
+};
/*
* The vDSO data page.
@@ -53,149 +57,258 @@ struct vdso_data *vdso_data = &vdso_data_store.data;
/*
* Create and map the vectors page for AArch32 tasks.
*/
-static struct page *vectors_page[1];
+#if !defined(CONFIG_VDSO32) || defined(CONFIG_KUSER_HELPERS)
+static struct page *vectors_page[] __ro_after_init;
+static const struct vm_special_mapping compat_vdso_spec[] = {
+ {
+ /* Must be named [sigpage] for compatibility with arm. */
+ .name = "[sigpage]",
+ .pages = &vectors_page[0],
+ },
+#ifdef CONFIG_KUSER_HELPERS
+ {
+ .name = "[kuserhelpers]",
+ .pages = &vectors_page[1],
+ },
+#endif
+};
+static struct page *vectors_page[ARRAY_SIZE(compat_vdso_spec)] __ro_after_init;
+#endif
static int __init alloc_vectors_page(void)
{
+#ifdef CONFIG_KUSER_HELPERS
extern char __kuser_helper_start[], __kuser_helper_end[];
- extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
-
- int kuser_sz = __kuser_helper_end - __kuser_helper_start;
- int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
- unsigned long vpage;
+ size_t kuser_sz = __kuser_helper_end - __kuser_helper_start;
+ unsigned long kuser_vpage;
+#endif
- vpage = get_zeroed_page(GFP_ATOMIC);
+#ifndef CONFIG_VDSO32
+ extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
+ size_t sigret_sz =
+ __aarch32_sigret_code_end - __aarch32_sigret_code_start;
+ unsigned long sigret_vpage;
- if (!vpage)
+ sigret_vpage = get_zeroed_page(GFP_ATOMIC);
+ if (!sigret_vpage)
return -ENOMEM;
+#endif
+
+#ifdef CONFIG_KUSER_HELPERS
+ kuser_vpage = get_zeroed_page(GFP_ATOMIC);
+ if (!kuser_vpage) {
+#ifndef CONFIG_VDSO32
+ free_page(sigret_vpage);
+#endif
+ return -ENOMEM;
+ }
+#endif
- /* kuser helpers */
- memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start,
- kuser_sz);
-
+#ifndef CONFIG_VDSO32
/* sigreturn code */
- memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET,
- __aarch32_sigret_code_start, sigret_sz);
+ memcpy((void *)sigret_vpage, __aarch32_sigret_code_start, sigret_sz);
+ flush_icache_range(sigret_vpage, sigret_vpage + PAGE_SIZE);
+ vectors_page[0] = virt_to_page(sigret_vpage);
+#endif
- flush_icache_range(vpage, vpage + PAGE_SIZE);
- vectors_page[0] = virt_to_page(vpage);
+#ifdef CONFIG_KUSER_HELPERS
+ /* kuser helpers */
+ memcpy((void *)kuser_vpage + 0x1000 - kuser_sz, __kuser_helper_start,
+ kuser_sz);
+ flush_icache_range(kuser_vpage, kuser_vpage + PAGE_SIZE);
+ vectors_page[1] = virt_to_page(kuser_vpage);
+#endif
return 0;
}
arch_initcall(alloc_vectors_page);
+#ifndef CONFIG_VDSO32
int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
- unsigned long addr = AARCH32_VECTORS_BASE;
- static const struct vm_special_mapping spec = {
- .name = "[vectors]",
- .pages = vectors_page,
-
- };
+ unsigned long addr;
void *ret;
down_write(&mm->mmap_sem);
- current->mm->context.vdso = (void *)addr;
+ addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+ if (IS_ERR_VALUE(addr)) {
+ ret = ERR_PTR(addr);
+ goto out;
+ }
- /* Map vectors page at the high address. */
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
- VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC,
- &spec);
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ &compat_vdso_spec[0]);
+ if (IS_ERR(ret))
+ goto out;
+
+ current->mm->context.vdso = (void *)addr;
+#ifdef CONFIG_KUSER_HELPERS
+ /* Map the kuser helpers at the ABI-defined high address. */
+ ret = _install_special_mapping(mm, AARCH32_KUSER_HELPERS_BASE,
+ PAGE_SIZE,
+ VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC,
+ &compat_vdso_spec[1]);
+#endif
+out:
up_write(&mm->mmap_sem);
return PTR_ERR_OR_ZERO(ret);
}
+#endif /* !CONFIG_VDSO32 */
#endif /* CONFIG_COMPAT */
-static struct vm_special_mapping vdso_spec[2];
-
-static int __init vdso_init(void)
+static int __init vdso_mappings_init(const char *name,
+ const char *code_start,
+ const char *code_end,
+ struct vdso_mappings *mappings)
{
- int i;
+ unsigned long i, vdso_pages;
+ struct page **vdso_pagelist;
unsigned long pfn;
- if (memcmp(vdso_start, "\177ELF", 4)) {
- pr_err("vDSO is not a valid ELF object!\n");
+ if (memcmp(code_start, "\177ELF", 4)) {
+ pr_err("%s is not a valid ELF object!\n", name);
return -EINVAL;
}
- vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
- pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
- vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data);
-
- /* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
- GFP_KERNEL);
+ vdso_pages = (code_end - code_start) >> PAGE_SHIFT;
+ pr_info("%s: %ld pages (%ld code @ %p, %ld data @ %p)\n",
+ name, vdso_pages + 1, vdso_pages, code_start, 1L,
+ vdso_data);
+
+ /*
+ * Allocate space for storing pointers to the vDSO code pages + the
+ * data page. The pointers must have the same lifetime as the mappings,
+ * which are static, so there is no need to keep track of the pointer
+ * array to free it.
+ */
+ vdso_pagelist = kmalloc_array(vdso_pages + 1, sizeof(struct page *),
+ GFP_KERNEL);
if (vdso_pagelist == NULL)
return -ENOMEM;
/* Grab the vDSO data page. */
vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
/* Grab the vDSO code pages. */
- pfn = sym_to_pfn(vdso_start);
+ pfn = sym_to_pfn(code_start);
for (i = 0; i < vdso_pages; i++)
vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
/* Populate the special mapping structures */
- vdso_spec[0] = (struct vm_special_mapping) {
+ mappings->data_mapping = (struct vm_special_mapping) {
.name = "[vvar]",
- .pages = vdso_pagelist,
+ .pages = &vdso_pagelist[0],
};
- vdso_spec[1] = (struct vm_special_mapping) {
+ mappings->code_mapping = (struct vm_special_mapping) {
.name = "[vdso]",
.pages = &vdso_pagelist[1],
};
+ mappings->num_code_pages = vdso_pages;
return 0;
}
+
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_VDSO32
+
+static struct vdso_mappings vdso32_mappings __ro_after_init;
+
+static int __init vdso32_init(void)
+{
+ extern char vdso32_start[], vdso32_end[];
+
+ return vdso_mappings_init("vdso32", vdso32_start, vdso32_end,
+ &vdso32_mappings);
+}
+arch_initcall(vdso32_init);
+
+#endif /* CONFIG_VDSO32 */
+#endif /* CONFIG_COMPAT */
+
+static struct vdso_mappings vdso_mappings __ro_after_init;
+
+static int __init vdso_init(void)
+{
+ extern char vdso_start[], vdso_end[];
+
+ return vdso_mappings_init("vdso", vdso_start, vdso_end,
+ &vdso_mappings);
+}
+
arch_initcall(vdso_init);
-int arch_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp)
+static int vdso_setup(struct mm_struct *mm,
+ const struct vdso_mappings *mappings)
{
- struct mm_struct *mm = current->mm;
unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
void *ret;
- vdso_text_len = vdso_pages << PAGE_SHIFT;
+ vdso_text_len = mappings->num_code_pages << PAGE_SHIFT;
/* Be sure to map the data page */
vdso_mapping_len = vdso_text_len + PAGE_SIZE;
- down_write(&mm->mmap_sem);
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
- if (IS_ERR_VALUE(vdso_base)) {
- ret = ERR_PTR(vdso_base);
- goto up_fail;
- }
+ if (IS_ERR_VALUE(vdso_base))
+ return PTR_ERR_OR_ZERO(ERR_PTR(vdso_base));
ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
VM_READ|VM_MAYREAD,
- &vdso_spec[0]);
+ &mappings->data_mapping);
if (IS_ERR(ret))
- goto up_fail;
+ return PTR_ERR_OR_ZERO(ret);
vdso_base += PAGE_SIZE;
- mm->context.vdso = (void *)vdso_base;
ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &vdso_spec[1]);
- if (IS_ERR(ret))
- goto up_fail;
+ &mappings->code_mapping);
+ if (!IS_ERR(ret))
+ mm->context.vdso = (void *)vdso_base;
+ return PTR_ERR_OR_ZERO(ret);
+}
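For reference, a sketch of the address-space layout that vdso_setup() establishes (ordering and permissions taken from the code above; addresses are illustrative only):

	/*
	 * vdso_base             : [vvar]  one data page         VM_READ
	 * vdso_base + PAGE_SIZE : [vdso]  num_code_pages pages  VM_READ|VM_EXEC
	 *
	 * mm->context.vdso points at the start of the [vdso] code mapping and
	 * is only assigned once both _install_special_mapping() calls succeed.
	 */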
+
+#ifdef CONFIG_COMPAT
+#ifdef CONFIG_VDSO32
+int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
+{
+ struct mm_struct *mm = current->mm;
+ void *ret;
+ down_write(&mm->mmap_sem);
+
+ ret = ERR_PTR(vdso_setup(mm, &vdso32_mappings));
+#ifdef CONFIG_KUSER_HELPERS
+ if (!IS_ERR(ret))
+ /* Map the kuser helpers at the ABI-defined high address. */
+ ret = _install_special_mapping(mm, AARCH32_KUSER_HELPERS_BASE,
+ PAGE_SIZE,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYEXEC,
+ &compat_vdso_spec[1]);
+#endif
up_write(&mm->mmap_sem);
- return 0;
-up_fail:
- mm->context.vdso = NULL;
+ return PTR_ERR_OR_ZERO(ret);
+}
+#endif /* CONFIG_VDSO32 */
+#endif /* CONFIG_COMPAT */
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ down_write(&mm->mmap_sem);
+ ret = vdso_setup(mm, &vdso_mappings);
up_write(&mm->mmap_sem);
- return PTR_ERR(ret);
+ return ret;
}
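As a quick userspace check that the mapping is exported to new processes (standard glibc/auxv API, not part of this patch):

	#include <elf.h>
	#include <stdio.h>
	#include <sys/auxv.h>

	int main(void)
	{
		/* The kernel advertises the vDSO base via the auxiliary vector. */
		unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

		printf("[vdso] code mapping starts at 0x%lx\n", vdso);
		return vdso ? 0 : 1;
	}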
/*
@@ -216,16 +329,20 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
if (!use_syscall) {
+ struct timespec btm = ktime_to_timespec(tk->offs_boot);
+
/* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->raw_time_sec = tk->raw_sec;
vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
+ vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec;
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
vdso_data->cs_raw_mult = tk->tkr_raw.mult;
/* tkr_mono.shift == tkr_raw.shift */
vdso_data->cs_shift = tk->tkr_mono.shift;
+ vdso_data->btm_sec = btm.tv_sec;
+ vdso_data->btm_nsec = btm.tv_nsec;
}
smp_wmb();
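The new btm_* fields publish tk->offs_boot so that, presumably, the vDSO can serve CLOCK_BOOTTIME without a syscall; the intended arithmetic is roughly boottime = monotonic + btm, i.e. (xtime_clock_sec + wtm_clock_sec + btm_sec) for the seconds part, with the nanosecond parts added and normalised in the same way.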
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 62c84f7cb01b..4adcb532ac6a 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -5,18 +5,32 @@
# Heavily based on the vDSO Makefiles for other archs.
#
-obj-vdso := gettimeofday.o note.o sigreturn.o
+obj-vdso-s := note.o sigreturn.o
+obj-vdso-c := vgettimeofday.o
# Build rules
-targets := $(obj-vdso) vdso.so vdso.so.dbg
-obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+targets := $(obj-vdso-s) $(obj-vdso-c) vdso.so vdso.so.dbg
+obj-vdso-s := $(addprefix $(obj)/, $(obj-vdso-s))
+obj-vdso-c := $(addprefix $(obj)/, $(obj-vdso-c))
+obj-vdso := $(obj-vdso-c) $(obj-vdso-s)
-ccflags-y := -shared -fno-common -fno-builtin
+ccflags-y := -shared -fno-common -fno-builtin -fno-stack-protector
+ccflags-y += -DDISABLE_BRANCH_PROFILING -ffixed-x18
ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+# Force -O2 to avoid libgcc dependencies
+CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
+CFLAGS_vgettimeofday.o = -O2 -fPIC
+ifneq ($(cc-name),clang)
+CFLAGS_vgettimeofday.o += -mcmodel=tiny
+endif
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared
# down to collect2, resulting in silent corruption of the vDSO image.
@@ -49,12 +63,17 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
# Assembly rules for the .S files
-$(obj-vdso): %.o: %.S FORCE
+$(obj-vdso-s): %.o: %.S FORCE
$(call if_changed_dep,vdsoas)
+$(obj-vdso-c): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc)
+
# Actual build commands
quiet_cmd_vdsold = VDSOL $@
cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
+quiet_cmd_vdsocc = VDSOC $@
+ cmd_vdsocc = ${CC} $(c_flags) -c -o $@ $<
quiet_cmd_vdsoas = VDSOA $@
cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
diff --git a/arch/arm64/kernel/vdso/compiler.h b/arch/arm64/kernel/vdso/compiler.h
new file mode 100644
index 000000000000..fb27545640f2
--- /dev/null
+++ b/arch/arm64/kernel/vdso/compiler.h
@@ -0,0 +1,70 @@
+/*
+ * Userspace implementations of fallback calls
+ *
+ * Copyright (C) 2017 Cavium, Inc.
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ * Rewritten into C by: Andrew Pinski <apinski@cavium.com>
+ */
+
+#ifndef __VDSO_COMPILER_H
+#define __VDSO_COMPILER_H
+
+#include <asm/processor.h> /* for cpu_relax() */
+#include <asm/sysreg.h> /* for read_sysreg() */
+#include <asm/unistd.h>
+#include <linux/compiler.h>
+#include <linux/hrtimer.h> /* for LOW_RES_NSEC and MONOTONIC_RES_NSEC */
+
+#ifdef CONFIG_ARM_ARCH_TIMER
+#define ARCH_PROVIDES_TIMER
+#endif
+
+#define DEFINE_FALLBACK(name, type_arg1, name_arg1, type_arg2, name_arg2) \
+static notrace long name##_fallback(type_arg1 _##name_arg1, \
+ type_arg2 _##name_arg2) \
+{ \
+ register type_arg1 name_arg1 asm("x0") = _##name_arg1; \
+ register type_arg2 name_arg2 asm("x1") = _##name_arg2; \
+ register long ret asm ("x0"); \
+ register long nr asm("x8") = __NR_##name; \
+ \
+ asm volatile( \
+ " svc #0\n" \
+ : "=r" (ret) \
+ : "r" (name_arg1), "r" (name_arg2), "r" (nr) \
+ : "memory"); \
+ \
+ return ret; \
+}
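For illustration, the shared C implementation pulled in by vgettimeofday.c below is expected to instantiate these fallbacks along these lines (the exact call sites are an assumption, not part of this hunk):

	/* Defines clock_gettime_fallback(clock, ts): loads __NR_clock_gettime
	 * into x8, the two arguments into x0/x1, and issues svc #0. */
	DEFINE_FALLBACK(clock_gettime, clockid_t, clock, struct timespec *, ts)

	/* Likewise gettimeofday_fallback(tv, tz). */
	DEFINE_FALLBACK(gettimeofday, struct timeval *, tv, struct timezone *, tz)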
+
+/*
+ * AArch64 implementation of arch_counter_get_cntvct() suitable for vdso
+ */
+static __always_inline notrace u64 arch_vdso_read_counter(void)
+{
+ /* Read the virtual counter. */
+ isb();
+ return read_sysreg(cntvct_el0);
+}
+
+/* Rename exported vdso functions */
+#define __vdso_clock_gettime __kernel_clock_gettime
+#define __vdso_gettimeofday __kernel_gettimeofday
+#define __vdso_clock_getres __kernel_clock_getres
+#define __vdso_time __kernel_time
+
+#endif /* __VDSO_COMPILER_H */
diff --git a/arch/arm64/kernel/vdso/datapage.h b/arch/arm64/kernel/vdso/datapage.h
new file mode 100644
index 000000000000..be86a6074cf8
--- /dev/null
+++ b/arch/arm64/kernel/vdso/datapage.h
@@ -0,0 +1,59 @@
+/*
+ * Userspace implementations of __get_datapage
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __VDSO_DATAPAGE_H
+#define __VDSO_DATAPAGE_H
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <asm/vdso_datapage.h>
+
+/*
+ * We use the hidden visibility to prevent the compiler from generating a GOT
+ * relocation. Not only is going through a GOT useless (the entry couldn't and
+ * mustn't be overridden by another library), it does not even work: the linker
+ * cannot generate an absolute address to the data page.
+ *
+ * With the hidden visibility, the compiler simply generates a PC-relative
+ * relocation (e.g. R_ARM_REL32 for the AArch32 vDSO), and this is what we need.
+ */
+extern const struct vdso_data _vdso_data __attribute__((visibility("hidden")));
+
+static inline const struct vdso_data *__get_datapage(void)
+{
+ const struct vdso_data *ret;
+ /*
+ * This simply puts &_vdso_data into ret. The reason why we don't use
+ * `ret = &_vdso_data` is that the compiler tends to optimise this in a
+ * very suboptimal way: instead of keeping &_vdso_data in a register,
+ * it goes through a relocation almost every time _vdso_data must be
+ * accessed (even in subfunctions). This is both time and space
+ * consuming: each relocation uses a word in the code section, and it
+ * has to be loaded at runtime.
+ *
+ * This trick hides the assignment from the compiler. Since it cannot
+ * track where the pointer comes from, it will only use one relocation
+ * where __get_datapage() is called, and then keep the result in a
+ * register.
+ */
+ asm("" : "=r"(ret) : "0"(&_vdso_data));
+ return ret;
+}
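A minimal caller sketch (the function name is hypothetical) showing the intended effect: the address is materialised once through a single PC-relative relocation and then stays in a register:

	static notrace u32 vdso_read_shift(void)
	{
		const struct vdso_data *vd = __get_datapage();

		/* cs_shift is one of the fields written by update_vsyscall(). */
		return vd->cs_shift;
	}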
+
+/* We can only guarantee 56 bits of precision. */
+#define ARCH_CLOCK_FIXED_MASK GENMASK_ULL(55, 0)
+
+#endif /* __VDSO_DATAPAGE_H */
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index b3e6c4d5b75c..3dc1198b5ec9 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -94,6 +94,7 @@ VERSION
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_time;
local: *;
};
}
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..b73d4011993d
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,3 @@
+#include "compiler.h"
+#include "datapage.h"
+#include "../../../../lib/vdso/vgettimeofday.c"
diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore
new file mode 100644
index 000000000000..4fea950fa5ed
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/.gitignore
@@ -0,0 +1,2 @@
+vdso.lds
+vdso.so.raw
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
new file mode 100644
index 000000000000..807d08e28c27
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -0,0 +1,178 @@
+#
+# Building a vDSO image for AArch32.
+#
+# Author: Kevin Brodsky <kevin.brodsky@arm.com>
+# A mix between the arm64 and arm vDSO Makefiles.
+
+ifeq ($(cc-name),clang)
+ CC_ARM32 := $(CC) $(CLANG_TARGET_ARM32) -no-integrated-as
+ GCC_ARM32_TC := $(realpath $(dir $(shell which $(CROSS_COMPILE_ARM32)ld))/..)
+ifneq ($(GCC_ARM32_TC),)
+ CC_ARM32 += --gcc-toolchain=$(GCC_ARM32_TC)
+endif
+else
+ CC_ARM32 := $(CROSS_COMPILE_ARM32)$(cc-name)
+endif
+
+# Same as cc-*option, but using CC_ARM32 instead of CC
+cc32-option = $(call try-run,\
+ $(CC_ARM32) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+cc32-disable-warning = $(call try-run,\
+ $(CC_ARM32) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+cc32-ldoption = $(call try-run,\
+ $(CC_ARM32) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+
+# We cannot use the global flags to compile the vDSO files, the main reason
+# being that the 32-bit compiler may be older than the main (64-bit) compiler
+# and therefore may not understand flags set using $(cc-option ...). Besides,
+# arch-specific options should be taken from the arm Makefile instead of the
+# arm64 one.
+# As a result we set our own flags here.
+
+# From top-level Makefile
+# NOSTDINC_FLAGS
+VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(CC_ARM32) -print-file-name=include)
+VDSO_CPPFLAGS += $(LINUXINCLUDE)
+VDSO_CPPFLAGS += -D__KERNEL__
+VDSO_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
+VDSO_CPPFLAGS += $(ARCH_CPPFLAGS) $(KCPPFLAGS)
+
+# Common C and assembly flags
+# From top-level Makefile
+VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
+VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
+ifdef CONFIG_DEBUG_INFO
+VDSO_CAFLAGS += -g
+endif
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC_ARM32)), y)
+VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
+# From arm Makefile
+VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm)
+VDSO_CAFLAGS += -mabi=aapcs-linux -mfloat-abi=soft
+ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
+VDSO_CAFLAGS += -mbig-endian
+else
+VDSO_CAFLAGS += -mlittle-endian
+endif
+
+# From arm vDSO Makefile
+VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
+VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
+
+# Try to compile for ARMv8. If the compiler is too old and doesn't support it,
+# fall back to v7. There is no easy way to check for what architecture the code
+# is being compiled, so define a macro specifying that (see arch/arm/Makefile).
+VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\
+ -march=armv7-a -D__LINUX_ARM_ARCH__=7)
+
+VDSO_CFLAGS := $(VDSO_CAFLAGS)
+# KBUILD_CFLAGS from top-level Makefile
+VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+ -fno-strict-aliasing -fno-common \
+ -Werror-implicit-function-declaration \
+ -Wno-format-security \
+ -std=gnu89
+VDSO_CFLAGS += -O2
+# Some useful compiler-dependent flags from top-level Makefile
+VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,)
+VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
+VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow)
+VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
+VDSO_CFLAGS += $(call cc32-option,-Werror=date-time)
+VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
+
+# The 32-bit compiler does not provide 128-bit integers, which are used in
+# some headers that are indirectly included from the vDSO code.
+# This hack makes the compiler happy and should trigger a warning/error if
+# variables of such type are referenced.
+VDSO_CFLAGS += -D__uint128_t='void*'
+# Silence some warnings coming from headers that operate on longs
+# (on GCC 4.8 or older, there is unfortunately no way to silence this warning)
+VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
+VDSO_CFLAGS += -Wno-int-to-pointer-cast
+
+VDSO_AFLAGS := $(VDSO_CAFLAGS)
+VDSO_AFLAGS += -D__ASSEMBLY__
+
+VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
+# From arm vDSO Makefile
+VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
+VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
+VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id)
+VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
+
+
+# Borrow vdsomunge.c from the arm vDSO
+# We have to use a relative path because scripts/Makefile.host prefixes
+# $(hostprogs-y) with $(obj)
+munge := ../../../arm/vdso/vdsomunge
+hostprogs-y := $(munge)
+
+c-obj-vdso := vgettimeofday.o
+asm-obj-vdso := sigreturn.o
+
+# Build rules
+targets := $(c-obj-vdso) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
+c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
+asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
+obj-vdso := $(c-obj-vdso) $(asm-obj-vdso)
+
+obj-y += vdso.o
+extra-y += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Force dependency (vdso.s includes vdso.so through incbin)
+$(obj)/vdso.o: $(obj)/vdso.so
+
+include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,vdsosym)
+
+# Strip rule for vdso.so
+$(obj)/vdso.so: OBJCOPYFLAGS := -S
+$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
+ $(call if_changed,vdsomunge)
+
+# Link rule for the .so file, .lds has to be first
+$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
+ $(call if_changed,vdsold)
+
+# Compilation rules for the vDSO sources
+$(filter-out vgettimeofday.o, $(c-obj-vdso)): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc)
+$(asm-obj-vdso): %.o: %.S FORCE
+ $(call if_changed_dep,vdsoas)
+
+# Actual build commands
+quiet_cmd_vdsold = VDSOL32 $@
+ cmd_vdsold = $(CC_ARM32) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
+ -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
+quiet_cmd_vdsocc = VDSOC32 $@
+ cmd_vdsocc = $(CC_ARM32) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
+quiet_cmd_vdsoas = VDSOA32 $@
+ cmd_vdsoas = $(CC_ARM32) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
+
+quiet_cmd_vdsomunge = MUNGE $@
+ cmd_vdsomunge = $(obj)/$(munge) $< $@
+
+# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO)
+gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+# The AArch64 nm should be able to read an AArch32 binary
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+# Install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so
+
+vdso.so: $(obj)/vdso.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso.so
diff --git a/arch/arm64/kernel/vdso32/compiler.h b/arch/arm64/kernel/vdso32/compiler.h
new file mode 100644
index 000000000000..19a43fc37bb9
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/compiler.h
@@ -0,0 +1,122 @@
+/*
+ * Userspace implementations of fallback calls
+ *
+ * Copyright (C) 2017 Cavium, Inc.
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ * Rewritten into C by: Andrew Pinski <apinski@cavium.com>
+ */
+
+#ifndef __VDSO_COMPILER_H
+#define __VDSO_COMPILER_H
+
+#include <generated/autoconf.h>
+#undef CONFIG_64BIT
+#include <asm/barrier.h> /* for isb() & dmb() */
+#include <asm/param.h> /* for HZ */
+#include <asm/unistd32.h>
+#include <linux/compiler.h>
+
+#ifdef CONFIG_ARM_ARCH_TIMER
+#define ARCH_PROVIDES_TIMER
+#endif
+
+/* can not include linux/time.h because of too much architectural cruft */
+#ifndef NSEC_PER_SEC
+#define NSEC_PER_SEC 1000000000L
+#endif
+
+/* can not include linux/jiffies.h because of too much architectural cruft */
+#ifndef TICK_NSEC
+#define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)
+#endif
+
+/* can not include linux/hrtimer.h because of too much architectural cruft */
+#ifndef LOW_RES_NSEC
+#define LOW_RES_NSEC TICK_NSEC
+#ifdef ARCH_PROVIDES_TIMER
+#ifdef CONFIG_HIGH_RES_TIMERS
+# define HIGH_RES_NSEC 1
+# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
+#else
+# define MONOTONIC_RES_NSEC LOW_RES_NSEC
+#endif
+#endif
+#endif
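A quick sanity check of these fallback values, assuming HZ=100 (an assumed configuration, common on 32-bit arm): TICK_NSEC = (1000000000 + 100/2) / 100 = 10000000 ns, so without an architected timer clock_getres() ends up reporting a 10 ms resolution.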
+
+#define DEFINE_FALLBACK(name, type_arg1, name_arg1, type_arg2, name_arg2) \
+static notrace long name##_fallback(type_arg1 _##name_arg1, \
+ type_arg2 _##name_arg2) \
+{ \
+ register type_arg1 name_arg1 asm("r0") = _##name_arg1; \
+ register type_arg2 name_arg2 asm("r1") = _##name_arg2; \
+ register long ret asm ("r0"); \
+ register long nr asm("r7") = __NR_##name; \
+ \
+ asm volatile( \
+ " swi #0\n" \
+ : "=r" (ret) \
+ : "r" (name_arg1), "r" (name_arg2), "r" (nr) \
+ : "memory"); \
+ \
+ return ret; \
+}
+
+/*
+ * AArch32 implementation of arch_counter_get_cntvct() suitable for vdso
+ */
+static __always_inline notrace u64 arch_vdso_read_counter(void)
+{
+ u64 res;
+
+ /* Read the virtual counter. */
+ isb();
+ asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res));
+
+ return res;
+}
+
+/*
+ * Can not include asm/processor.h to pick this up because of all the
+ * architectural components also included, so we open code a copy.
+ */
+static inline void cpu_relax(void)
+{
+ asm volatile("yield" ::: "memory");
+}
+
+#undef smp_rmb
+#if __LINUX_ARM_ARCH__ >= 8
+#define smp_rmb() dmb(ishld) /* ok on ARMv8 */
+#else
+#define smp_rmb() dmb(ish) /* ishld does not exist on ARMv7 */
+#endif
+
+/* Avoid unresolved references emitted by GCC */
+
+void __aeabi_unwind_cpp_pr0(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr1(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr2(void)
+{
+}
+
+#endif /* __VDSO_COMPILER_H */
diff --git a/arch/arm64/kernel/vdso32/datapage.h b/arch/arm64/kernel/vdso32/datapage.h
new file mode 100644
index 000000000000..fe3e216d94d1
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/datapage.h
@@ -0,0 +1 @@
+#include "../vdso/datapage.h"
diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S
new file mode 100644
index 000000000000..14e5f9ca34f9
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/sigreturn.S
@@ -0,0 +1,76 @@
+/*
+ * Sigreturn trampolines for returning from a signal when the SA_RESTORER
+ * flag is not set.
+ *
+ * Copyright (C) 2016 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Based on glibc's arm sa_restorer. While this is not strictly necessary, we
+ * provide both A32 and T32 versions, in accordance with the arm sigreturn
+ * code.
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd32.h>
+
+.macro sigreturn_trampoline name, syscall, regs_offset
+ /*
+ * We provide directives for enabling stack unwinding through the
+ * trampoline. On arm, CFI directives are only used for debugging (and
+ * the vDSO is stripped of debug information), so only the arm-specific
+ * unwinding directives are useful here.
+ */
+ .fnstart
+ .save {r0-r15}
+ .pad #\regs_offset
+ /*
+ * It is necessary to start the unwind tables at least one instruction
+ * before the trampoline, as the unwinder will assume that the signal
+ * handler has been called from the trampoline, that is just before
+ * where the signal handler returns (mov r7, ...).
+ */
+ nop
+ENTRY(\name)
+ mov r7, #\syscall
+ svc #0
+ .fnend
+ /*
+ * We would like to use ENDPROC, but the macro uses @ which is a
+ * comment symbol for arm assemblers, so directly use .type with %
+ * instead.
+ */
+ .type \name, %function
+END(\name)
+.endm
+
+ .text
+
+ .arm
+ sigreturn_trampoline __kernel_sigreturn_arm, \
+ __NR_sigreturn, \
+ COMPAT_SIGFRAME_REGS_OFFSET
+
+ sigreturn_trampoline __kernel_rt_sigreturn_arm, \
+ __NR_rt_sigreturn, \
+ COMPAT_RT_SIGFRAME_REGS_OFFSET
+
+ .thumb
+ sigreturn_trampoline __kernel_sigreturn_thumb, \
+ __NR_sigreturn, \
+ COMPAT_SIGFRAME_REGS_OFFSET
+
+ sigreturn_trampoline __kernel_rt_sigreturn_thumb, \
+ __NR_rt_sigreturn, \
+ COMPAT_RT_SIGFRAME_REGS_OFFSET
diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S
new file mode 100644
index 000000000000..fe19ff70eb76
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vdso.S
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/page.h>
+
+ .globl vdso32_start, vdso32_end
+ .section .rodata
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/arm64/kernel/vdso32/vdso.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
new file mode 100644
index 000000000000..f95cb1c431fb
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -0,0 +1,95 @@
+/*
+ * Adapted from arm64 version.
+ *
+ * GNU linker script for the VDSO library.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ * Heavily based on the vDSO linker scripts for other archs.
+ */
+
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
+OUTPUT_ARCH(arm)
+
+SECTIONS
+{
+ PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
+ . = VDSO_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .rodata : { *(.rodata*) } :text
+
+ .text : { *(.text*) } :text =0xe7f001f2
+
+ .got : { *(.got) }
+ .rel.plt : { *(.rel.plt) }
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+}
+
+VERSION
+{
+ LINUX_2.6 {
+ global:
+ __vdso_clock_gettime;
+ __vdso_gettimeofday;
+ __vdso_clock_getres;
+ __vdso_time;
+ __kernel_sigreturn_arm;
+ __kernel_sigreturn_thumb;
+ __kernel_rt_sigreturn_arm;
+ __kernel_rt_sigreturn_thumb;
+ local: *;
+ };
+}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm;
+VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb;
+VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm;
+VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb;
diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c
new file mode 100644
index 000000000000..b73d4011993d
--- /dev/null
+++ b/arch/arm64/kernel/vdso32/vgettimeofday.c
@@ -0,0 +1,3 @@
+#include "compiler.h"
+#include "datapage.h"
+#include "../../../../lib/vdso/vgettimeofday.c"
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 6e4832def254..71c8076bbc60 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -11,6 +11,7 @@
#include <asm/memory.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/cache.h>
#include "image.h"
@@ -46,6 +47,16 @@ jiffies = jiffies_64;
*(.idmap.text) \
VMLINUX_SYMBOL(__idmap_text_end) = .;
+#ifdef CONFIG_HIBERNATION
+#define HIBERNATE_TEXT \
+ . = ALIGN(SZ_4K); \
+ VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\
+ *(.hibernate_exit.text) \
+ VMLINUX_SYMBOL(__hibernate_exit_text_end) = .;
+#else
+#define HIBERNATE_TEXT
+#endif
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define TRAMP_TEXT \
. = ALIGN(PAGE_SIZE); \
@@ -74,14 +85,19 @@ PECOFF_FILE_ALIGNMENT = 0x200;
#endif
#if defined(CONFIG_DEBUG_ALIGN_RODATA)
-#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT);
-#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
-#elif defined(CONFIG_DEBUG_RODATA)
-#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT);
-#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
+/*
+ * 4 KB granule: 1 level 2 entry
+ * 16 KB granule: 128 level 3 entries, with contiguous bit
+ * 64 KB granule: 32 level 3 entries, with contiguous bit
+ */
+#define SEGMENT_ALIGN SZ_2M
#else
-#define ALIGN_DEBUG_RO
-#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min);
+/*
+ * 4 KB granule: 16 level 3 entries, with contiguous bit
+ * 16 KB granule: 4 level 3 entries, without contiguous bit
+ * 64 KB granule: 1 level 3 entry
+ */
+#define SEGMENT_ALIGN SZ_64K
#endif
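As a check of the entry counts quoted above for the 4 KB granule: SZ_2M / SZ_4K = 512 page-table entries, i.e. exactly one level-2 block mapping, while SZ_64K / SZ_4K = 16 level-3 entries, which matches the span covered by the contiguous hint at that granule.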
SECTIONS
@@ -99,6 +115,7 @@ SECTIONS
*(.discard)
*(.discard.*)
*(.interp .dynamic)
+ *(.dynsym .dynstr .hash)
}
. = KIMAGE_VADDR + TEXT_OFFSET;
@@ -107,19 +124,21 @@ SECTIONS
_text = .;
HEAD_TEXT
}
- ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
__exception_text_start = .;
*(.exception.text)
__exception_text_end = .;
IRQENTRY_TEXT
+ ENTRY_TEXT
SOFTIRQENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
+ KPROBES_TEXT
HYPERVISOR_TEXT
IDMAP_TEXT
+ HIBERNATE_TEXT
TRAMP_TEXT
*(.fixup)
*(.gnu.warning)
@@ -127,14 +146,14 @@ SECTIONS
*(.got) /* Global offset table */
}
- ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ . = ALIGN(SEGMENT_ALIGN);
_etext = .; /* End of text section */
RO_DATA(PAGE_SIZE) /* everything from this point to */
EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */
NOTES
- ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ . = ALIGN(SEGMENT_ALIGN);
__init_begin = .;
INIT_TEXT_SECTION(8)
@@ -166,27 +185,37 @@ SECTIONS
*(.altinstr_replacement)
}
.rela : ALIGN(8) {
- __reloc_start = .;
*(.rela .rela*)
- __reloc_end = .;
- }
- .dynsym : ALIGN(8) {
- __dynsym_start = .;
- *(.dynsym)
- }
- .dynstr : {
- *(.dynstr)
- }
- .hash : {
- *(.hash)
}
- . = ALIGN(PAGE_SIZE);
+ __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR);
+ __rela_size = SIZEOF(.rela);
+
+ . = ALIGN(SEGMENT_ALIGN);
__init_end = .;
_data = .;
_sdata = .;
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+
+ /*
+ * Data written with the MMU off but read with the MMU on requires
+ * cache lines to be invalidated, discarding up to a Cache Writeback
+ * Granule (CWG) of data from the cache. Keep the section that
+ * requires this type of maintenance to be in its own Cache Writeback
+ * Granule (CWG) area so the cache maintenance operations don't
+ * interfere with adjacent data.
+ */
+ .mmuoff.data.write : ALIGN(SZ_2K) {
+ __mmuoff_data_start = .;
+ *(.mmuoff.data.write)
+ }
+ . = ALIGN(SZ_2K);
+ .mmuoff.data.read : {
+ *(.mmuoff.data.read)
+ __mmuoff_data_end = .;
+ }
+
PECOFF_EDATA_PADDING
_edata = .;
@@ -223,6 +252,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"HYP init code too big or misaligned")
ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"ID map text too big or misaligned")
+#ifdef CONFIG_HIBERNATION
+ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
+ <= SZ_4K, "Hibernate exit text too big or misaligned")
+#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,