Diffstat (limited to 'arch/arm64/kernel')
69 files changed, 5275 insertions, 1441 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d10034327423..18938199a838 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -25,14 +25,18 @@ OBJCOPYFLAGS := --prefix-symbols=__efistub_
 $(obj)/%.stub.o: $(obj)/%.o FORCE
 	$(call if_changed,objcopy)
 
-arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
-					   sys_compat.o entry32.o \
-					   ../../arm/kernel/opcodes.o
+arm64-obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
+					   sys_compat.o entry32.o
+ifneq ($(CONFIG_VDSO32),y)
+arm64-obj-$(CONFIG_COMPAT) += sigreturn32.o
+endif
+arm64-obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
 arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
 arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
 arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
 arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
-arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
+arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_debug.o perf_trace_counters.o \
+					   perf_trace_user.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
 arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
@@ -42,10 +46,19 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI) += pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI) += acpi.o
-arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
 arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+arm64-obj-$(CONFIG_MSM_APP_API) += app_api.o
+arm64-obj-$(CONFIG_MSM_APP_SETTINGS) += app_setting.o
+arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
+arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
+arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
+
+ifeq ($(CONFIG_KVM),y)
+arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
+endif
 
-obj-y += $(arm64-obj-y) vdso/
+obj-y += $(arm64-obj-y) vdso/ probes/
+obj-$(CONFIG_VDSO32) += vdso32/
 obj-m += $(arm64-obj-m)
 head-y := head.o
 extra-y += $(head-y) vmlinux.lds
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index d2ee1b21a10d..737481c8e918 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -25,14 +25,13 @@
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
 #include <asm/insn.h>
+#include <asm/sections.h>
 #include <linux/stop_machine.h>
 
 #define __ALT_PTR(a,f)		(u32 *)((void *)&(a)->f + (a)->f)
 #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)
 
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-
 struct alt_region {
 	struct alt_instr *begin;
 	struct alt_instr *end;
@@ -108,7 +107,7 @@ static void __apply_alternatives(void *alt_region)
 
 	for (i = 0; i < nr_inst; i++) {
 		insn = get_alt_insn(alt, origptr + i, replptr + i);
-		*(origptr + i) = cpu_to_le32(insn);
+		BUG_ON(aarch64_insn_patch_text_nosync(origptr + i, insn));
 	}
 
 	flush_icache_range((uintptr_t)origptr,
@@ -124,8 +123,8 @@ static int __apply_alternatives_multi_stop(void *unused)
 {
 	static int patched = 0;
 	struct alt_region region = {
-		.begin	= __alt_instructions,
-		.end	= __alt_instructions_end,
+		.begin	= (struct alt_instr *)__alt_instructions,
+		.end	= (struct alt_instr *)__alt_instructions_end,
 	};
 
 	/* We always have a CPU 0 at this point (__init) */
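The patching loop above resolves each alt_instr entry through the self-relative __ALT_PTR() arithmetic. As a reading aid, here is a minimal C sketch of that resolution; the field layout is inferred from the macros in this hunk, not copied from the kernel header (see arch/arm64/include/asm/alternative.h for the authoritative definition):

	#include <linux/types.h>

	/* Sketch only: layout assumed from __ALT_PTR()/ALT_ORIG_PTR() above. */
	struct alt_instr_sketch {
		s32 orig_offset;	/* offset to original instruction(s) */
		s32 alt_offset;		/* offset to replacement instruction(s) */
	};

	/* Each offset is relative to the address of the field that stores it,
	 * so the entries stay valid wherever the kernel image is loaded. */
	static u32 *alt_orig_ptr(struct alt_instr_sketch *a)
	{
		return (u32 *)((void *)&a->orig_offset + a->orig_offset);
	}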
diff --git a/arch/arm64/kernel/app_api.c b/arch/arm64/kernel/app_api.c
new file mode 100644
index 000000000000..e995bbf3c7b4
--- /dev/null
+++ b/arch/arm64/kernel/app_api.c
@@ -0,0 +1,135 @@
+/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/bitops.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/export.h>
+
+#include <asm/app_api.h>
+
+static spinlock_t spinlock;
+static spinlock_t spinlock_32bit_app;
+static DEFINE_PER_CPU(int, app_config_applied);
+static unsigned long app_config_set[NR_CPUS];
+static unsigned long app_config_clear[NR_CPUS];
+
+void set_app_setting_bit(uint32_t bit)
+{
+	unsigned long flags;
+	uint64_t reg;
+	int cpu;
+
+	spin_lock_irqsave(&spinlock, flags);
+	asm volatile("mrs %0, S3_1_C15_C15_0" : "=r" (reg));
+	reg = reg | BIT(bit);
+	isb();
+	asm volatile("msr S3_1_C15_C15_0, %0" : : "r" (reg));
+	isb();
+	if (bit == APP_SETTING_BIT) {
+		cpu = raw_smp_processor_id();
+		app_config_set[cpu]++;
+
+		this_cpu_write(app_config_applied, 1);
+	}
+	spin_unlock_irqrestore(&spinlock, flags);
+
+}
+EXPORT_SYMBOL(set_app_setting_bit);
+
+void clear_app_setting_bit(uint32_t bit)
+{
+	unsigned long flags;
+	uint64_t reg;
+	int cpu;
+
+	spin_lock_irqsave(&spinlock, flags);
+	asm volatile("mrs %0, S3_1_C15_C15_0" : "=r" (reg));
+	reg = reg & ~BIT(bit);
+	isb();
+	asm volatile("msr S3_1_C15_C15_0, %0" : : "r" (reg));
+	isb();
+	if (bit == APP_SETTING_BIT) {
+		cpu = raw_smp_processor_id();
+		app_config_clear[cpu]++;
+
+		this_cpu_write(app_config_applied, 0);
+	}
+	spin_unlock_irqrestore(&spinlock, flags);
+}
+EXPORT_SYMBOL(clear_app_setting_bit);
+
+void set_app_setting_bit_for_32bit_apps(void)
+{
+	unsigned long flags;
+	uint64_t reg;
+
+	spin_lock_irqsave(&spinlock_32bit_app, flags);
+	if (use_32bit_app_setting) {
+		asm volatile("mrs %0, S3_0_c15_c15_0 " : "=r" (reg));
+		reg = reg | BIT(24);
+		isb();
+		asm volatile("msr S3_0_c15_c15_0, %0" : : "r" (reg));
+		isb();
+		asm volatile("mrs %0, S3_0_c15_c15_1 " : "=r" (reg));
+		reg = reg | BIT(18) | BIT(2) | BIT(0);
+		isb();
+		asm volatile("msr S3_0_c15_c15_1, %0" : : "r" (reg));
+		isb();
+	} else if (use_32bit_app_setting_pro) {
+		asm volatile("mrs %0, S3_0_c15_c15_1 " : "=r" (reg));
+		reg = reg | BIT(18);
+		isb();
+		asm volatile("msr S3_0_c15_c15_1, %0" : : "r" (reg));
+		isb();
+	}
+	spin_unlock_irqrestore(&spinlock_32bit_app, flags);
+}
+EXPORT_SYMBOL(set_app_setting_bit_for_32bit_apps);
+
+void clear_app_setting_bit_for_32bit_apps(void)
+{
+	unsigned long flags;
+	uint64_t reg;
+
+	spin_lock_irqsave(&spinlock_32bit_app, flags);
+	if (use_32bit_app_setting) {
+		asm volatile("mrs %0, S3_0_c15_c15_0 " : "=r" (reg));
+		reg = reg & ~BIT(24);
+		isb();
+		asm volatile("msr S3_0_c15_c15_0, %0" : : "r" (reg));
+		isb();
+		asm volatile("mrs %0, S3_0_c15_c15_1 " : "=r" (reg));
+		reg = reg & ~BIT(18);
+		reg = reg & ~BIT(2);
+		reg = reg & ~BIT(0);
+		isb();
+		asm volatile("msr S3_0_c15_c15_1, %0" : : "r" (reg));
+		isb();
+	} else if (use_32bit_app_setting_pro) {
+		asm volatile("mrs %0, S3_0_c15_c15_1 " : "=r" (reg));
+		reg = reg & ~BIT(18);
+		isb();
+		asm volatile("msr S3_0_c15_c15_1, %0" : : "r" (reg));
+		isb();
+	}
+	spin_unlock_irqrestore(&spinlock_32bit_app, flags);
+}
+EXPORT_SYMBOL(clear_app_setting_bit_for_32bit_apps);
+
+static int __init init_app_api(void)
+{
+	spin_lock_init(&spinlock);
+	spin_lock_init(&spinlock_32bit_app);
+	return 0;
+}
+early_initcall(init_app_api);
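app_api.c only toggles bits in implementation-defined (vendor) control registers; the policy deciding when to do so lives elsewhere. A minimal usage sketch, assuming the declarations exported via asm/app_api.h above (the call site itself is hypothetical):

	#include <linux/sched.h>
	#include <linux/preempt.h>
	#include <asm/app_api.h>	/* set_app_setting_bit(), APP_SETTING_BIT */

	/* Hypothetical caller: apply the setting on this CPU and tag the
	 * task's mm so the context-switch hook keeps it in sync. */
	static void mark_current_for_app_setting(void)
	{
		preempt_disable();		/* register state is per-CPU */
		set_app_setting_bit(APP_SETTING_BIT);
		current->mm->app_setting = 1;	/* inherited by children */
		preempt_enable();
	}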
diff --git a/arch/arm64/kernel/app_setting.c b/arch/arm64/kernel/app_setting.c
new file mode 100644
index 000000000000..0c6b00317645
--- /dev/null
+++ b/arch/arm64/kernel/app_setting.c
@@ -0,0 +1,139 @@
+/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/notifier.h>
+
+#include <asm/app_api.h>
+
+#define MAX_LEN 100
+
+static char *lib_names[MAX_ENTRIES];
+static unsigned int count;
+static struct mutex mutex;
+
+static char lib_str[MAX_LEN] = "";
+static struct kparam_string kps = {
+	.string = lib_str,
+	.maxlen = MAX_LEN,
+};
+static int set_name(const char *str, struct kernel_param *kp);
+module_param_call(lib_name, set_name, param_get_string, &kps, S_IWUSR);
+
+bool use_app_setting = true;
+module_param(use_app_setting, bool, 0644);
+MODULE_PARM_DESC(use_app_setting, "control use of app specific settings");
+
+bool use_32bit_app_setting = true;
+module_param(use_32bit_app_setting, bool, 0644);
+MODULE_PARM_DESC(use_32bit_app_setting, "control use of 32 bit app specific settings");
+
+bool use_32bit_app_setting_pro;
+module_param(use_32bit_app_setting_pro, bool, 0644);
+MODULE_PARM_DESC(use_32bit_app_setting_pro, "control use of 32 bit app specific settings");
+
+static int set_name(const char *str, struct kernel_param *kp)
+{
+	int len = strlen(str);
+	char *name;
+
+	if (len >= MAX_LEN) {
+		pr_err("app_setting: name string too long\n");
+		return -ENOSPC;
+	}
+
+	/*
+	 * echo adds '\n' which we need to chop off later
+	 */
+	name = kzalloc(len + 1, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+
+	strlcpy(name, str, len + 1);
+
+	if (name[len - 1] == '\n')
+		name[len - 1] = '\0';
+
+	mutex_lock(&mutex);
+	if (count < MAX_ENTRIES) {
+		lib_names[count] = name;
+		/*
+		 * mb to ensure that the new lib_names entry is present
+		 * before updating the view presented by get_lib_names
+		 */
+		mb();
+		count++;
+	} else {
+		pr_err("app_setting: set name failed. Max entries reached\n");
+		kfree(name);
+		mutex_unlock(&mutex);
+		return -EPERM;
+	}
+	mutex_unlock(&mutex);
+
+	return 0;
+}
+
+void switch_app_setting_bit(struct task_struct *prev, struct task_struct *next)
+{
+	if (prev->mm && unlikely(prev->mm->app_setting))
+		clear_app_setting_bit(APP_SETTING_BIT);
+
+	if (next->mm && unlikely(next->mm->app_setting))
+		set_app_setting_bit(APP_SETTING_BIT);
+}
+EXPORT_SYMBOL(switch_app_setting_bit);
+
+void switch_32bit_app_setting_bit(struct task_struct *prev,
+				struct task_struct *next)
+{
+	if (prev->mm && unlikely(is_compat_thread(task_thread_info(prev))))
+		clear_app_setting_bit_for_32bit_apps();
+
+	if (next->mm && unlikely(is_compat_thread(task_thread_info(next))))
+		set_app_setting_bit_for_32bit_apps();
+}
+EXPORT_SYMBOL(switch_32bit_app_setting_bit);
+
+void apply_app_setting_bit(struct file *file)
+{
+	bool found = false;
+	int i;
+
+	if (file && file->f_path.dentry) {
+		const char *name = file->f_path.dentry->d_name.name;
+
+		for (i = 0; i < count; i++) {
+			if (unlikely(!strcmp(name, lib_names[i]))) {
+				found = true;
+				break;
+			}
+		}
+		if (found) {
+			preempt_disable();
+			set_app_setting_bit(APP_SETTING_BIT);
+			/* This will take care of child processes as well */
+			current->mm->app_setting = 1;
+			preempt_enable();
+		}
+	}
+}
+EXPORT_SYMBOL(apply_app_setting_bit);
+
+static int __init app_setting_init(void)
+{
+	mutex_init(&mutex);
+	return 0;
+}
+module_init(app_setting_init);
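Usage note (assumptions: built-in code registers parameters under its object name, and the mmap/ELF-load path is the intended caller of apply_app_setting_bit(), neither of which is shown in this diff): library names would be fed in from userspace one at a time, e.g. echo libfoo.so > /sys/module/app_setting/parameters/lib_name, after which a process mapping a file whose d_name matches an entry gets tagged for the context-switch hook via current->mm->app_setting.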
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 2dc44406a7ad..546ce8979b3a 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -26,8 +26,10 @@
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
+#include <linux/kprobes.h>
 #include <linux/arm-smccc.h>
 
+#include <asm/cacheflush.h>
 #include <asm/checksum.h>
 
 EXPORT_SYMBOL(copy_page);
@@ -68,7 +70,12 @@ EXPORT_SYMBOL(test_and_change_bit);
 
 #ifdef CONFIG_FUNCTION_TRACER
 EXPORT_SYMBOL(_mcount);
+NOKPROBE_SYMBOL(_mcount);
 #endif
 
+	/* caching functions */
+EXPORT_SYMBOL(__dma_inv_range);
+EXPORT_SYMBOL(__dma_clean_range);
+EXPORT_SYMBOL(__dma_flush_range);
 
 /* arm-smccc */
 EXPORT_SYMBOL(arm_smccc_smc);
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index fae82901ae81..e012ecd018ee 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -366,6 +366,21 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
 	return res;
 }
 
+#define ARM_OPCODE_CONDITION_UNCOND 0xf
+
+static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
+{
+	u32 cc_bits  = opcode >> 28;
+
+	if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+		if ((*aarch32_opcode_cond_checks[cc_bits])(psr))
+			return ARM_OPCODE_CONDTEST_PASS;
+		else
+			return ARM_OPCODE_CONDTEST_FAIL;
+	}
+	return ARM_OPCODE_CONDTEST_UNCOND;
+}
+
 /*
  * swp_handler logs the id of calling process, dissects the instruction, sanity
  * checks the memory location, calls emulate_swpX for the actual operation and
@@ -380,7 +395,7 @@ static int swp_handler(struct pt_regs *regs, u32 instr)
 
 	type = instr & TYPE_SWPB;
 
-	switch (arm_check_condition(instr, regs->pstate)) {
+	switch (aarch32_check_condition(instr, regs->pstate)) {
 	case ARM_OPCODE_CONDTEST_PASS:
 		break;
 	case ARM_OPCODE_CONDTEST_FAIL:
@@ -461,7 +476,7 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr)
 {
 	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
 
-	switch (arm_check_condition(instr, regs->pstate)) {
+	switch (aarch32_check_condition(instr, regs->pstate)) {
 	case ARM_OPCODE_CONDTEST_PASS:
 		break;
 	case ARM_OPCODE_CONDTEST_FAIL:
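aarch32_check_condition() reads the AArch32 opcode's top nibble (bits 31:28) and, unless it is 0b1111 (the unconditional encoding space), evaluates it against the saved PSTATE flags. A self-contained sketch of the three outcomes, using only architectural facts (EQ passes iff Z is set, AL always passes):

	/* Sketch of the dispatch used by swp_handler()/cp15barrier_handler():
	 * PASS -> emulate the instruction, FAIL -> skip it but still advance
	 * the PC, UNCOND -> the encoding carries no condition field at all. */
	static const char *condtest(u32 opcode, bool z_flag)
	{
		u32 cc_bits = opcode >> 28;

		if (cc_bits == 0xf)
			return "UNCOND";
		if (cc_bits == 0xe)		/* AL: always */
			return "PASS";
		if (cc_bits == 0x0)		/* EQ: passes iff Z == 1 */
			return z_flag ? "PASS" : "FAIL";
		/* ...the remaining condition codes are elided... */
		return "FAIL";
	}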
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3fa949f04ce4..737eb8b80485 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -22,9 +22,11 @@
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
 #include <linux/kvm_host.h>
+#include <linux/suspend.h>
 #include <asm/fixmap.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
+#include <asm/signal32.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/vdso_datapage.h>
@@ -59,6 +61,17 @@ int main(void)
   DEFINE(S_X5, offsetof(struct pt_regs, regs[5]));
   DEFINE(S_X6, offsetof(struct pt_regs, regs[6]));
   DEFINE(S_X7, offsetof(struct pt_regs, regs[7]));
+  DEFINE(S_X8, offsetof(struct pt_regs, regs[8]));
+  DEFINE(S_X10, offsetof(struct pt_regs, regs[10]));
+  DEFINE(S_X12, offsetof(struct pt_regs, regs[12]));
+  DEFINE(S_X14, offsetof(struct pt_regs, regs[14]));
+  DEFINE(S_X16, offsetof(struct pt_regs, regs[16]));
+  DEFINE(S_X18, offsetof(struct pt_regs, regs[18]));
+  DEFINE(S_X20, offsetof(struct pt_regs, regs[20]));
+  DEFINE(S_X22, offsetof(struct pt_regs, regs[22]));
+  DEFINE(S_X24, offsetof(struct pt_regs, regs[24]));
+  DEFINE(S_X26, offsetof(struct pt_regs, regs[26]));
+  DEFINE(S_X28, offsetof(struct pt_regs, regs[28]));
   DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
   DEFINE(S_SP, offsetof(struct pt_regs, sp));
 #ifdef CONFIG_COMPAT
@@ -71,6 +84,18 @@ int main(void)
   DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit));
   DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
   BLANK();
+#ifdef CONFIG_COMPAT
+  DEFINE(COMPAT_SIGFRAME_REGS_OFFSET,
+	 offsetof(struct compat_sigframe, uc) +
+	 offsetof(struct compat_ucontext, uc_mcontext) +
+	 offsetof(struct compat_sigcontext, arm_r0));
+  DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET,
+	 offsetof(struct compat_rt_sigframe, sig) +
+	 offsetof(struct compat_sigframe, uc) +
+	 offsetof(struct compat_ucontext, uc_mcontext) +
+	 offsetof(struct compat_sigcontext, arm_r0));
+  BLANK();
+#endif
   DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
   BLANK();
   DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
@@ -84,40 +109,6 @@ int main(void)
   DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
   DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
   BLANK();
-  DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
-  DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
-  DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
-  DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
-  DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
-  DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
-  DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
-  DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
-  BLANK();
-  DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
-  DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
-  DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec));
-  DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
-  DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
-  DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
-  DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
-  DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
-  DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec));
-  DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
-  DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
-  DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult));
-  DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
-  DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
-  DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
-  DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
-  BLANK();
-  DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
-  DEFINE(TVAL_TV_USEC, offsetof(struct timeval, tv_usec));
-  DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
-  DEFINE(TSPEC_TV_NSEC, offsetof(struct timespec, tv_nsec));
-  BLANK();
-  DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
-  DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
-  BLANK();
 #ifdef CONFIG_THREAD_INFO_IN_TASK
   DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
   DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
@@ -128,62 +119,27 @@ int main(void)
   DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
   DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
   DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
-  DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1));
-  DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1));
-  DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr));
-  DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs));
+  DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
   DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
   DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
   DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
-  DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
-  DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr));
-  DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr));
-  DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr));
-  DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr));
-  DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr));
-  DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
-  DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2));
-  DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
-  DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state));
-  DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
-  DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
-  DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
-  DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
-  DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
-  DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
-  DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
-  DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
-  DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
-  DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
-  DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
-  DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
-  DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
-  DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
-  DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
-  DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
-  DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
-  DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
-  DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
-  DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
-  DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
-  DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
-  DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
-  DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));
   DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
   DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask));
   DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff));
-  DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
-  DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
-  DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
+  DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
+  DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
 #endif
   DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0));
   DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
   BLANK();
+  DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
+  DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
+  DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
+  BLANK();
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
   DEFINE(TRAMP_VALIAS, TRAMP_VALIAS);
 #endif
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
new file mode 100644
index 000000000000..76225c2611ea
--- /dev/null
+++ b/arch/arm64/kernel/bpi.S
@@ -0,0 +1,87 @@
+/*
+ * Contains CPU specific branch predictor invalidation sequences
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+.macro ventry target
+	.rept 31
+	nop
+	.endr
+	b	\target
+.endm
+
+.macro vectors target
+	ventry \target + 0x000
+	ventry \target + 0x080
+	ventry \target + 0x100
+	ventry \target + 0x180
+
+	ventry \target + 0x200
+	ventry \target + 0x280
+	ventry \target + 0x300
+	ventry \target + 0x380
+
+	ventry \target + 0x400
+	ventry \target + 0x480
+	ventry \target + 0x500
+	ventry \target + 0x580
+
+	ventry \target + 0x600
+	ventry \target + 0x680
+	ventry \target + 0x700
+	ventry \target + 0x780
+.endm
+
+	.align	11
+ENTRY(__bp_harden_hyp_vecs_start)
+	.rept 4
+	vectors __kvm_hyp_vector
+	.endr
+ENTRY(__bp_harden_hyp_vecs_end)
+ENTRY(__psci_hyp_bp_inval_start)
+	sub	sp, sp, #(8 * 18)
+	stp	x16, x17, [sp, #(16 * 0)]
+	stp	x14, x15, [sp, #(16 * 1)]
+	stp	x12, x13, [sp, #(16 * 2)]
+	stp	x10, x11, [sp, #(16 * 3)]
+	stp	x8, x9, [sp, #(16 * 4)]
+	stp	x6, x7, [sp, #(16 * 5)]
+	stp	x4, x5, [sp, #(16 * 6)]
+	stp	x2, x3, [sp, #(16 * 7)]
+	stp	x0, x1, [sp, #(16 * 8)]
+	mov	x0, #0x84000000
+	smc	#0
+	ldp	x16, x17, [sp, #(16 * 0)]
+	ldp	x14, x15, [sp, #(16 * 1)]
+	ldp	x12, x13, [sp, #(16 * 2)]
+	ldp	x10, x11, [sp, #(16 * 3)]
+	ldp	x8, x9, [sp, #(16 * 4)]
+	ldp	x6, x7, [sp, #(16 * 5)]
+	ldp	x4, x5, [sp, #(16 * 6)]
+	ldp	x2, x3, [sp, #(16 * 7)]
+	ldp	x0, x1, [sp, #(16 * 8)]
+	add	sp, sp, #(8 * 18)
+ENTRY(__psci_hyp_bp_inval_end)
+
+ENTRY(__qcom_hyp_sanitize_link_stack_start)
+	stp	x29, x30, [sp, #-16]!
+	.rept	16
+	bl	. + 4
+	.endr
+	ldp	x29, x30, [sp], #16
+ENTRY(__qcom_hyp_sanitize_link_stack_end)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 06afd04e02c0..e857248dd980 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -29,12 +29,160 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
 			       entry->midr_range_max);
 }
 
+static bool __maybe_unused
+is_kryo_midr(const struct arm64_cpu_capabilities *entry)
+{
+	u32 model;
+
+	model = read_cpuid_id();
+	model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) |
+		MIDR_ARCHITECTURE_MASK;
+
+	return model == entry->midr_model;
+}
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+#ifdef CONFIG_KVM
+extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[];
+extern char __qcom_hyp_sanitize_link_stack_start[];
+extern char __qcom_hyp_sanitize_link_stack_end[];
+
+static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
+				const char *hyp_vecs_end)
+{
+	void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K);
+	int i;
+
+	for (i = 0; i < SZ_2K; i += 0x80)
+		memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
+
+	flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
+}
+
+static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
+				      const char *hyp_vecs_start,
+				      const char *hyp_vecs_end)
+{
+	static int last_slot = -1;
+	static DEFINE_SPINLOCK(bp_lock);
+	int cpu, slot = -1;
+
+	spin_lock(&bp_lock);
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
+			slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
+			break;
+		}
+	}
+
+	if (slot == -1) {
+		last_slot++;
+		BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
+			/ SZ_2K) <= last_slot);
+		slot = last_slot;
+		__copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
+	}
+
+	__this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
+	__this_cpu_write(bp_hardening_data.fn, fn);
+	spin_unlock(&bp_lock);
+}
+#else
+#define __psci_hyp_bp_inval_start		NULL
+#define __psci_hyp_bp_inval_end			NULL
+#define __qcom_hyp_sanitize_link_stack_start	NULL
+#define __qcom_hyp_sanitize_link_stack_end	NULL
+
+static void __maybe_unused __install_bp_hardening_cb(bp_hardening_cb_t fn,
+				      const char *hyp_vecs_start,
+				      const char *hyp_vecs_end)
+{
+	__this_cpu_write(bp_hardening_data.fn, fn);
+}
+#endif	/* CONFIG_KVM */
+
+static void __maybe_unused install_bp_hardening_cb(
+			const struct arm64_cpu_capabilities *entry,
+			bp_hardening_cb_t fn,
+			const char *hyp_vecs_start,
+			const char *hyp_vecs_end)
+{
+	u64 pfr0;
+
+	if (!entry->matches(entry))
+		return;
+
+	pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1);
+	if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
+		return;
+
+	__install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end);
+}
+
+#include <linux/psci.h>
+
+static int enable_psci_bp_hardening(void *data)
+{
+	const struct arm64_cpu_capabilities *entry = data;
+
+	if (psci_ops.get_version)
+		install_bp_hardening_cb(entry,
+				(bp_hardening_cb_t)psci_ops.get_version,
+				__psci_hyp_bp_inval_start,
+				__psci_hyp_bp_inval_end);
+
+	return 0;
+}
+
+static void __maybe_unused qcom_link_stack_sanitization(void)
+{
+	u64 tmp;
+
+	asm volatile("mov	%0, x30		\n"
+		     ".rept	16		\n"
+		     "bl	. + 4		\n"
+		     ".endr			\n"
+		     "mov	x30, %0		\n"
+		     : "=&r" (tmp));
+}
+
+static void __maybe_unused qcom_bp_hardening(void)
+{
+	qcom_link_stack_sanitization();
+	if (psci_ops.get_version)
+		psci_ops.get_version();
+}
+
+static int __maybe_unused enable_qcom_bp_hardening(void *data)
+{
+	const struct arm64_cpu_capabilities *entry = data;
+
+	install_bp_hardening_cb(entry,
+			(bp_hardening_cb_t)qcom_bp_hardening,
+			__psci_hyp_bp_inval_start,
+			__psci_hyp_bp_inval_end);
+	return 0;
+}
+
+#endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */
+
 #define MIDR_RANGE(model, min, max) \
 	.matches = is_affected_midr_range, \
 	.midr_model = model, \
 	.midr_range_min = min, \
 	.midr_range_max = max
 
+#define MIDR_ALL_VERSIONS(model) \
+	.matches = is_affected_midr_range, \
+	.midr_model = model, \
+	.midr_range_min = 0, \
+	.midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK)
+
 const struct arm64_cpu_capabilities arm64_errata[] = {
 #if	defined(CONFIG_ARM64_ERRATUM_826319) || \
 	defined(CONFIG_ARM64_ERRATUM_827319) || \
@@ -79,6 +227,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.capability = ARM64_WORKAROUND_845719,
 		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04),
 	},
+	{
+	/* Kryo2xx Silver rAp4 */
+		.desc = "Kryo2xx Silver erratum 845719",
+		.capability = ARM64_WORKAROUND_845719,
+		MIDR_RANGE(MIDR_KRYO2XX_SILVER, 0xA00004, 0xA00004),
+	},
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_23154
 	{
@@ -97,6 +251,39 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 			   (1 << MIDR_VARIANT_SHIFT) | 1),
 	},
 #endif
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	{
+		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+		.enable = enable_psci_bp_hardening,
+	},
+	{
+		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+		.enable = enable_psci_bp_hardening,
+	},
+	{
+		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+		.enable = enable_psci_bp_hardening,
+	},
+	{
+		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
+		.enable = enable_psci_bp_hardening,
+	},
+	{
+		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+		MIDR_ALL_VERSIONS(MIDR_KRYO2XX_GOLD),
+		.enable = enable_psci_bp_hardening,
+	},
+	{
+		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+		.midr_model = MIDR_QCOM_KRYO,
+		.matches = is_kryo_midr,
+		.enable = enable_qcom_bp_hardening,
+	},
+#endif
 	{
 	}
 };
@@ -105,3 +292,8 @@ void check_local_cpu_errata(void)
 {
 	update_cpu_capabilities(arm64_errata, "enabling workaround for");
 }
+
+void __init enable_errata_workarounds(void)
+{
+	enable_cpu_capabilities(arm64_errata);
+}
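The enable hooks above only install a per-CPU callback; nothing in this file invokes it. The consuming side lives in asm/mmu_context.h in kernels of this vintage; the following is a sketch from memory, so treat the exact names as assumptions:

	/* On a user fault from an attacker-controllable address, run
	 * whatever invalidation this CPU registered: the PSCI get_version
	 * SMC trampoline, or the Kryo link-stack sanitizer above. */
	static inline void apply_bp_hardening_sketch(void)
	{
		struct bp_hardening_data *d = this_cpu_ptr(&bp_hardening_data);

		if (d->fn)
			d->fn();
	}

The entry.S hunk further down wires this into the EL0 instruction-abort path via do_el0_ia_bp_hardening.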
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index aee34290cd09..2a2bf5231f6a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -29,6 +29,7 @@
 #include <asm/cpu_ops.h>
 #include <asm/processor.h>
 #include <asm/sysreg.h>
+#include <asm/virt.h>
 
 unsigned long elf_hwcap __read_mostly;
 EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -91,6 +92,7 @@ static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
@@ -656,6 +658,11 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
 	return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
 }
 
+static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
+{
+	return is_kernel_in_hyp_mode();
+}
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
 
@@ -744,6 +751,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = cpufeature_pan_not_uao,
 	},
 #endif /* CONFIG_ARM64_PAN */
+	{
+		.desc = "Virtualization Host Extensions",
+		.capability = ARM64_HAS_VIRT_HOST_EXTN,
+		.matches = runs_at_el2,
+	},
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	{
 		.capability = ARM64_UNMAP_KERNEL_AT_EL0,
@@ -867,8 +879,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
  * Run through the enabled capabilities and enable() it on all active
  * CPUs
  */
-static void __init
-enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
 	int i;
 
@@ -880,7 +891,8 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 		 * uses an IPI, giving us a PSTATE that disappears when
 		 * we return.
 		 */
-		stop_machine(caps[i].enable, NULL, cpu_online_mask);
+		stop_machine(caps[i].enable, (void *)&caps[i],
+			cpu_online_mask);
 	}
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -995,7 +1007,7 @@ void verify_local_cpu_capabilities(void)
 		if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
 			fail_incapable_cpu("arm64_features", &caps[i]);
 		if (caps[i].enable)
-			caps[i].enable(NULL);
+			caps[i].enable((void *)&caps[i]);
 	}
 
 	for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
@@ -1027,6 +1039,7 @@ void __init setup_cpu_features(void)
 
 	/* Set the CPU feature capabilies */
 	setup_feature_capabilities();
+	enable_errata_workarounds();
 	setup_cpu_hwcaps();
 
 	/* Advertise that we have computed the system capabilities */
@@ -1040,9 +1053,9 @@ void __init setup_cpu_features(void)
 	if (!cwg)
 		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
 			cls);
-	if (L1_CACHE_BYTES < cls)
-		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
-			L1_CACHE_BYTES, cls);
+	if (ARCH_DMA_MINALIGN < cls)
+		pr_warn("ARCH_DMA_MINALIGN smaller than the Cache Writeback Granule (%d < %d)\n",
+			ARCH_DMA_MINALIGN, cls);
 }
 
 static bool __maybe_unused
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 95a6fae54740..4b2caefd3a8f 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -19,6 +19,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
+#include <asm/elf.h>
 
 #include <linux/bitops.h>
 #include <linux/bug.h>
@@ -33,6 +34,10 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/delay.h>
+#include <linux/of_fdt.h>
+
+char* (*arch_read_hardware_id)(void);
+EXPORT_SYMBOL(arch_read_hardware_id);
 
 /*
  * In case the boot CPU is hotpluggable, we record its initial state and
@@ -106,7 +111,9 @@ static int c_show(struct seq_file *m, void *v)
 	int i, j;
 	bool compat = personality(current->personality) == PER_LINUX32;
 
-	for_each_online_cpu(i) {
+	seq_printf(m, "Processor\t: AArch64 Processor rev %d (%s)\n",
+		   read_cpuid_id() & 15, ELF_PLATFORM);
+	for_each_present_cpu(i) {
 		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
 		u32 midr = cpuinfo->reg_midr;
 
@@ -156,6 +163,11 @@ static int c_show(struct seq_file *m, void *v)
 		seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
 	}
 
+	if (!arch_read_hardware_id)
+		seq_printf(m, "Hardware\t: %s\n", machine_name);
+	else
+		seq_printf(m, "Hardware\t: %s\n", arch_read_hardware_id());
+
 	return 0;
 }
 
@@ -201,7 +213,7 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 	if (l1ip == ICACHE_POLICY_AIVIVT)
 		set_bit(ICACHEF_AIVIVT, &__icache_flags);
 
-	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
+	pr_debug("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
 }
 
 static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
@@ -215,7 +227,12 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	info->reg_id_aa64dfr1 = read_cpuid(SYS_ID_AA64DFR1_EL1);
 	info->reg_id_aa64isar0 = read_cpuid(SYS_ID_AA64ISAR0_EL1);
 	info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1);
-	info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1);
+	/*
+	 * Explicitly mask out 16KB granule since we do not
+	 * want to support it
+	 */
+	info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1) &
+					(~MMFR0_EL1_16KGRAN_MASK);
 	info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1);
 	info->reg_id_aa64mmfr2 = read_cpuid(SYS_ID_AA64MMFR2_EL1);
 	info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1);
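For reference, with no arch_read_hardware_id hook registered and a part whose MIDR revision field happens to be 4, the new first and last lines of /proc/cpuinfo produced by the seq_printf() calls above would render roughly as (values illustrative only):

	Processor	: AArch64 Processor rev 4 (aarch64)
	...existing per-CPU feature blocks...
	Hardware	: <machine_name from the device tree>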
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 77fbcabcd9e3..50392fe170a9 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -23,6 +23,7 @@
 #include <linux/hardirq.h>
 #include <linux/init.h>
 #include <linux/ptrace.h>
+#include <linux/kprobes.h>
 #include <linux/stat.h>
 #include <linux/uaccess.h>
 
@@ -48,6 +49,7 @@ static void mdscr_write(u32 mdscr)
 	asm volatile("msr mdscr_el1, %0" :: "r" (mdscr));
 	local_dbg_restore(flags);
 }
+NOKPROBE_SYMBOL(mdscr_write);
 
 static u32 mdscr_read(void)
 {
@@ -55,6 +57,7 @@ static u32 mdscr_read(void)
 	asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr));
 	return mdscr;
 }
+NOKPROBE_SYMBOL(mdscr_read);
 
 /*
  * Allow root to disable self-hosted debug from userspace.
@@ -103,6 +106,7 @@ void enable_debug_monitors(enum dbg_active_el el)
 		mdscr_write(mdscr);
 	}
 }
+NOKPROBE_SYMBOL(enable_debug_monitors);
 
 void disable_debug_monitors(enum dbg_active_el el)
 {
@@ -123,6 +127,7 @@ void disable_debug_monitors(enum dbg_active_el el)
 		mdscr_write(mdscr);
 	}
 }
+NOKPROBE_SYMBOL(disable_debug_monitors);
 
 /*
  * OS lock clearing.
@@ -173,6 +178,7 @@ static void set_regs_spsr_ss(struct pt_regs *regs)
 	spsr |= DBG_SPSR_SS;
 	regs->pstate = spsr;
 }
+NOKPROBE_SYMBOL(set_regs_spsr_ss);
 
 static void clear_regs_spsr_ss(struct pt_regs *regs)
 {
@@ -182,6 +188,7 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)
 	spsr &= ~DBG_SPSR_SS;
 	regs->pstate = spsr;
 }
+NOKPROBE_SYMBOL(clear_regs_spsr_ss);
 
 /* EL1 Single Step Handler hooks */
 static LIST_HEAD(step_hook);
@@ -225,6 +232,7 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr)
 
 	return retval;
 }
+NOKPROBE_SYMBOL(call_step_hook);
 
 static int single_step_handler(unsigned long addr, unsigned int esr,
 			       struct pt_regs *regs)
@@ -253,6 +261,10 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
 		 */
 		user_rewind_single_step(current);
 	} else {
+#ifdef CONFIG_KPROBES
+		if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+			return 0;
+#endif
 		if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
 			return 0;
 
@@ -266,6 +278,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
 
 	return 0;
 }
+NOKPROBE_SYMBOL(single_step_handler);
 
 /*
  * Breakpoint handler is re-entrant as another breakpoint can
@@ -303,6 +316,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr)
 
 	return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
 }
+NOKPROBE_SYMBOL(call_break_hook);
 
 static int brk_handler(unsigned long addr, unsigned int esr,
 		       struct pt_regs *regs)
@@ -318,13 +332,21 @@ static int brk_handler(unsigned long addr, unsigned int esr,
 		};
 
 		force_sig_info(SIGTRAP, &info, current);
-	} else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
-		pr_warning("Unexpected kernel BRK exception at EL1\n");
+	}
+#ifdef CONFIG_KPROBES
+	else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) {
+		if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED)
+			return -EFAULT;
+	}
+#endif
+	else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
+		pr_warn("Unexpected kernel BRK exception at EL1\n");
 		return -EFAULT;
 	}
 
 	return 0;
 }
+NOKPROBE_SYMBOL(brk_handler);
 
 int aarch32_break_handler(struct pt_regs *regs)
 {
@@ -369,6 +391,7 @@ int aarch32_break_handler(struct pt_regs *regs)
 	force_sig_info(SIGTRAP, &info, current);
 	return 0;
 }
+NOKPROBE_SYMBOL(aarch32_break_handler);
 
 static int __init debug_traps_init(void)
 {
@@ -390,6 +413,7 @@ void user_rewind_single_step(struct task_struct *task)
 	if (test_tsk_thread_flag(task, TIF_SINGLESTEP))
 		set_regs_spsr_ss(task_pt_regs(task));
 }
+NOKPROBE_SYMBOL(user_rewind_single_step);
 
 void user_fastforward_single_step(struct task_struct *task)
 {
@@ -405,6 +429,7 @@ void kernel_enable_single_step(struct pt_regs *regs)
 	mdscr_write(mdscr_read() | DBG_MDSCR_SS);
 	enable_debug_monitors(DBG_ACTIVE_EL1);
 }
+NOKPROBE_SYMBOL(kernel_enable_single_step);
 
 void kernel_disable_single_step(void)
 {
@@ -412,12 +437,14 @@ void kernel_disable_single_step(void)
 	mdscr_write(mdscr_read() & ~DBG_MDSCR_SS);
 	disable_debug_monitors(DBG_ACTIVE_EL1);
 }
+NOKPROBE_SYMBOL(kernel_disable_single_step);
 
 int kernel_active_single_step(void)
 {
 	WARN_ON(!irqs_disabled());
 	return mdscr_read() & DBG_MDSCR_SS;
 }
+NOKPROBE_SYMBOL(kernel_active_single_step);
 
 /* ptrace API */
 void user_enable_single_step(struct task_struct *task)
@@ -427,8 +454,10 @@ void user_enable_single_step(struct task_struct *task)
 	if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP))
 		set_regs_spsr_ss(task_pt_regs(task));
 }
+NOKPROBE_SYMBOL(user_enable_single_step);
 
 void user_disable_single_step(struct task_struct *task)
 {
 	clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
 }
+NOKPROBE_SYMBOL(user_disable_single_step);
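Kernel code that wants to ride this single-step machinery registers on the hook list consulted by call_step_hook() above. A minimal client sketch; register_step_hook(), struct step_hook, and the DBG_HOOK_* values come from asm/debug-monitors.h, and the wiring shown is illustrative:

	#include <linux/kernel.h>
	#include <asm/debug-monitors.h>

	/* Runs from call_step_hook() on an EL1 step exception that kprobes
	 * did not already consume. */
	static int demo_step_fn(struct pt_regs *regs, unsigned int esr)
	{
		pr_info("stepped to %llx\n", regs->pc);
		return DBG_HOOK_HANDLED;	/* DBG_HOOK_ERROR passes it on */
	}

	static struct step_hook demo_step_hook = {
		.fn = demo_step_fn,
	};

	/* from an init path: register_step_hook(&demo_step_hook); */

Note that after this patch, kprobe_single_step_handler() gets first refusal before the hook list runs.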
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index f82036e02485..936022f0655e 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -61,7 +61,7 @@ ENTRY(entry)
 	 */
 	mov	x20, x0		// DTB address
 	ldr	x0, [sp, #16]	// relocated _text address
-	movz	x21, #:abs_g0:stext_offset
+	ldr	w21, =stext_offset
 	add	x21, x0, x21
 
 	/*
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index c44a82f146b1..1ffe15459c92 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -64,4 +64,20 @@ ENTRY(fpsimd_load_partial_state)
 	ret
 ENDPROC(fpsimd_load_partial_state)
 
+#ifdef CONFIG_ENABLE_FP_SIMD_SETTINGS
+ENTRY(fpsimd_enable_trap)
+	mrs	x0, cpacr_el1
+	bic	x0, x0, #(3 << 20)
+	orr	x0, x0, #(1 << 20)
+	msr	cpacr_el1, x0
+	ret
+ENDPROC(fpsimd_enable_trap)
+
+ENTRY(fpsimd_disable_trap)
+	mrs	x0, cpacr_el1
+	orr	x0, x0, #(3 << 20)
+	msr	cpacr_el1, x0
+	ret
+ENDPROC(fpsimd_disable_trap)
+#endif
+
 #endif
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4ed652aa62fd..f0ca0eb3b077 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -376,6 +376,7 @@ tsk	.req	x28		// current thread_info
 /*
  * Exception vectors.
  */
+	.pushsection ".entry.text", "ax"
 
 	.align	11
 ENTRY(vectors)
@@ -465,6 +466,9 @@ ENDPROC(el1_error_invalid)
 	.align	6
 el1_sync:
+#ifdef CONFIG_QCOM_TLB_EL2_HANDLER
+	smc	#0xffff
+#endif
 	kernel_entry 1
 	mrs	x1, esr_el1			// read the syndrome register
 	lsr	x24, x1, #ESR_ELx_EC_SHIFT	// exception class
@@ -548,6 +552,7 @@ el1_irq:
 	bl	trace_hardirqs_off
 #endif
 
+	get_thread_info tsk
 	irq_handler
 
 #ifdef CONFIG_PREEMPT
@@ -628,7 +633,7 @@ el0_sync_compat:
 	cmp	x24, #ESR_ELx_EC_IABT_LOW	// instruction abort in EL0
 	b.eq	el0_ia
 	cmp	x24, #ESR_ELx_EC_FP_ASIMD	// FP/ASIMD access
-	b.eq	el0_fpsimd_acc
+	b.eq	el0_fpsimd_acc_compat
 	cmp	x24, #ESR_ELx_EC_FP_EXC32	// FP/ASIMD exception
 	b.eq	el0_fpsimd_exc
 	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
@@ -681,13 +686,15 @@ el0_ia:
 	 * Instruction abort handling
 	 */
 	mrs	x26, far_el1
-	// enable interrupts before calling the main handler
-	enable_dbg_and_irq
+	enable_dbg
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off
+#endif
 	ct_user_exit
 	mov	x0, x26
 	mov	x1, x25
 	mov	x2, sp
-	bl	do_mem_abort
+	bl	do_el0_ia_bp_hardening
 	b	ret_to_user
 el0_fpsimd_acc:
 	/*
@@ -699,6 +706,17 @@ el0_fpsimd_acc:
 	mov	x1, sp
 	bl	do_fpsimd_acc
 	b	ret_to_user
+el0_fpsimd_acc_compat:
+	/*
+	 * Floating Point or Advanced SIMD access
+	 */
+	enable_dbg
+	ct_user_exit
+	mov	x0, x25
+	mov	x1, sp
+	bl	do_fpsimd_acc_compat
+	b	ret_to_user
+
 el0_fpsimd_exc:
 	/*
 	 * Floating Point or Advanced SIMD exception
@@ -809,6 +827,33 @@ ENTRY(cpu_switch_to)
 	ldp	x27, x28, [x8], #16
 	ldp	x29, x9, [x8], #16
 	ldr	lr, [x8]
+#ifdef CONFIG_ARM64_REG_REBALANCE_ON_CTX_SW
+	orr	x13, x13, x13
+	orr	x14, x14, x14
+	orr	x15, x15, x15
+	orr	x16, x16, x16
+	orr	x17, x17, x17
+	orr	x18, x18, x18
+	orr	x19, x19, x19
+	orr	x20, x20, x20
+	orr	x21, x21, x21
+	mov	v0.16b, v0.16b
+	mov	v1.16b, v1.16b
+	mov	v2.16b, v2.16b
+	mov	v3.16b, v3.16b
+	mov	v4.16b, v4.16b
+	mov	v5.16b, v5.16b
+	mov	v6.16b, v6.16b
+	mov	v7.16b, v7.16b
+	mov	v8.16b, v8.16b
+	mov	v9.16b, v9.16b
+	mov	v10.16b, v10.16b
+	mov	v11.16b, v11.16b
+	mov	v12.16b, v12.16b
+	mov	v13.16b, v13.16b
+	mov	v14.16b, v14.16b
+	mov	v15.16b, v15.16b
+#endif
 	mov	sp, x9
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	msr	sp_el0, x1
@@ -953,6 +998,7 @@ __ni_sys_trace:
 	bl	do_ni_syscall
 	b	__sys_trace_return
 
+	.popsection				// .entry.text
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 /*
 * Exception vectors trampoline.
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index f995dae1c8fd..7950df171d86 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -20,6 +20,7 @@
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
@@ -27,6 +28,7 @@
 
 #include <asm/fpsimd.h>
 #include <asm/cputype.h>
+#include <asm/app_api.h>
 
 #define FPEXC_IOF	(1 << 0)
 #define FPEXC_DZF	(1 << 1)
@@ -35,6 +37,8 @@
 #define FPEXC_IXF	(1 << 4)
 #define FPEXC_IDF	(1 << 7)
 
+#define FP_SIMD_BIT	31
+
 /*
  * In order to reduce the number of times the FPSIMD state is needlessly saved
  * and restored, we need to keep track of two things:
@@ -88,14 +92,42 @@
  * whatever is in the FPSIMD registers is not saved to memory, but discarded.
 */
 static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+static DEFINE_PER_CPU(int, fpsimd_stg_enable);
+
+static int fpsimd_settings = 0x1; /* default = 0x1 */
+module_param(fpsimd_settings, int, 0644);
+
+void fpsimd_settings_enable(void)
+{
+	set_app_setting_bit(FP_SIMD_BIT);
+}
+
+void fpsimd_settings_disable(void)
+{
+	clear_app_setting_bit(FP_SIMD_BIT);
+}
 
 /*
  * Trapped FP/ASIMD access.
 */
 void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
 {
-	/* TODO: implement lazy context saving/restoring */
-	WARN_ON(1);
+	if (!fpsimd_settings)
+		return;
+
+	fpsimd_disable_trap();
+	fpsimd_settings_disable();
+	this_cpu_write(fpsimd_stg_enable, 0);
+}
+
+void do_fpsimd_acc_compat(unsigned int esr, struct pt_regs *regs)
+{
+	if (!fpsimd_settings)
+		return;
+
+	fpsimd_disable_trap();
+	fpsimd_settings_enable();
+	this_cpu_write(fpsimd_stg_enable, 1);
 }
 
 /*
@@ -135,6 +167,11 @@ void fpsimd_thread_switch(struct task_struct *next)
 	if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
 		fpsimd_save_state(&current->thread.fpsimd_state);
 
+	if (fpsimd_settings && __this_cpu_read(fpsimd_stg_enable)) {
+		fpsimd_settings_disable();
+		this_cpu_write(fpsimd_stg_enable, 0);
+	}
+
 	if (next->mm) {
 		/*
 		 * If we are switching to a task whose most recent userland
@@ -152,6 +189,14 @@ void fpsimd_thread_switch(struct task_struct *next)
 		else
 			set_ti_thread_flag(task_thread_info(next),
 					   TIF_FOREIGN_FPSTATE);
+
+		if (!fpsimd_settings)
+			return;
+
+		if (test_ti_thread_flag(task_thread_info(next), TIF_32BIT))
+			fpsimd_enable_trap();
+		else
+			fpsimd_disable_trap();
 	}
 }
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 8168277277dc..87c56a9bb41c 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -51,9 +51,6 @@
 #error TEXT_OFFSET must be less than 2MB
 #endif
 
-#define KERNEL_START	_text
-#define KERNEL_END	_end
-
 /*
  * Kernel startup entry point.
  * ---------------------------
@@ -102,8 +99,6 @@ _head:
 #endif
 
 #ifdef CONFIG_EFI
-	.globl	__efistub_stext_offset
-	.set	__efistub_stext_offset, stext - _head
 	.align 3
 pe_header:
 	.ascii	"PE"
@@ -123,11 +118,11 @@ optional_header:
 	.short	0x20b				// PE32+ format
 	.byte	0x02				// MajorLinkerVersion
 	.byte	0x14				// MinorLinkerVersion
-	.long	_end - stext			// SizeOfCode
+	.long	_end - efi_header_end		// SizeOfCode
 	.long	0				// SizeOfInitializedData
 	.long	0				// SizeOfUninitializedData
 	.long	__efistub_entry - _head		// AddressOfEntryPoint
-	.long	__efistub_stext_offset		// BaseOfCode
+	.long	efi_header_end - _head		// BaseOfCode
 
 extra_header_fields:
 	.quad	0				// ImageBase
@@ -144,7 +139,7 @@ extra_header_fields:
 	.long	_end - _head			// SizeOfImage
 
 	// Everything before the kernel image is considered part of the header
-	.long	__efistub_stext_offset		// SizeOfHeaders
+	.long	efi_header_end - _head		// SizeOfHeaders
 	.long	0				// CheckSum
 	.short	0xa				// Subsystem (EFI application)
 	.short	0				// DllCharacteristics
@@ -188,10 +183,10 @@ section_table:
 	.byte	0
 	.byte	0
 	.byte	0				// end of 0 padding of section name
-	.long	_end - stext			// VirtualSize
-	.long	__efistub_stext_offset		// VirtualAddress
-	.long	_edata - stext			// SizeOfRawData
-	.long	__efistub_stext_offset		// PointerToRawData
+	.long	_end - efi_header_end		// VirtualSize
+	.long	efi_header_end - _head		// VirtualAddress
+	.long	_edata - efi_header_end		// SizeOfRawData
+	.long	efi_header_end - _head		// PointerToRawData
 
 	.long	0				// PointerToRelocations (0 for executables)
 	.long	0				// PointerToLineNumbers (0 for executables)
@@ -200,15 +195,18 @@ section_table:
 	.long	0xe0500020			// Characteristics (section flags)
 
 	/*
-	 * EFI will load stext onwards at the 4k section alignment
+	 * EFI will load .text onwards at the 4k section alignment
 	 * described in the PE/COFF header. To ensure that instruction
 	 * sequences using an adrp and a :lo12: immediate will function
-	 * correctly at this alignment, we must ensure that stext is
+	 * correctly at this alignment, we must ensure that .text is
 	 * placed at a 4k boundary in the Image to begin with.
 	 */
 	.align 12
+efi_header_end:
 #endif
 
+	__INIT
+
 ENTRY(stext)
 	bl	preserve_boot_args
 	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
@@ -222,13 +220,11 @@ ENTRY(stext)
 	 * On return, the CPU will be ready for the MMU to be turned on and
 	 * the TCR will have been set.
 	 */
-	ldr	x27, 0f				// address to jump to after
+	bl	__cpu_setup			// initialise processor
+	adr_l	x27, __primary_switch		// address to jump to after
 						// MMU has been enabled
-	adr_l	lr, __enable_mmu		// return (PIC) address
-	b	__cpu_setup			// initialise processor
+	b	__enable_mmu
 ENDPROC(stext)
-	.align	3
-0:	.quad	__mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
 
 /*
  * Preserve the arguments passed by the bootloader in x0 .. x3
@@ -338,7 +334,7 @@ __create_page_tables:
 	cmp	x0, x6
 	b.lo	1b
 
-	ldr	x7, =SWAPPER_MM_MMUFLAGS
+	mov	x7, SWAPPER_MM_MMUFLAGS
 
 	/*
	 * Create the identity mapping.
@@ -394,12 +390,13 @@ __create_page_tables:
	 * Map the kernel image (starting with PHYS_OFFSET).
	 */
 	mov	x0, x26				// swapper_pg_dir
-	ldr	x5, =KIMAGE_VADDR
+	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
 	add	x5, x5, x23			// add KASLR displacement
 	create_pgd_entry x0, x5, x3, x6
-	ldr	w6, kernel_img_size
-	add	x6, x6, x5
-	mov	x3, x24				// phys offset
+	adrp	x6, _end			// runtime __pa(_end)
+	adrp	x3, _text			// runtime __pa(_text)
+	sub	x6, x6, x3			// _end - _text
+	add	x6, x6, x5			// runtime __va(_end)
 	create_block_map x0, x7, x3, x5, x6
 
 	/*
@@ -414,16 +411,13 @@ __create_page_tables:
 	ret	x28
 ENDPROC(__create_page_tables)
-
-kernel_img_size:
-	.long	_end - (_head - TEXT_OFFSET)
 	.ltorg
 
 /*
  * The following fragment of code is executed with the MMU enabled.
 */
 	.set	initial_sp, init_thread_union + THREAD_START_SP
-__mmap_switched:
+__primary_switched:
 	mov	x28, lr				// preserve LR
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
@@ -438,44 +432,6 @@ __primary_switched:
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
-#ifdef CONFIG_RELOCATABLE
-
-	/*
-	 * Iterate over each entry in the relocation table, and apply the
-	 * relocations in place.
-	 */
-	adr_l	x8, __dynsym_start		// start of symbol table
-	adr_l	x9, __reloc_start		// start of reloc table
-	adr_l	x10, __reloc_end		// end of reloc table
-
-0:	cmp	x9, x10
-	b.hs	2f
-	ldp	x11, x12, [x9], #24
-	ldr	x13, [x9, #-8]
-	cmp	w12, #R_AARCH64_RELATIVE
-	b.ne	1f
-	add	x13, x13, x23			// relocate
-	str	x13, [x11, x23]
-	b	0b
-
-1:	cmp	w12, #R_AARCH64_ABS64
-	b.ne	0b
-	add	x12, x12, x12, lsl #1		// symtab offset: 24x top word
-	add	x12, x8, x12, lsr #(32 - 3)	// ... shifted into bottom word
-	ldrsh	w14, [x12, #6]			// Elf64_Sym::st_shndx
-	ldr	x15, [x12, #8]			// Elf64_Sym::st_value
-	cmp	w14, #-0xf			// SHN_ABS (0xfff1) ?
-	add	x14, x15, x23			// relocate
-	csel	x15, x14, x15, ne
-	add	x15, x13, x15
-	str	x15, [x11, x23]
-	b	0b
-
-2:	adr_l	x8, kimage_vaddr		// make relocated kimage_vaddr
-	dc	cvac, x8			// value visible to secondaries
-	dsb	sy				// with MMU off
-#endif
-
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	adrp	x4, init_thread_union
 	add	sp, x4, #THREAD_SIZE
@@ -512,13 +468,13 @@ __primary_switched:
 0:
 #endif
 	b	start_kernel
-ENDPROC(__mmap_switched)
+ENDPROC(__primary_switched)
 
 /*
  * end early head section, begin head code that is also used for
  * hotplug and needs to have the same protections as the text region
 */
-	.section ".text","ax"
+	.section ".idmap.text","ax"
 
 ENTRY(kimage_vaddr)
 	.quad		_text - TEXT_OFFSET
@@ -628,7 +584,7 @@ ENDPROC(el2_setup)
 * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
 * in x20. See arch/arm64/include/asm/virt.h for more info.
 */
-ENTRY(set_cpu_boot_mode_flag)
+set_cpu_boot_mode_flag:
 	adr_l	x1, __boot_cpu_mode
 	cmp	w20, #BOOT_CPU_MODE_EL2
 	b.ne	1f
@@ -640,17 +596,29 @@ ENDPROC(set_cpu_boot_mode_flag)
 
 /*
+ * These values are written with the MMU off, but read with the MMU on.
+ * Writers will invalidate the corresponding address, discarding up to a
+ * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
+ * sufficient alignment that the CWG doesn't overlap another section.
+ */
+	.pushsection ".mmuoff.data.write", "aw"
+/*
 * We need to find out the CPU boot mode long after boot, so we need to
 * store it in a writable variable.
 *
 * This is not in .bss, because we set it sufficiently early that the boot-time
 * zeroing of .bss would clobber it.
 */
-	.pushsection	.data..cacheline_aligned
-	.align	L1_CACHE_SHIFT
 ENTRY(__boot_cpu_mode)
 	.long	BOOT_CPU_MODE_EL2
 	.long	BOOT_CPU_MODE_EL1
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ */
+ENTRY(__early_cpu_boot_status)
+	.long	0
+
 	.popsection
 
 /*
@@ -661,7 +629,7 @@ ENTRY(secondary_holding_pen)
 	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
 	bl	set_cpu_boot_mode_flag
 	mrs	x0, mpidr_el1
-	ldr	x1, =MPIDR_HWID_BITMASK
+	mov_q	x1, MPIDR_HWID_BITMASK
 	and	x0, x0, x1
 	adr_l	x3, secondary_holding_pen_release
pen:	ldr	x4, [x3]
@@ -681,7 +649,7 @@ ENTRY(secondary_entry)
 	b	secondary_startup
 ENDPROC(secondary_entry)
 
-ENTRY(secondary_startup)
+secondary_startup:
 	/*
	 * Common entry point for secondary CPUs.
	 */
@@ -689,14 +657,11 @@ secondary_startup:
 	adrp	x26, swapper_pg_dir
 	bl	__cpu_setup			// initialise processor
 
-	ldr	x8, kimage_vaddr
-	ldr	w9, 0f
-	sub	x27, x8, w9, sxtw		// address to jump to after enabling the MMU
+	adr_l	x27, __secondary_switch		// address to jump to after enabling the MMU
 	b	__enable_mmu
 ENDPROC(secondary_startup)
-0:	.long	(_text - TEXT_OFFSET) - __secondary_switched
 
-ENTRY(__secondary_switched)
+__secondary_switched:
 	adr_l	x5, vectors
 	msr	vbar_el1, x5
 	isb
@@ -727,9 +692,8 @@ ENDPROC(__secondary_switched)
 * Checks if the selected granule size is supported by the CPU.
 * If it isn't, park the CPU
 */
-	.section	".idmap.text", "ax"
-__enable_mmu:
-	mrs	x18, sctlr_el1			// preserve old SCTLR_EL1 value
+ENTRY(__enable_mmu)
+	mrs	x22, sctlr_el1			// preserve old SCTLR_EL1 value
 	mrs	x1, ID_AA64MMFR0_EL1
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
 	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
@@ -756,7 +720,7 @@ ENTRY(__enable_mmu)
	 * to take into account by discarding the current kernel mapping and
	 * creating a new one.
	 */
-	msr	sctlr_el1, x18			// disable the MMU
+	msr	sctlr_el1, x22			// disable the MMU
 	isb
 	bl	__create_page_tables		// recreate kernel mapping
 
@@ -768,7 +732,6 @@ ENTRY(__enable_mmu)
 	ic	iallu				// flush instructions fetched
 	dsb	nsh				// via old mapping
 	isb
-	add	x27, x27, x23			// relocated __mmap_switched
 #endif
 	br	x27
ENDPROC(__enable_mmu)
@@ -777,3 +740,38 @@ __no_granule_support:
 	wfe
 	b	__no_granule_support
ENDPROC(__no_granule_support)
+
+__primary_switch:
+#ifdef CONFIG_RELOCATABLE
+	/*
+	 * Iterate over each entry in the relocation table, and apply the
+	 * relocations in place.
+	 */
+	ldr	w9, =__rela_offset		// offset to reloc table
+	ldr	w10, =__rela_size		// size of reloc table
+
+	mov_q	x11, KIMAGE_VADDR		// default virtual offset
+	add	x11, x11, x23			// actual virtual offset
+	add	x9, x9, x11			// __va(.rela)
+	add	x10, x9, x10			// __va(.rela) + sizeof(.rela)
+
+0:	cmp	x9, x10
+	b.hs	1f
+	ldp	x11, x12, [x9], #24
+	ldr	x13, [x9, #-8]
+	cmp	w12, #R_AARCH64_RELATIVE
+	b.ne	0b
+	add	x13, x13, x23			// relocate
+	str	x13, [x11, x23]
+	b	0b
+
+1:
+#endif
+	ldr	x8, =__primary_switched
+	br	x8
+ENDPROC(__primary_switch)
+
+__secondary_switch:
+	ldr	x8, =__secondary_switched
+	br	x8
+ENDPROC(__secondary_switch)
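The new __primary_switch walks the image's RELA table with the MMU already on. The same loop in C, for readers who prefer it to the assembly (a sketch only; the constant 1027 is the standard ELF value of R_AARCH64_RELATIVE, and ELF64 keeps the relocation type in the low 32 bits of r_info):

	#include <stdint.h>

	typedef struct {
		uint64_t r_offset;	/* link-time address to patch */
		uint64_t r_info;	/* low 32 bits: relocation type */
		int64_t  r_addend;	/* link-time value, pre-displacement */
	} Elf64_Rela;

	static void apply_relative_relocs(Elf64_Rela *p, Elf64_Rela *end,
					  uint64_t kaslr_offset)
	{
		for (; p < end; p++) {
			if ((uint32_t)p->r_info != 1027)	/* R_AARCH64_RELATIVE */
				continue;
			*(uint64_t *)(p->r_offset + kaslr_offset) =
				p->r_addend + kaslr_offset;
		}
	}

Note that the removed __mmap_switched loop also handled R_AARCH64_ABS64 via the symbol table; after this change every relocation the linker emits for the kernel proper is expected to be R_AARCH64_RELATIVE, so that path is dropped.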
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S new file mode 100644 index 000000000000..46f29b6560ec --- /dev/null +++ b/arch/arm64/kernel/hibernate-asm.S @@ -0,0 +1,176 @@ +/* + * Hibernate low-level support + * + * Copyright (C) 2016 ARM Ltd. + * Author: James Morse <james.morse@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/linkage.h> +#include <linux/errno.h> + +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/cputype.h> +#include <asm/memory.h> +#include <asm/page.h> +#include <asm/virt.h> + +/* + * To prevent the possibility of old and new partial table walks being visible + * in the TLB, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ +.macro break_before_make_ttbr_switch zero_page, page_table + msr ttbr1_el1, \zero_page + isb + tlbi vmalle1is + dsb ish + msr ttbr1_el1, \page_table + isb +.endm + + +/* + * Resume from hibernate + * + * Loads temporary page tables then restores the memory image. + * Finally branches to cpu_resume() to restore the state saved by + * swsusp_arch_suspend(). + * + * Because this code has to be copied to a 'safe' page, it can't call out to + * other functions by PC-relative address. Also remember that it may be + * mid-way through overwriting other functions. For this reason it contains + * code from flush_icache_range() and uses the copy_page() macro. + * + * This 'safe' page is mapped via ttbr0, and executed from there. This function + * switches to a copy of the linear map in ttbr1, performs the restore, then + * switches ttbr1 to the original kernel's swapper_pg_dir. + * + * All of memory gets written to, including code. We need to clean the kernel + * text to the Point of Coherence (PoC) before secondary cores can be booted. + * Because the kernel modules and executable pages mapped to user space are + * also written as data, we clean all pages we touch to the Point of + * Unification (PoU).
+ * + * x0: physical address of temporary page tables + * x1: physical address of swapper page tables + * x2: address of cpu_resume + * x3: linear map address of restore_pblist in the current kernel + * x4: physical address of __hyp_stub_vectors, or 0 + * x5: physical address of a zero page that remains zero after resume + */ +.pushsection ".hibernate_exit.text", "ax" +ENTRY(swsusp_arch_suspend_exit) + /* + * We execute from ttbr0, change ttbr1 to our copied linear map tables + * with a break-before-make via the zero page + */ + break_before_make_ttbr_switch x5, x0 + + mov x21, x1 + mov x30, x2 + mov x24, x4 + mov x25, x5 + + /* walk the restore_pblist and use copy_page() to overwrite memory */ + mov x19, x3 + +1: ldr x10, [x19, #HIBERN_PBE_ORIG] + mov x0, x10 + ldr x1, [x19, #HIBERN_PBE_ADDR] + + copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 + + add x1, x10, #PAGE_SIZE + /* Clean the copied page to PoU - based on flush_icache_range() */ + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x4, x10, x3 +2: dc cvau, x4 /* clean D line / unified line */ + add x4, x4, x2 + cmp x4, x1 + b.lo 2b + + ldr x19, [x19, #HIBERN_PBE_NEXT] + cbnz x19, 1b + dsb ish /* wait for PoU cleaning to finish */ + + /* switch to the restored kernel's page tables */ + break_before_make_ttbr_switch x25, x21 + + ic ialluis + dsb ish + isb + + cbz x24, 3f /* Do we need to re-initialise EL2? */ + hvc #0 +3: ret + + .ltorg +ENDPROC(swsusp_arch_suspend_exit) + +/* + * Restore the hyp stub. + * This must be done before the hibernate page is unmapped by _cpu_resume(), + * but happens before any of the hyp-stub's code is cleaned to PoC. + * + * x24: The physical address of __hyp_stub_vectors + */ +el1_sync: + msr vbar_el2, x24 + eret +ENDPROC(el1_sync) + +.macro invalid_vector label +\label: + b \label +ENDPROC(\label) +.endm + + invalid_vector el2_sync_invalid + invalid_vector el2_irq_invalid + invalid_vector el2_fiq_invalid + invalid_vector el2_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + +/* el2 vectors - switch el2 here while we restore the memory image. */ + .align 11 +ENTRY(hibernate_el2_vectors) + ventry el2_sync_invalid // Synchronous EL2t + ventry el2_irq_invalid // IRQ EL2t + ventry el2_fiq_invalid // FIQ EL2t + ventry el2_error_invalid // Error EL2t + + ventry el2_sync_invalid // Synchronous EL2h + ventry el2_irq_invalid // IRQ EL2h + ventry el2_fiq_invalid // FIQ EL2h + ventry el2_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq_invalid // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync_invalid // Synchronous 32-bit EL1 + ventry el1_irq_invalid // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +END(hibernate_el2_vectors) + +.popsection
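A sketch for orientation, not part of the patch: the HIBERN_PBE_* offsets used above come from asm-offsets.c and describe the generic struct pbe in <linux/suspend.h>, so the loop in swsusp_arch_suspend_exit() is the assembly form of the following C walk (it has to be assembly running from the safe page, since it overwrites the rest of the kernel, potentially including this very code, while it runs):

#include <linux/suspend.h>	/* struct pbe */
#include <linux/string.h>	/* memcpy() */
#include <asm/page.h>		/* PAGE_SIZE */

/* Illustrative C equivalent of the restore loop above. */
static void restore_image_sketch(struct pbe *pblist)
{
	struct pbe *pbe;

	for (pbe = pblist; pbe; pbe = pbe->next) {	/* HIBERN_PBE_NEXT */
		/* copy the preserved page (->address, HIBERN_PBE_ADDR) back
		 * over its original location (->orig_address, HIBERN_PBE_ORIG) */
		memcpy(pbe->orig_address, pbe->address, PAGE_SIZE);
		/* ...after which the assembly cleans the page to the PoU */
	}
}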
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c new file mode 100644 index 000000000000..8761eb95ed27 --- /dev/null +++ b/arch/arm64/kernel/hibernate.c @@ -0,0 +1,520 @@ +/* + * Hibernate support specific for ARM64 + * + * Derived from work on ARM hibernation support by: + * + * Ubuntu project, hibernation support for mach-dove + * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu) + * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.) + * https://lkml.org/lkml/2010/6/18/4 + * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html + * https://patchwork.kernel.org/patch/96442/ + * + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * License terms: GNU General Public License (GPL) version 2 + */ +#define pr_fmt(x) "hibernate: " x +#include <linux/kvm_host.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/pm.h> +#include <linux/sched.h> +#include <linux/suspend.h> +#include <linux/utsname.h> +#include <linux/version.h> + +#include <asm/barrier.h> +#include <asm/cacheflush.h> +#include <asm/irqflags.h> +#include <asm/memory.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> +#include <asm/sections.h> +#include <asm/suspend.h> +#include <asm/sysreg.h> +#include <asm/virt.h> + +/* + * Hibernate core relies on this value being 0 on resume, and marks it + * __nosavedata assuming it will keep the resume kernel's '0' value. This + * doesn't happen with KASLR. + * + * defined as "__visible int in_suspend __nosavedata" in + * kernel/power/hibernate.c + */ +extern int in_suspend; + +/* Find a symbol's alias in the linear map */ +#define LMADDR(x) phys_to_virt(virt_to_phys(x)) + +/* Do we need to reset el2? */ +#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) + +/* temporary el2 vectors in the __hibernate_exit_text section. */ +extern char hibernate_el2_vectors[]; + +/* hyp-stub vectors, used to restore el2 during resume from hibernate. */ +extern char __hyp_stub_vectors[]; + +/* + * Values that may not change over hibernate/resume. We put the build number + * and date in here so that we guarantee not to resume with a different + * kernel. + */ +struct arch_hibernate_hdr_invariants { + char uts_version[__NEW_UTS_LEN + 1]; +}; + +/* These values need to be known across a hibernate/restore. */ +static struct arch_hibernate_hdr { + struct arch_hibernate_hdr_invariants invariants; + + /* These are needed to find the relocated kernel if built with kaslr */ + phys_addr_t ttbr1_el1; + void (*reenter_kernel)(void); + + /* + * We need to know where the __hyp_stub_vectors are after restore to + * re-configure el2. + */ + phys_addr_t __hyp_stub_vectors; +} resume_hdr;
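An aside on LMADDR(), as an illustration rather than part of the patch: round-tripping through the physical address folds a kernel-image alias onto the linear-map alias of the same bytes. Under KASLR the two virtual addresses differ, yet they name one physical page, and only the linear-map alias is covered by the page-table copy built later by copy_page_tables():

/* Sketch: two virtual aliases of one physical object. */
static void lmaddr_demo(void)
{
	void *image_va  = &resume_hdr;		/* kernel-image mapping */
	void *linear_va = LMADDR(&resume_hdr);	/* linear-map mapping   */

	/* Distinct VAs under KASLR, identical physical address. */
	BUG_ON(virt_to_phys(image_va) != virt_to_phys(linear_va));
}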
+ +static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) +{ + memset(i, 0, sizeof(*i)); + memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version)); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin); + unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1); + + return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); +} + +void notrace save_processor_state(void) +{ + WARN_ON(num_online_cpus() != 1); +} + +void notrace restore_processor_state(void) +{ +} + +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct arch_hibernate_hdr *hdr = addr; + + if (max_size < sizeof(*hdr)) + return -EOVERFLOW; + + arch_hdr_invariants(&hdr->invariants); + hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir); + hdr->reenter_kernel = _cpu_resume; + + /* We can't use __hyp_get_vectors() because kvm may still be loaded */ + if (el2_reset_needed()) + hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors); + else + hdr->__hyp_stub_vectors = 0; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_save); + +int arch_hibernation_header_restore(void *addr) +{ + struct arch_hibernate_hdr_invariants invariants; + struct arch_hibernate_hdr *hdr = addr; + + arch_hdr_invariants(&invariants); + if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) { + pr_crit("Hibernate image not generated by this kernel!\n"); + return -EINVAL; + } + + resume_hdr = *hdr; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_restore); + +/* + * Copies length bytes, starting at src_start, into a new page, performs + * cache maintenance, then maps it at the specified low address as + * executable. + * + * This is used by hibernate to copy the code it needs to execute when + * overwriting the kernel text. This function generates a new set of page + * tables, which it loads into ttbr0. + * + * Length is provided as we probably only want 4K of data, even on a 64K + * page system. + */ +static int create_safe_exec_page(void *src_start, size_t length, + unsigned long dst_addr, + phys_addr_t *phys_dst_addr, + void *(*allocator)(gfp_t mask), + gfp_t mask) +{ + int rc = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long dst = (unsigned long)allocator(mask); + + if (!dst) { + rc = -ENOMEM; + goto out; + } + + memcpy((void *)dst, src_start, length); + flush_icache_range(dst, dst + length); + + pgd = pgd_offset_raw(allocator(mask), dst_addr); + if (pgd_none(*pgd)) { + pud = allocator(mask); + if (!pud) { + rc = -ENOMEM; + goto out; + } + pgd_populate(&init_mm, pgd, pud); + } + + pud = pud_offset(pgd, dst_addr); + if (pud_none(*pud)) { + pmd = allocator(mask); + if (!pmd) { + rc = -ENOMEM; + goto out; + } + pud_populate(&init_mm, pud, pmd); + } + + pmd = pmd_offset(pud, dst_addr); + if (pmd_none(*pmd)) { + pte = allocator(mask); + if (!pte) { + rc = -ENOMEM; + goto out; + } + pmd_populate_kernel(&init_mm, pmd, pte); + } + + pte = pte_offset_kernel(pmd, dst_addr); + set_pte(pte, __pte(virt_to_phys((void *)dst) | + pgprot_val(PAGE_KERNEL_EXEC))); + + /* + * Load our new page tables. A strict BBM approach requires that we + * ensure that TLBs are free of any entries that may overlap with the + * global mappings we are about to install. + * + * For a real hibernate/resume cycle TTBR0 currently points to a zero + * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI + * runtime services), while for a userspace-driven test_resume cycle it + * points to userspace page tables (and we must point it at a zero page + * ourselves). Elsewhere we only (un)install the idmap with preemption + * disabled, so T0SZ should be as required regardless. + */ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + write_sysreg(virt_to_phys(pgd), ttbr0_el1); + isb(); + + *phys_dst_addr = virt_to_phys((void *)dst); + +out: + return rc; +}
+ +#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) + +int swsusp_arch_suspend(void) +{ + int ret = 0; + unsigned long flags; + struct sleep_stack_data state; + + local_dbg_save(flags); + + if (__cpu_suspend_enter(&state)) { + ret = swsusp_save(); + } else { + /* Clean kernel core startup/idle code to PoC */ + dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end); + dcache_clean_range(__idmap_text_start, __idmap_text_end); + + /* + * Tell the hibernation core that we've just restored + * the memory + */ + in_suspend = 0; + + __cpu_suspend_exit(); + } + + local_dbg_restore(flags); + + return ret; +} + +static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr) +{ + pte_t pte = *src_pte; + + if (pte_valid(pte)) { + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_pte, pte_clear_rdonly(pte)); + } else if (debug_pagealloc_enabled() && !pte_none(pte)) { + /* + * debug_pagealloc will have removed the PTE_VALID bit if + * the page isn't in use by the resume kernel. It may have + * been in use by the original kernel, in which case we need + * to put it back in our copy to do the restore. + * + * Before marking this entry valid, check that the pfn should + * be mapped. + */ + BUG_ON(!pfn_valid(pte_pfn(pte))); + + set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte))); + } +}
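pte_clear_rdonly() and pte_mkpresent() are not visible in this hunk; the same series adds them to <asm/pgtable.h> as thin wrappers over the pte bit helpers, roughly as sketched below (quoted from memory, so treat this as an approximation rather than the authoritative definition):

/* Approximate definitions, for reading the code above. */
static inline pte_t pte_clear_rdonly(pte_t pte)
{
	return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
}

static inline pte_t pte_mkpresent(pte_t pte)
{
	return set_pte_bit(pte, __pgprot(PTE_VALID));
}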
+ +static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, + unsigned long end) +{ + pte_t *src_pte; + pte_t *dst_pte; + unsigned long addr = start; + + dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pte) + return -ENOMEM; + pmd_populate_kernel(&init_mm, dst_pmd, dst_pte); + dst_pte = pte_offset_kernel(dst_pmd, start); + + src_pte = pte_offset_kernel(src_pmd, start); + do { + _copy_pte(dst_pte, src_pte, addr); + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + + return 0; +} + +static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start, + unsigned long end) +{ + pmd_t *src_pmd; + pmd_t *dst_pmd; + unsigned long next; + unsigned long addr = start; + + if (pud_none(*dst_pud)) { + dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pmd) + return -ENOMEM; + pud_populate(&init_mm, dst_pud, dst_pmd); + } + dst_pmd = pmd_offset(dst_pud, start); + + src_pmd = pmd_offset(src_pud, start); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*src_pmd)) + continue; + if (pmd_table(*src_pmd)) { + if (copy_pte(dst_pmd, src_pmd, addr, next)) + return -ENOMEM; + } else { + set_pmd(dst_pmd, + __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY)); + } + } while (dst_pmd++, src_pmd++, addr = next, addr != end); + + return 0; +} + +static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start, + unsigned long end) +{ + pud_t *dst_pud; + pud_t *src_pud; + unsigned long next; + unsigned long addr = start; + + if (pgd_none(*dst_pgd)) { + dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pud) + return -ENOMEM; + pgd_populate(&init_mm, dst_pgd, dst_pud); + } + dst_pud = pud_offset(dst_pgd, start); + + src_pud = pud_offset(src_pgd, start); + do { + next = pud_addr_end(addr, end); + if (pud_none(*src_pud)) + continue; + if (pud_table(*(src_pud))) { + if (copy_pmd(dst_pud, src_pud, addr, next)) + return -ENOMEM; + } else { + set_pud(dst_pud, + __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY)); + } + } while (dst_pud++, src_pud++, addr = next, addr != end); + + return 0; +} + +static int copy_page_tables(pgd_t *dst_pgd, unsigned long start, + unsigned long end) +{ + unsigned long next; + unsigned long addr = start; + pgd_t *src_pgd = pgd_offset_k(start); + + dst_pgd = pgd_offset_raw(dst_pgd, start); + do { + next = pgd_addr_end(addr, end); + if (pgd_none(*src_pgd)) + continue; + if (copy_pud(dst_pgd, src_pgd, addr, next)) + return -ENOMEM; + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + + return 0; +}
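Note that swsusp_arch_resume() below calls copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); an end of 0 means "up to the top of the address space". The generic boundary macros make this work by comparing off-by-one values, as in pgd_addr_end() from <asm-generic/pgtable.h>, quoted here for orientation:

#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1) ? __boundary : (end);		\
})

/*
 * With end == 0, (end) - 1 is ULONG_MAX, so the walk is only cut short
 * when __boundary itself wraps to 0 past the last entry; the do/while
 * loops above then terminate once addr != end becomes false.
 */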
+ +/* + * Set up, then resume from, the hibernate image using swsusp_arch_suspend_exit(). + * + * Memory allocated by get_safe_page() will be dealt with by the hibernate code; + * we don't need to free it here. + */ +int swsusp_arch_resume(void) +{ + int rc = 0; + void *zero_page; + size_t exit_size; + pgd_t *tmp_pg_dir; + void *lm_restore_pblist; + phys_addr_t phys_hibernate_exit; + void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, + void *, phys_addr_t, phys_addr_t); + + /* + * Restoring the memory image will overwrite the ttbr1 page tables. + * Create a second copy of just the linear map, and use this when + * restoring. + */ + tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!tmp_pg_dir) { + pr_err("Failed to allocate memory for temporary page tables."); + rc = -ENOMEM; + goto out; + } + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); + if (rc) + goto out; + + /* + * Since we only copied the linear map, we need to find restore_pblist's + * linear map address. + */ + lm_restore_pblist = LMADDR(restore_pblist); + + /* + * We need a zero page that is zero before & after resume in order to + * break before make on the ttbr1 page tables. + */ + zero_page = (void *)get_safe_page(GFP_ATOMIC); + if (!zero_page) { + pr_err("Failed to allocate zero page."); + rc = -ENOMEM; + goto out; + } + + /* + * Locate the exit code in the bottom-but-one page, so that *NULL + * still has disastrous effects. + */ + hibernate_exit = (void *)PAGE_SIZE; + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; + /* + * Copy swsusp_arch_suspend_exit() to a safe page. This will generate + * a new set of ttbr0 page tables and load them. + */ + rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size, + (unsigned long)hibernate_exit, + &phys_hibernate_exit, + (void *)get_safe_page, GFP_ATOMIC); + if (rc) { + pr_err("Failed to create safe executable page for hibernate_exit code."); + goto out; + } + + /* + * The hibernate exit text contains a set of el2 vectors that will + * be executed at el2 with the mmu off in order to reload hyp-stub. + */ + __flush_dcache_area(hibernate_exit, exit_size); + + /* + * KASLR will cause the el2 vectors to be in a different location in + * the resumed kernel. Load hibernate's temporary copy into el2. + * + * We can skip this step if we booted at EL1, or are running with VHE. + */ + if (el2_reset_needed()) { + phys_addr_t el2_vectors = phys_hibernate_exit; /* base */ + el2_vectors += hibernate_el2_vectors - + __hibernate_exit_text_start; /* offset */ + + __hyp_set_vectors(el2_vectors); + } + + hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, + resume_hdr.reenter_kernel, lm_restore_pblist, + resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); + +out: + return rc; +} + +static int check_boot_cpu_online_pm_callback(struct notifier_block *nb, + unsigned long action, void *ptr) +{ + if (action == PM_HIBERNATION_PREPARE && + cpumask_first(cpu_online_mask) != 0) { + pr_warn("CPU0 is offline.\n"); + return notifier_from_errno(-ENODEV); + } + + return NOTIFY_OK; +} + +static int __init check_boot_cpu_online_init(void) +{ + /* + * Set this pm_notifier callback with a lower priority than + * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be + * called earlier to disable cpu hotplug before the cpu online check.
+ */ + pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX); + + return 0; +} +core_initcall(check_boot_cpu_online_init); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 9e7228bda4a1..bef4b659d816 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/hw_breakpoint.h> +#include <linux/kprobes.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/smp.h> @@ -128,6 +129,7 @@ static u64 read_wb_reg(int reg, int n) return val; } +NOKPROBE_SYMBOL(read_wb_reg); static void write_wb_reg(int reg, int n, u64 val) { @@ -141,6 +143,7 @@ static void write_wb_reg(int reg, int n, u64 val) } isb(); } +NOKPROBE_SYMBOL(write_wb_reg); /* * Convert a breakpoint privilege level to the corresponding exception @@ -158,6 +161,7 @@ static enum dbg_active_el debug_exception_level(int privilege) return -EINVAL; } } +NOKPROBE_SYMBOL(debug_exception_level); enum hw_breakpoint_ops { HW_BREAKPOINT_INSTALL, @@ -616,6 +620,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable) write_wb_reg(reg, i, ctrl); } } +NOKPROBE_SYMBOL(toggle_bp_registers); /* * Debug exception handlers. @@ -695,6 +700,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(breakpoint_handler); /* * Arm64 hardware does not always report a watchpoint hit address that matches @@ -835,6 +841,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, return 0; } +NOKPROBE_SYMBOL(watchpoint_handler); /* * Handle single-step exception. @@ -892,6 +899,7 @@ int reinstall_suspended_bps(struct pt_regs *regs) return !handled_exception; } +NOKPROBE_SYMBOL(reinstall_suspended_bps); /* * Context-switcher for restoring suspended breakpoints. diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 096e957aecb0..ba0127e31b1a 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -22,6 +22,8 @@ #include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/ptrace.h> #include <asm/virt.h> @@ -55,15 +57,26 @@ ENDPROC(__hyp_stub_vectors) .align 11 el1_sync: - mrs x1, esr_el2 - lsr x1, x1, #26 - cmp x1, #0x16 - b.ne 2f // Not an HVC trap - cbz x0, 1f - msr vbar_el2, x0 // Set vbar_el2 - b 2f -1: mrs x0, vbar_el2 // Return vbar_el2 -2: eret + mrs x30, esr_el2 + lsr x30, x30, #ESR_ELx_EC_SHIFT + + cmp x30, #ESR_ELx_EC_HVC64 + b.ne 9f // Not an HVC trap + + cmp x0, #HVC_GET_VECTORS + b.ne 1f + mrs x0, vbar_el2 + b 9f + +1: cmp x0, #HVC_SET_VECTORS + b.ne 2f + msr vbar_el2, x1 + b 9f + + /* Someone called kvm_call_hyp() against the hyp-stub... */ +2: mov x0, #ARM_EXCEPTION_HYP_GONE + +9: eret ENDPROC(el1_sync) .macro invalid_vector label @@ -103,10 +116,18 @@ ENDPROC(\label) */ ENTRY(__hyp_get_vectors) - mov x0, xzr - // fall through -ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + mov x0, #HVC_GET_VECTORS hvc #0 + ldr lr, [sp], #16 ret ENDPROC(__hyp_get_vectors) + +ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! 
+ mov x1, x0 + mov x0, #HVC_SET_VECTORS + hvc #0 + ldr lr, [sp], #16 + ret ENDPROC(__hyp_set_vectors) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 5e360ce88f10..86d444f9c2c1 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -73,6 +73,8 @@ #ifdef CONFIG_EFI +__efistub_stext_offset = stext - _text; + /* * Prevent the symbol aliases below from being emitted into the kallsyms * table, by forcing them to be absolute symbols (which are conveniently diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 67d3100369c6..a3f8f8bbfc92 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -2,7 +2,7 @@ * Copyright (C) 2013 Huawei Ltd. * Author: Jiang Liu <liuj97@gmail.com> * - * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com> + * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -30,6 +30,7 @@ #include <asm/cacheflush.h> #include <asm/debug-monitors.h> #include <asm/fixmap.h> +#include <asm/opcodes.h> #include <asm/insn.h> #define AARCH64_INSN_SF_BIT BIT(31) @@ -95,7 +96,8 @@ static void __kprobes *patch_map(void *addr, int fixmap) if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) page = vmalloc_to_page(addr); - else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) + else if (!module && (IS_ENABLED(CONFIG_DEBUG_RODATA) + || IS_ENABLED(CONFIG_KERNEL_TEXT_RDONLY))) page = phys_to_page(__pa_symbol(addr)); else return addr; @@ -162,6 +164,32 @@ static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) aarch64_insn_is_nop(insn); } +bool __kprobes aarch64_insn_uses_literal(u32 insn) +{ + /* ldr/ldrsw (literal), prfm */ + + return aarch64_insn_is_ldr_lit(insn) || + aarch64_insn_is_ldrsw_lit(insn) || + aarch64_insn_is_adr_adrp(insn) || + aarch64_insn_is_prfm_lit(insn); +} + +bool __kprobes aarch64_insn_is_branch(u32 insn) +{ + /* b, bl, cb*, tb*, b.cond, br, blr */ + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn) || + aarch64_insn_is_ret(insn) || + aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_bcond(insn); +} + /* * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a * Section B2.6.5 "Concurrent modification and execution of instructions": @@ -363,6 +391,9 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 immlo, immhi, mask; int shift; + if (insn == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; + switch (type) { case AARCH64_INSN_IMM_ADR: shift = 0; @@ -377,7 +408,7 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) { pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", type); - return 0; + return AARCH64_BREAK_FAULT; } } @@ -394,9 +425,12 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type, { int shift; + if (insn == AARCH64_BREAK_FAULT) + return AARCH64_BREAK_FAULT; + if (reg < AARCH64_INSN_REG_0 || reg > AARCH64_INSN_REG_SP) { pr_err("%s: unknown register encoding %d\n", __func__, reg); - return 0; + return AARCH64_BREAK_FAULT; } switch (type) { @@ -417,7 +451,7 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type, default: pr_err("%s: unknown register type encoding %d\n", __func__, 
type); - return 0; + return AARCH64_BREAK_FAULT; } insn &= ~(GENMASK(4, 0) << shift); @@ -446,7 +480,7 @@ static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type, break; default: pr_err("%s: unknown size encoding %d\n", __func__, type); - return 0; + return AARCH64_BREAK_FAULT; } insn &= ~GENMASK(31, 30); @@ -460,14 +494,17 @@ static inline long branch_imm_common(unsigned long pc, unsigned long addr, { long offset; - /* - * PC: A 64-bit Program Counter holding the address of the current - * instruction. A64 instructions must be word-aligned. - */ - BUG_ON((pc & 0x3) || (addr & 0x3)); + if ((pc & 0x3) || (addr & 0x3)) { + pr_err("%s: A64 instructions must be word aligned\n", __func__); + return range; + } offset = ((long)addr - (long)pc); - BUG_ON(offset < -range || offset >= range); + + if (offset < -range || offset >= range) { + pr_err("%s: offset out of range\n", __func__); + return range; + } return offset; } @@ -484,6 +521,8 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, * texts are within +/-128M. */ offset = branch_imm_common(pc, addr, SZ_128M); + if (offset >= SZ_128M) + return AARCH64_BREAK_FAULT; switch (type) { case AARCH64_INSN_BRANCH_LINK: @@ -493,7 +532,7 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, insn = aarch64_insn_get_b_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown branch encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -510,6 +549,8 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, long offset; offset = branch_imm_common(pc, addr, SZ_1M); + if (offset >= SZ_1M) + return AARCH64_BREAK_FAULT; switch (type) { case AARCH64_INSN_BRANCH_COMP_ZERO: @@ -519,7 +560,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, insn = aarch64_insn_get_cbnz_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown branch encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -530,7 +571,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -550,7 +591,10 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, insn = aarch64_insn_get_bcond_value(); - BUG_ON(cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL); + if (cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL) { + pr_err("%s: unknown condition encoding %d\n", __func__, cond); + return AARCH64_BREAK_FAULT; + } insn |= cond; return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn, @@ -583,7 +627,7 @@ u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg, insn = aarch64_insn_get_ret_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown branch encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -606,7 +650,7 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, insn = aarch64_insn_get_str_reg_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown load/store encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -645,26 +689,30 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, insn = aarch64_insn_get_stp_post_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown load/store encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - /* offset must be multiples of 4 in the range [-256, 
252] */ - BUG_ON(offset & 0x3); - BUG_ON(offset < -256 || offset > 252); + if ((offset & 0x3) || (offset < -256) || (offset > 252)) { + pr_err("%s: offset must be multiples of 4 in the range of [-256, 252] %d\n", + __func__, offset); + return AARCH64_BREAK_FAULT; + } shift = 2; break; case AARCH64_INSN_VARIANT_64BIT: - /* offset must be multiples of 8 in the range [-512, 504] */ - BUG_ON(offset & 0x7); - BUG_ON(offset < -512 || offset > 504); + if ((offset & 0x7) || (offset < -512) || (offset > 504)) { + pr_err("%s: offset must be multiples of 8 in the range of [-512, 504] %d\n", + __func__, offset); + return AARCH64_BREAK_FAULT; + } shift = 3; insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -702,7 +750,7 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, insn = aarch64_insn_get_subs_imm_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown add/sub encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -713,11 +761,14 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } - BUG_ON(imm & ~(SZ_4K - 1)); + if (imm & ~(SZ_4K - 1)) { + pr_err("%s: invalid immediate encoding %d\n", __func__, imm); + return AARCH64_BREAK_FAULT; + } insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst); @@ -746,7 +797,7 @@ u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst, insn = aarch64_insn_get_sbfm_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown bitfield encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -759,12 +810,18 @@ u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst, mask = GENMASK(5, 0); break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } - BUG_ON(immr & ~mask); - BUG_ON(imms & ~mask); + if (immr & ~mask) { + pr_err("%s: invalid immr encoding %d\n", __func__, immr); + return AARCH64_BREAK_FAULT; + } + if (imms & ~mask) { + pr_err("%s: invalid imms encoding %d\n", __func__, imms); + return AARCH64_BREAK_FAULT; + } insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst); @@ -793,23 +850,33 @@ u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst, insn = aarch64_insn_get_movn_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown movewide encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } - BUG_ON(imm & ~(SZ_64K - 1)); + if (imm & ~(SZ_64K - 1)) { + pr_err("%s: invalid immediate encoding %d\n", __func__, imm); + return AARCH64_BREAK_FAULT; + } switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - BUG_ON(shift != 0 && shift != 16); + if (shift != 0 && shift != 16) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; case AARCH64_INSN_VARIANT_64BIT: insn |= AARCH64_INSN_SF_BIT; - BUG_ON(shift != 0 && shift != 16 && shift != 32 && - shift != 48); + if (shift != 0 && shift != 16 && shift != 32 && shift != 48) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -843,20 +910,28 @@ u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst, insn = aarch64_insn_get_subs_value(); break; default: - 
BUG_ON(1); + pr_err("%s: unknown add/sub encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - BUG_ON(shift & ~(SZ_32 - 1)); + if (shift & ~(SZ_32 - 1)) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; case AARCH64_INSN_VARIANT_64BIT: insn |= AARCH64_INSN_SF_BIT; - BUG_ON(shift & ~(SZ_64 - 1)); + if (shift & ~(SZ_64 - 1)) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -885,11 +960,15 @@ u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst, insn = aarch64_insn_get_rev32_value(); break; case AARCH64_INSN_DATA1_REVERSE_64: - BUG_ON(variant != AARCH64_INSN_VARIANT_64BIT); + if (variant != AARCH64_INSN_VARIANT_64BIT) { + pr_err("%s: invalid variant for reverse64 %d\n", + __func__, variant); + return AARCH64_BREAK_FAULT; + } insn = aarch64_insn_get_rev64_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown data1 encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -900,7 +979,7 @@ u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -937,7 +1016,7 @@ u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst, insn = aarch64_insn_get_rorv_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown data2 encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -948,7 +1027,7 @@ u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -976,7 +1055,7 @@ u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst, insn = aarch64_insn_get_msub_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown data3 encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } @@ -987,7 +1066,7 @@ u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst, insn |= AARCH64_INSN_SF_BIT; break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -1037,20 +1116,28 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, insn = aarch64_insn_get_bics_value(); break; default: - BUG_ON(1); + pr_err("%s: unknown logical encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - BUG_ON(shift & ~(SZ_32 - 1)); + if (shift & ~(SZ_32 - 1)) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; case AARCH64_INSN_VARIANT_64BIT: insn |= AARCH64_INSN_SF_BIT; - BUG_ON(shift & ~(SZ_64 - 1)); + if (shift & ~(SZ_64 - 1)) { + pr_err("%s: invalid shift encoding %d\n", __func__, + shift); + return AARCH64_BREAK_FAULT; + } break; default: - BUG_ON(1); + pr_err("%s: unknown variant encoding %d\n", __func__, variant); return AARCH64_BREAK_FAULT; } @@ -1116,6 +1203,14 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset) BUG(); } +/* + * Extract the Op/CR data from a msr/mrs instruction. 
+ */ +u32 aarch64_insn_extract_system_reg(u32 insn) +{ + return (insn & 0x1FFFE0) >> 5; +} + bool aarch32_insn_is_wide(u32 insn) { return insn >= 0xe800; @@ -1141,3 +1236,101 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn) { return insn & CRM_MASK; } + +static bool __kprobes __check_eq(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) != 0; +} + +static bool __kprobes __check_ne(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) == 0; +} + +static bool __kprobes __check_cs(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_cc(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_mi(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_pl(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_vs(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) != 0; +} + +static bool __kprobes __check_vc(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) == 0; +} + +static bool __kprobes __check_hi(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_ls(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_ge(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_lt(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_gt(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_le(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_al(unsigned long pstate) +{ + return true; +} + +/* + * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that + * it behaves identically to 0b1110 ("al"). + */ +pstate_check_t * const aarch32_opcode_cond_checks[16] = { + __check_eq, __check_ne, __check_cs, __check_cc, + __check_mi, __check_pl, __check_vs, __check_vc, + __check_hi, __check_ls, __check_ge, __check_lt, + __check_gt, __check_le, __check_al, __check_al +}; diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c index 79b17384effa..d43ea93dc68d 100644 --- a/arch/arm64/kernel/io.c +++ b/arch/arm64/kernel/io.c @@ -19,6 +19,7 @@ #include <linux/export.h> #include <linux/types.h> #include <linux/io.h> +#include <linux/msm_rtb.h> /* * Copy data from IO memory space to "real" memory space. 
@@ -26,21 +27,21 @@ void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { while (count && !IS_ALIGNED((unsigned long)from, 8)) { - *(u8 *)to = __raw_readb(from); + *(u8 *)to = __raw_readb_no_log(from); from++; to++; count--; } while (count >= 8) { - *(u64 *)to = __raw_readq(from); + *(u64 *)to = __raw_readq_no_log(from); from += 8; to += 8; count -= 8; } while (count) { - *(u8 *)to = __raw_readb(from); + *(u8 *)to = __raw_readb_no_log(from); from++; to++; count--; @@ -54,21 +55,21 @@ EXPORT_SYMBOL(__memcpy_fromio); void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { while (count && !IS_ALIGNED((unsigned long)to, 8)) { - __raw_writeb(*(u8 *)from, to); + __raw_writeb_no_log(*(u8 *)from, to); from++; to++; count--; } while (count >= 8) { - __raw_writeq(*(u64 *)from, to); + __raw_writeq_no_log(*(u64 *)from, to); from += 8; to += 8; count -= 8; } while (count) { - __raw_writeb(*(u8 *)from, to); + __raw_writeb_no_log(*(u8 *)from, to); from++; to++; count--; @@ -88,19 +89,19 @@ void __memset_io(volatile void __iomem *dst, int c, size_t count) qc |= qc << 32; while (count && !IS_ALIGNED((unsigned long)dst, 8)) { - __raw_writeb(c, dst); + __raw_writeb_no_log(c, dst); dst++; count--; } while (count >= 8) { - __raw_writeq(qc, dst); + __raw_writeq_no_log(qc, dst); dst += 8; count -= 8; } while (count) { - __raw_writeb(c, dst); + __raw_writeb_no_log(c, dst); dst++; count--; } diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 606c21760f23..49e6de3812d9 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -22,6 +22,7 @@ #include <linux/irq.h> #include <linux/kdebug.h> #include <linux/kgdb.h> +#include <linux/kprobes.h> #include <asm/traps.h> struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -221,6 +222,7 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) kgdb_handle_exception(1, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } +NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { @@ -232,6 +234,7 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) return DBG_HOOK_HANDLED; } +NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { @@ -241,6 +244,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) kgdb_handle_exception(0, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } +NOKPROBE_SYMBOL(kgdb_step_brk_fn); static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 997e6b27ff6a..d15b5c2935b3 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -20,16 +20,13 @@ * * AArch32 user helpers. * - * Each segment is 32-byte aligned and will be moved to the top of the high - * vector page. New segments (if ever needed) must be added in front of - * existing ones. This mechanism should be used only for things that are - * really small and justified, and not be abused freely. + * These helpers are provided for compatibility with AArch32 binaries that + * still need them. They are installed at a fixed address by + * aarch32_setup_additional_pages(). * * See Documentation/arm/kernel_user_helpers.txt for formal definitions. 
*/ -#include <asm/unistd.h> - .align 5 .globl __kuser_helper_start __kuser_helper_start: @@ -77,42 +74,3 @@ __kuser_helper_version: // 0xffff0ffc .word ((__kuser_helper_end - __kuser_helper_start) >> 5) .globl __kuser_helper_end __kuser_helper_end: - -/* - * AArch32 sigreturn code - * - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - * - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ - .globl __aarch32_sigret_code_start -__aarch32_sigret_code_start: - - /* - * ARM Code - */ - .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn - - /* - * Thumb code - */ - .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn - - /* - * ARM code - */ - .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn - - /* - * Thumb code - */ - .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn - - .globl __aarch32_sigret_code_end -__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 7f316982ce00..093c13541efb 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -32,11 +32,16 @@ void *module_alloc(unsigned long size) { + gfp_t gfp_mask = GFP_KERNEL; void *p; + /* Silence the initial allocation */ + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + gfp_mask |= __GFP_NOWARN; + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + MODULES_VSIZE, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && diff --git a/arch/arm64/kernel/perf_debug.c b/arch/arm64/kernel/perf_debug.c new file mode 100644 index 000000000000..ef3313fd16c6 --- /dev/null +++ b/arch/arm64/kernel/perf_debug.c @@ -0,0 +1,73 @@ +/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/debugfs.h> + +/* + * Subsequent patches should add an entry to end of this string. + * Format is incrementing sequence number followed by text of + * patch commit title with newline. + * Note trailing ';' is on its own line to simplify addition of + * future strings. 
+ */ +static char *descriptions = + " 0 arm64: perf: add debug patch logging framework\n" + " 1 Perf: arm64: Add L1 counters to tracepoints\n" + " 5 Perf: arm64: add perf user-mode permissions\n" + " 6 Perf: arm64: Add debugfs node to clear PMU\n" + " 7 Perf: arm64: Update PMU force reset\n" + "10 Perf: arm64: tracectr: initialize counts after hotplug\n" + "11 Perf: arm64: Refine disable/enable in tracecounters\n" + "15 Perf: arm64: make debug dir handle exportable\n" + "16 Perf: arm64: add perf trace user\n" + "17 Perf: arm64: add support for kryo pmu\n" +; + +static ssize_t desc_read(struct file *fp, char __user *buf, + size_t count, loff_t *pos) +{ + return simple_read_from_buffer(buf, count, pos, descriptions, + strlen(descriptions)); +} + +static const struct file_operations perf_debug_desc_fops = { + .read = desc_read, +}; + +static int perf_debugfs_init(void) +{ + int ret = 0; + struct dentry *dir; + struct dentry *file; + + dir = debugfs_create_dir("msm-perf-patches", NULL); + if (IS_ERR_OR_NULL(dir)) { + pr_err("failed to create msm-perf-patches dir in debugfs\n"); + ret = PTR_ERR(dir); + goto init_exit; + } + + file = debugfs_create_file("descriptions", 0444, dir, NULL, + &perf_debug_desc_fops); + if (IS_ERR_OR_NULL(file)) { + debugfs_remove(dir); + pr_err("failed to create descriptions file for msm-perf-patches\n"); + ret = PTR_ERR(file); + goto init_exit; + } + +init_exit: + return ret; +} +late_initcall(perf_debugfs_init); diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c deleted file mode 100644 index e99a0ed7e66b..000000000000 --- a/arch/arm64/kernel/perf_event.c +++ /dev/null @@ -1,682 +0,0 @@ -/* - * PMU support - * - * Copyright (C) 2012 ARM Limited - * Author: Will Deacon <will.deacon@arm.com> - * - * This code is based heavily on the ARMv7 perf event code. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <asm/irq_regs.h> - -#include <linux/of.h> -#include <linux/perf/arm_pmu.h> -#include <linux/platform_device.h> - -/* - * ARMv8 PMUv3 Performance Events handling code. - * Common event types. - */ -enum armv8_pmuv3_perf_types { - /* Required events. */ - ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL = 0x03, - ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS = 0x04, - ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES = 0x11, - ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED = 0x12, - - /* At least one of the following is required. */ - ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED = 0x08, - ARMV8_PMUV3_PERFCTR_OP_SPEC = 0x1B, - - /* Common architectural events. 
*/ - ARMV8_PMUV3_PERFCTR_MEM_READ = 0x06, - ARMV8_PMUV3_PERFCTR_MEM_WRITE = 0x07, - ARMV8_PMUV3_PERFCTR_EXC_TAKEN = 0x09, - ARMV8_PMUV3_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV8_PMUV3_PERFCTR_CID_WRITE = 0x0B, - ARMV8_PMUV3_PERFCTR_PC_WRITE = 0x0C, - ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN = 0x0E, - ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F, - ARMV8_PMUV3_PERFCTR_TTBR_WRITE = 0x1C, - - /* Common microarchitectural events. */ - ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL = 0x01, - ARMV8_PMUV3_PERFCTR_ITLB_REFILL = 0x02, - ARMV8_PMUV3_PERFCTR_DTLB_REFILL = 0x05, - ARMV8_PMUV3_PERFCTR_MEM_ACCESS = 0x13, - ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS = 0x14, - ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB = 0x15, - ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS = 0x16, - ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL = 0x17, - ARMV8_PMUV3_PERFCTR_L2_CACHE_WB = 0x18, - ARMV8_PMUV3_PERFCTR_BUS_ACCESS = 0x19, - ARMV8_PMUV3_PERFCTR_MEM_ERROR = 0x1A, - ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D, -}; - -/* ARMv8 Cortex-A53 specific event types. */ -enum armv8_a53_pmu_perf_types { - ARMV8_A53_PERFCTR_PREFETCH_LINEFILL = 0xC2, -}; - -/* ARMv8 Cortex-A57 specific event types. */ -enum armv8_a57_perf_types { - ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD = 0x40, - ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST = 0x41, - ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD = 0x42, - ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST = 0x43, - ARMV8_A57_PERFCTR_DTLB_REFILL_LD = 0x4c, - ARMV8_A57_PERFCTR_DTLB_REFILL_ST = 0x4d, -}; - -/* PMUv3 HW events mapping. */ -static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -/* ARM Cortex-A53 HW events mapping. 
*/ -static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { - PERF_MAP_ALL_UNSUPPORTED, - [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, -}; - -static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, -}; - -static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - PERF_CACHE_MAP_ALL_UNSUPPORTED, - - [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD, - [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD, - [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST, - [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] 
= ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST, - - [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, - [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, - - [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD, - [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST, - - [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, - - [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, -}; - - -/* - * Perf Events' indices - */ -#define ARMV8_IDX_CYCLE_COUNTER 0 -#define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ - (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) - -#define ARMV8_MAX_COUNTERS 32 -#define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) - -/* - * ARMv8 low level PMU access - */ - -/* - * Perf Event to low level counters mapping - */ -#define ARMV8_IDX_TO_COUNTER(x) \ - (((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK) - -/* - * Per-CPU PMCR: config reg - */ -#define ARMV8_PMCR_E (1 << 0) /* Enable all counters */ -#define ARMV8_PMCR_P (1 << 1) /* Reset all counters */ -#define ARMV8_PMCR_C (1 << 2) /* Cycle counter reset */ -#define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV8_PMCR_X (1 << 4) /* Export to ETM */ -#define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ -#define ARMV8_PMCR_N_MASK 0x1f -#define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ - -/* - * PMOVSR: counters overflow flag status reg - */ -#define ARMV8_OVSR_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV8_OVERFLOWED_MASK ARMV8_OVSR_MASK - -/* - * PMXEVTYPER: Event selection reg - */ -#define ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */ -#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */ - -/* - * Event filters for PMUv3 - */ -#define ARMV8_EXCLUDE_EL1 (1 << 31) -#define ARMV8_EXCLUDE_EL0 (1 << 30) -#define ARMV8_INCLUDE_EL2 (1 << 27) - -static inline u32 armv8pmu_pmcr_read(void) -{ - u32 val; - asm volatile("mrs %0, pmcr_el0" : "=r" (val)); - return val; -} - -static inline void armv8pmu_pmcr_write(u32 val) -{ - val &= ARMV8_PMCR_MASK; - isb(); - asm volatile("msr pmcr_el0, %0" :: "r" (val)); -} - -static inline int armv8pmu_has_overflowed(u32 pmovsr) -{ - return pmovsr & ARMV8_OVERFLOWED_MASK; -} - -static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx) -{ - return idx >= ARMV8_IDX_CYCLE_COUNTER && - idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu); -} - -static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) -{ - return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); -} - -static inline int armv8pmu_select_counter(int idx) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmselr_el0, %0" :: "r" (counter)); - isb(); - - return idx; -} - -static inline u32 armv8pmu_read_counter(struct perf_event *event) -{ - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - u32 value = 0; - - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) - asm volatile("mrs %0, pmccntr_el0" : "=r" 
(value)); - else if (armv8pmu_select_counter(idx) == idx) - asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value)); - - return value; -} - -static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) -{ - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - if (!armv8pmu_counter_valid(cpu_pmu, idx)) - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); - else if (idx == ARMV8_IDX_CYCLE_COUNTER) - asm volatile("msr pmccntr_el0, %0" :: "r" (value)); - else if (armv8pmu_select_counter(idx) == idx) - asm volatile("msr pmxevcntr_el0, %0" :: "r" (value)); -} - -static inline void armv8pmu_write_evtype(int idx, u32 val) -{ - if (armv8pmu_select_counter(idx) == idx) { - val &= ARMV8_EVTYPE_MASK; - asm volatile("msr pmxevtyper_el0, %0" :: "r" (val)); - } -} - -static inline int armv8pmu_enable_counter(int idx) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter))); - return idx; -} - -static inline int armv8pmu_disable_counter(int idx) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter))); - return idx; -} - -static inline int armv8pmu_enable_intens(int idx) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter))); - return idx; -} - -static inline int armv8pmu_disable_intens(int idx) -{ - u32 counter = ARMV8_IDX_TO_COUNTER(idx); - asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter))); - isb(); - /* Clear the overflow flag in case an interrupt is pending. */ - asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter))); - isb(); - - return idx; -} - -static inline u32 armv8pmu_getreset_flags(void) -{ - u32 value; - - /* Read */ - asm volatile("mrs %0, pmovsclr_el0" : "=r" (value)); - - /* Write to clear flags */ - value &= ARMV8_OVSR_MASK; - asm volatile("msr pmovsclr_el0, %0" :: "r" (value)); - - return value; -} - -static void armv8pmu_enable_event(struct perf_event *event) -{ - unsigned long flags; - struct hw_perf_event *hwc = &event->hw; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - - /* - * Disable counter - */ - armv8pmu_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters). 
- */ - armv8pmu_write_evtype(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv8pmu_enable_intens(idx); - - /* - * Enable counter - */ - armv8pmu_enable_counter(idx); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static void armv8pmu_disable_event(struct perf_event *event) -{ - unsigned long flags; - struct hw_perf_event *hwc = &event->hw; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; - - /* - * Disable counter and interrupt - */ - raw_spin_lock_irqsave(&events->pmu_lock, flags); - - /* - * Disable counter - */ - armv8pmu_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv8pmu_disable_intens(idx); - - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) -{ - u32 pmovsr; - struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; - struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmovsr = armv8pmu_getreset_flags(); - - /* - * Did an overflow occur? - */ - if (!armv8pmu_has_overflowed(pmovsr)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - for (idx = 0; idx < cpu_pmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - /* Ignore if we don't have an event. */ - if (!event) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event); - perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event)) - continue; - - if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(event); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv8pmu_start(struct arm_pmu *cpu_pmu) -{ - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Enable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static void armv8pmu_stop(struct arm_pmu *cpu_pmu) -{ - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Disable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); -} - -static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct perf_event *event) -{ - int idx; - struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT; - - /* Always place a cycle counter into the cycle counter. 
*/ - if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) { - if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV8_IDX_CYCLE_COUNTER; - } - - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. */ - return -EAGAIN; -} - -/* - * Add an event filter to a given event. This will only work for PMUv2 PMUs. - */ -static int armv8pmu_set_event_filter(struct hw_perf_event *event, - struct perf_event_attr *attr) -{ - unsigned long config_base = 0; - - if (attr->exclude_idle) - return -EPERM; - if (attr->exclude_user) - config_base |= ARMV8_EXCLUDE_EL0; - if (attr->exclude_kernel) - config_base |= ARMV8_EXCLUDE_EL1; - if (!attr->exclude_hv) - config_base |= ARMV8_INCLUDE_EL2; - - /* - * Install the filter into config_base as this is used to - * construct the event type. - */ - event->config_base = config_base; - - return 0; -} - -static void armv8pmu_reset(void *info) -{ - struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; - u32 idx, nb_cnt = cpu_pmu->num_events; - - /* The counter and interrupt enable registers are unknown at reset. */ - for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { - armv8pmu_disable_counter(idx); - armv8pmu_disable_intens(idx); - } - - /* Initialize & Reset PMNC: C and P bits. */ - armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); -} - -static int armv8_pmuv3_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, - ARMV8_EVTYPE_EVENT); -} - -static int armv8_a53_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv8_a53_perf_map, - &armv8_a53_perf_cache_map, - ARMV8_EVTYPE_EVENT); -} - -static int armv8_a57_map_event(struct perf_event *event) -{ - return armpmu_map_event(event, &armv8_a57_perf_map, - &armv8_a57_perf_cache_map, - ARMV8_EVTYPE_EVENT); -} - -static void armv8pmu_read_num_pmnc_events(void *info) -{ - int *nb_cnt = info; - - /* Read the nb of CNTx counters supported from PMNC */ - *nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; - - /* Add the CPU cycles counter */ - *nb_cnt += 1; -} - -static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu) -{ - return smp_call_function_any(&arm_pmu->supported_cpus, - armv8pmu_read_num_pmnc_events, - &arm_pmu->num_events, 1); -} - -static void armv8_pmu_init(struct arm_pmu *cpu_pmu) -{ - cpu_pmu->handle_irq = armv8pmu_handle_irq, - cpu_pmu->enable = armv8pmu_enable_event, - cpu_pmu->disable = armv8pmu_disable_event, - cpu_pmu->read_counter = armv8pmu_read_counter, - cpu_pmu->write_counter = armv8pmu_write_counter, - cpu_pmu->get_event_idx = armv8pmu_get_event_idx, - cpu_pmu->start = armv8pmu_start, - cpu_pmu->stop = armv8pmu_stop, - cpu_pmu->reset = armv8pmu_reset, - cpu_pmu->max_period = (1LLU << 32) - 1, - cpu_pmu->set_event_filter = armv8pmu_set_event_filter; -} - -static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) -{ - armv8_pmu_init(cpu_pmu); - cpu_pmu->name = "armv8_pmuv3"; - cpu_pmu->map_event = armv8_pmuv3_map_event; - return armv8pmu_probe_num_events(cpu_pmu); -} - -static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) -{ - armv8_pmu_init(cpu_pmu); - cpu_pmu->name = "armv8_cortex_a53"; - cpu_pmu->map_event = armv8_a53_map_event; - return armv8pmu_probe_num_events(cpu_pmu); -} - -static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) -{ - 
armv8_pmu_init(cpu_pmu); - cpu_pmu->name = "armv8_cortex_a57"; - cpu_pmu->map_event = armv8_a57_map_event; - return armv8pmu_probe_num_events(cpu_pmu); -} - -static const struct of_device_id armv8_pmu_of_device_ids[] = { - {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, - {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, - {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, - {}, -}; - -static int armv8_pmu_device_probe(struct platform_device *pdev) -{ - return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); -} - -static struct platform_driver armv8_pmu_driver = { - .driver = { - .name = "armv8-pmu", - .of_match_table = armv8_pmu_of_device_ids, - .suppress_bind_attrs = true, - }, - .probe = armv8_pmu_device_probe, -}; - -static int __init register_armv8_pmu_driver(void) -{ - return platform_driver_register(&armv8_pmu_driver); -} -device_initcall(register_armv8_pmu_driver); diff --git a/arch/arm64/kernel/perf_trace_counters.c b/arch/arm64/kernel/perf_trace_counters.c new file mode 100644 index 000000000000..7b852e36eaa2 --- /dev/null +++ b/arch/arm64/kernel/perf_trace_counters.c @@ -0,0 +1,180 @@ +/* Copyright (c) 2013-2014, 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/uaccess.h> +#include <linux/debugfs.h> +#include <linux/cpu.h> +#include <linux/tracepoint.h> +#include <trace/events/sched.h> +#define CREATE_TRACE_POINTS +#include "perf_trace_counters.h" + +static unsigned int tp_pid_state; + +DEFINE_PER_CPU(u32, cntenset_val); +DEFINE_PER_CPU(u32, previous_ccnt); +DEFINE_PER_CPU(u32[NUM_L1_CTRS], previous_l1_cnts); +DEFINE_PER_CPU(u32, old_pid); +DEFINE_PER_CPU(u32, hotplug_flag); + +static int tracectr_cpu_hotplug_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + + if ((action & (~CPU_TASKS_FROZEN)) == CPU_STARTING) + per_cpu(hotplug_flag, cpu) = 1; + + return NOTIFY_OK; +} + +static struct notifier_block tracectr_cpu_hotplug_notifier_block = { + .notifier_call = tracectr_cpu_hotplug_notifier, +}; + +static void setup_prev_cnts(u32 cpu, u32 cnten_val) +{ + int i; + + if (cnten_val & CC) + asm volatile("mrs %0, pmccntr_el0" + : "=r"(per_cpu(previous_ccnt, cpu))); + + for (i = 0; i < NUM_L1_CTRS; i++) { + if (cnten_val & (1 << i)) { + /* Select */ + asm volatile("msr pmselr_el0, %0" : : "r"(i)); + isb(); + /* Read value */ + asm volatile("mrs %0, pmxevcntr_el0" + : "=r"(per_cpu(previous_l1_cnts[i], cpu))); + } + } +} + +void tracectr_notifier(void *ignore, bool preempt, + struct task_struct *prev, struct task_struct *next) +{ + u32 cnten_val; + int current_pid; + u32 cpu = task_cpu(next); + + if (tp_pid_state != 1) + return; + current_pid = next->pid; + if (per_cpu(old_pid, cpu) != -1) { + asm volatile("mrs %0, pmcntenset_el0" : "=r" (cnten_val)); + per_cpu(cntenset_val, cpu) = cnten_val; + /* Disable all the counters that were enabled */ + asm volatile("msr pmcntenclr_el0, %0" : : "r" (cnten_val)); + + if (per_cpu(hotplug_flag, cpu) == 1) { + per_cpu(hotplug_flag, cpu) = 0; + 
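+			/*
+			 * First switch-in since this CPU was hotplugged: the
+			 * PMU state may have been reset across the hotplug,
+			 * so re-seed the saved counter snapshots here rather
+			 * than emitting a trace event with bogus deltas.
+			 */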
setup_prev_cnts(cpu, cnten_val); + } else { + trace_sched_switch_with_ctrs(per_cpu(old_pid, cpu), + current_pid); + } + + /* Enable all the counters that were disabled */ + asm volatile("msr pmcntenset_el0, %0" : : "r" (cnten_val)); + } + per_cpu(old_pid, cpu) = current_pid; +} + +static void enable_tp_pid(void) +{ + if (tp_pid_state == 0) { + tp_pid_state = 1; + register_trace_sched_switch(tracectr_notifier, NULL); + } +} + +static void disable_tp_pid(void) +{ + if (tp_pid_state == 1) { + tp_pid_state = 0; + unregister_trace_sched_switch(tracectr_notifier, NULL); + } +} + +static ssize_t read_enabled_perftp_file_bool(struct file *file, + char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[2]; + buf[1] = '\n'; + if (tp_pid_state == 0) + buf[0] = '0'; + else + buf[0] = '1'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t write_enabled_perftp_file_bool(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[32]; + size_t buf_size; + + buf[0] = 0; + buf_size = min(count, (sizeof(buf)-1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + switch (buf[0]) { + case 'y': + case 'Y': + case '1': + enable_tp_pid(); + break; + case 'n': + case 'N': + case '0': + disable_tp_pid(); + break; + } + + return count; +} + +static const struct file_operations fops_perftp = { + .read = read_enabled_perftp_file_bool, + .write = write_enabled_perftp_file_bool, + .llseek = default_llseek, +}; + +int __init init_tracecounters(void) +{ + struct dentry *dir; + struct dentry *file; + unsigned int value = 1; + int cpu; + + dir = debugfs_create_dir("perf_debug_tp", NULL); + if (!dir) + return -ENOMEM; + file = debugfs_create_file("enabled", 0660, dir, + &value, &fops_perftp); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + for_each_possible_cpu(cpu) + per_cpu(old_pid, cpu) = -1; + register_cpu_notifier(&tracectr_cpu_hotplug_notifier_block); + return 0; +} + +int __exit exit_tracecounters(void) +{ + unregister_cpu_notifier(&tracectr_cpu_hotplug_notifier_block); + return 0; +} +late_initcall(init_tracecounters); diff --git a/arch/arm64/kernel/perf_trace_counters.h b/arch/arm64/kernel/perf_trace_counters.h new file mode 100644 index 000000000000..ff3bd371791d --- /dev/null +++ b/arch/arm64/kernel/perf_trace_counters.h @@ -0,0 +1,111 @@ +/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM perf_trace_counters + +#if !defined(_PERF_TRACE_COUNTERS_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _PERF_TRACE_COUNTERS_H_ + +/* Ctr index for PMCNTENSET/CLR */ +#define CC 0x80000000 +#define C0 0x1 +#define C1 0x2 +#define C2 0x4 +#define C3 0x8 +#define C4 0x10 +#define C5 0x20 +#define C_ALL (CC | C0 | C1 | C2 | C3 | C4 | C5) +#define NUM_L1_CTRS 6 + +#include <linux/sched.h> +#include <linux/cpumask.h> +#include <linux/tracepoint.h> + +DECLARE_PER_CPU(u32, cntenset_val); +DECLARE_PER_CPU(u32, previous_ccnt); +DECLARE_PER_CPU(u32[NUM_L1_CTRS], previous_l1_cnts); +TRACE_EVENT(sched_switch_with_ctrs, + + TP_PROTO(pid_t prev, pid_t next), + + TP_ARGS(prev, next), + + TP_STRUCT__entry( + __field(pid_t, old_pid) + __field(pid_t, new_pid) + __field(u32, cctr) + __field(u32, ctr0) + __field(u32, ctr1) + __field(u32, ctr2) + __field(u32, ctr3) + __field(u32, ctr4) + __field(u32, ctr5) + ), + + TP_fast_assign( + u32 cpu = smp_processor_id(); + u32 i; + u32 cnten_val; + u32 total_ccnt = 0; + u32 total_cnt = 0; + u32 delta_l1_cnts[NUM_L1_CTRS]; + __entry->old_pid = prev; + __entry->new_pid = next; + + cnten_val = per_cpu(cntenset_val, cpu); + + if (cnten_val & CC) { + asm volatile("mrs %0, pmccntr_el0" + : "=r" (total_ccnt)); + /* Read value */ + __entry->cctr = total_ccnt - + per_cpu(previous_ccnt, cpu); + per_cpu(previous_ccnt, cpu) = total_ccnt; + } + for (i = 0; i < NUM_L1_CTRS; i++) { + if (cnten_val & (1 << i)) { + /* Select */ + asm volatile("msr pmselr_el0, %0" + : : "r" (i)); + isb(); + asm volatile("mrs %0, pmxevcntr_el0" + : "=r" (total_cnt)); + /* Read value */ + delta_l1_cnts[i] = total_cnt - + per_cpu(previous_l1_cnts[i], cpu); + per_cpu(previous_l1_cnts[i], cpu) = + total_cnt; + } else + delta_l1_cnts[i] = 0; + } + + __entry->ctr0 = delta_l1_cnts[0]; + __entry->ctr1 = delta_l1_cnts[1]; + __entry->ctr2 = delta_l1_cnts[2]; + __entry->ctr3 = delta_l1_cnts[3]; + __entry->ctr4 = delta_l1_cnts[4]; + __entry->ctr5 = delta_l1_cnts[5]; + ), + + TP_printk("prev_pid=%d, next_pid=%d, CCNTR: %u, CTR0: %u, CTR1: %u, CTR2: %u, CTR3: %u, CTR4: %u, CTR5: %u", + __entry->old_pid, __entry->new_pid, + __entry->cctr, __entry->ctr0, __entry->ctr1, + __entry->ctr2, __entry->ctr3, + __entry->ctr4, __entry->ctr5) +); + +#endif +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/arm64/kernel +#define TRACE_INCLUDE_FILE perf_trace_counters +#include <trace/define_trace.h> diff --git a/arch/arm64/kernel/perf_trace_user.c b/arch/arm64/kernel/perf_trace_user.c new file mode 100644 index 000000000000..98bbb2045265 --- /dev/null +++ b/arch/arm64/kernel/perf_trace_user.c @@ -0,0 +1,96 @@ +/* Copyright (c) 2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#include <linux/perf_event.h> +#include <linux/types.h> +#include <linux/tracepoint.h> +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <linux/preempt.h> +#include <linux/stat.h> +#include <asm/uaccess.h> + +#define CREATE_TRACE_POINTS +#include "perf_trace_user.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM perf_trace_counters + +#define TRACE_USER_MAX_BUF_SIZE 100 + +static ssize_t perf_trace_write(struct file *file, + const char __user *user_string_in, + size_t len, loff_t *ppos) +{ + u32 cnten_val; + int rc; + char buf[TRACE_USER_MAX_BUF_SIZE + 1]; + ssize_t length; + + if (len == 0) + return 0; + + length = len > TRACE_USER_MAX_BUF_SIZE ? TRACE_USER_MAX_BUF_SIZE : len; + + rc = copy_from_user(buf, user_string_in, length); + if (rc) { + pr_err("%s copy_from_user failed, rc=%d\n", __func__, rc); + return length; + } + + /* Remove any trailing newline and make sure string is terminated */ + if (buf[length - 1] == '\n') + buf[length - 1] = '\0'; + else + buf[length] = '\0'; + + /* + * Disable preemption to ensure that all the performance counter + * accesses happen on the same cpu + */ + preempt_disable(); + /* stop counters, call the trace function, restart them */ + + asm volatile("mrs %0, pmcntenset_el0" : "=r" (cnten_val)); + /* Disable all the counters that were enabled */ + asm volatile("msr pmcntenclr_el0, %0" : : "r" (cnten_val)); + + trace_perf_trace_user(buf, cnten_val); + + /* Enable all the counters that were disabled */ + asm volatile("msr pmcntenset_el0, %0" : : "r" (cnten_val)); + preempt_enable(); + + return length; +} + +static const struct file_operations perf_trace_fops = { + .write = perf_trace_write +}; + +static int __init init_perf_trace(void) +{ + struct dentry *dir; + struct dentry *file; + unsigned int value = 1; + + dir = perf_create_debug_dir(); + if (!dir) + return -ENOMEM; + file = debugfs_create_file("trace_marker", S_IWUSR | S_IWGRP, dir, + &value, &perf_trace_fops); + if (!file) + return -ENOMEM; + + return 0; +} + +late_initcall(init_perf_trace); diff --git a/arch/arm64/kernel/perf_trace_user.h b/arch/arm64/kernel/perf_trace_user.h new file mode 100644 index 000000000000..e5f7336029af --- /dev/null +++ b/arch/arm64/kernel/perf_trace_user.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#if !defined(_PERF_TRACE_USER_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _PERF_TRACE_USER_H_ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM perf_trace_counters + +#include <linux/tracepoint.h> + +#define CNTENSET_CC 0x80000000 +#define NUM_L1_CTRS 4 + +TRACE_EVENT(perf_trace_user, + TP_PROTO(char *string, u32 cnten_val), + TP_ARGS(string, cnten_val), + + TP_STRUCT__entry( + __field(u32, cctr) + __field(u32, ctr0) + __field(u32, ctr1) + __field(u32, ctr2) + __field(u32, ctr3) + __field(u32, lctr0) + __field(u32, lctr1) + __string(user_string, string) + ), + + TP_fast_assign( + u32 cnt; + u32 l1_cnts[NUM_L1_CTRS]; + int i; + + if (cnten_val & CNTENSET_CC) { + /* Read value */ + asm volatile("mrs %0, pmccntr_el0" : "=r" (cnt)); + __entry->cctr = cnt; + } else + __entry->cctr = 0; + for (i = 0; i < NUM_L1_CTRS; i++) { + if (cnten_val & (1 << i)) { + /* Select */ + asm volatile("msr pmselr_el0, %0" + : : "r" (i)); + isb(); + /* Read value */ + asm volatile("mrs %0, pmxevcntr_el0" + : "=r" (cnt)); + l1_cnts[i] = cnt; + } else { + l1_cnts[i] = 0; + } + } + + __entry->ctr0 = l1_cnts[0]; + __entry->ctr1 = l1_cnts[1]; + __entry->ctr2 = l1_cnts[2]; + __entry->ctr3 = l1_cnts[3]; + __entry->lctr0 = 0; + __entry->lctr1 = 0; + __assign_str(user_string, string); + ), + + TP_printk("CCNTR: %u, CTR0: %u, CTR1: %u, CTR2: %u, CTR3: %u, L2CTR0: %u, L2CTR1: %u, MSG=%s", + __entry->cctr, __entry->ctr0, __entry->ctr1, + __entry->ctr2, __entry->ctr3, + __entry->lctr0, __entry->lctr1, + __get_str(user_string) + ) + ); + +#endif +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/arm64/kernel +#define TRACE_INCLUDE_FILE perf_trace_user +#include <trace/define_trace.h> diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile new file mode 100644 index 000000000000..ce06312e3d34 --- /dev/null +++ b/arch/arm64/kernel/probes/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \ + kprobes_trampoline.o \ + simulate-insn.o diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c new file mode 100644 index 000000000000..f7931d900bca --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -0,0 +1,174 @@ +/* + * arch/arm64/kernel/probes/decode-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <asm/kprobes.h> +#include <asm/insn.h> +#include <asm/sections.h> + +#include "decode-insn.h" +#include "simulate-insn.h" + +static bool __kprobes aarch64_insn_is_steppable(u32 insn) +{ + /* + * Branch instructions will write a new value into the PC which is + * likely to be relative to the XOL address and therefore invalid. + * Deliberate generation of an exception during stepping is also not + * currently safe. Lastly, MSR instructions can do any number of nasty + * things we can't handle during single-stepping. 
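+	 *
+	 * For illustration (not an exhaustive list; "sym" and "label" are
+	 * placeholders):
+	 *
+	 *	add  x0, x0, #1		- steppable from the XOL slot
+	 *	b.eq label		- branch: simulated, never stepped
+	 *	adrp x0, sym		- PC-relative literal: simulated
+	 *	ldxr x1, [x2]		- exclusive: rejected outright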
+	 */
+	if (aarch64_get_insn_class(insn) == AARCH64_INSN_CLS_BR_SYS) {
+		if (aarch64_insn_is_branch(insn) ||
+		    aarch64_insn_is_msr_imm(insn) ||
+		    aarch64_insn_is_msr_reg(insn) ||
+		    aarch64_insn_is_exception(insn) ||
+		    aarch64_insn_is_eret(insn))
+			return false;
+
+		/*
+		 * The MRS instruction may not return a correct value when
+		 * executing in the single-stepping environment. We do make one
+		 * exception, for reading the DAIF bits.
+		 */
+		if (aarch64_insn_is_mrs(insn))
+			return aarch64_insn_extract_system_reg(insn)
+			     != AARCH64_INSN_SPCLREG_DAIF;
+
+		/*
+		 * The HINT instruction is problematic when single-stepping,
+		 * except for the NOP case.
+		 */
+		if (aarch64_insn_is_hint(insn))
+			return aarch64_insn_is_nop(insn);
+
+		return true;
+	}
+
+	/*
+	 * Instructions which load PC relative literals are not going to work
+	 * when executed from an XOL slot. Instructions doing an exclusive
+	 * load/store are not going to complete successfully when single-step
+	 * exception handling happens in the middle of the sequence.
+	 */
+	if (aarch64_insn_uses_literal(insn) ||
+	    aarch64_insn_is_exclusive(insn))
+		return false;
+
+	return true;
+}
+
+/* Return:
+ *	INSN_REJECTED		If instruction is not allowed to be kprobed,
+ *	INSN_GOOD		If instruction is supported and uses instruction slot,
+ *	INSN_GOOD_NO_SLOT	If instruction is supported but doesn't use its slot.
+ */
+static enum kprobe_insn __kprobes
+arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+{
+	/*
+	 * Instructions reading or modifying the PC won't work from the XOL
+	 * slot.
+	 */
+	if (aarch64_insn_is_steppable(insn))
+		return INSN_GOOD;
+
+	if (aarch64_insn_is_bcond(insn)) {
+		asi->handler = simulate_b_cond;
+	} else if (aarch64_insn_is_cbz(insn) ||
+	    aarch64_insn_is_cbnz(insn)) {
+		asi->handler = simulate_cbz_cbnz;
+	} else if (aarch64_insn_is_tbz(insn) ||
+	    aarch64_insn_is_tbnz(insn)) {
+		asi->handler = simulate_tbz_tbnz;
+	} else if (aarch64_insn_is_adr_adrp(insn)) {
+		asi->handler = simulate_adr_adrp;
+	} else if (aarch64_insn_is_b(insn) ||
+	    aarch64_insn_is_bl(insn)) {
+		asi->handler = simulate_b_bl;
+	} else if (aarch64_insn_is_br(insn) ||
+	    aarch64_insn_is_blr(insn) ||
+	    aarch64_insn_is_ret(insn)) {
+		asi->handler = simulate_br_blr_ret;
+	} else if (aarch64_insn_is_ldr_lit(insn)) {
+		asi->handler = simulate_ldr_literal;
+	} else if (aarch64_insn_is_ldrsw_lit(insn)) {
+		asi->handler = simulate_ldrsw_literal;
+	} else {
+		/*
+		 * Instruction cannot be stepped out-of-line and we don't
+		 * (yet) simulate it.
+		 */
+		return INSN_REJECTED;
+	}
+
+	return INSN_GOOD_NO_SLOT;
+}
+
+static bool __kprobes
+is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
+{
+	while (scan_start > scan_end) {
+		/*
+		 * The atomic region starts from an exclusive load and ends
+		 * with an exclusive store.
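+		 *
+		 * An illustrative LL/SC loop that this scan protects:
+		 *
+		 *	1:	ldxr	x0, [x2]
+		 *		add	x0, x0, #1
+		 *		stxr	w1, x0, [x2]
+		 *		cbnz	w1, 1b
+		 *
+		 * A brk between the ldxr and the stxr clears the exclusive
+		 * monitor on every hit, so the store-exclusive would fail
+		 * forever and the loop would never terminate.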
+ */ + if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start))) + return false; + else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start))) + return true; + scan_start--; + } + + return false; +} + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) +{ + enum kprobe_insn decoded; + kprobe_opcode_t insn = le32_to_cpu(*addr); + kprobe_opcode_t *scan_start = addr - 1; + kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + struct module *mod; +#endif + + if (addr >= (kprobe_opcode_t *)_text && + scan_end < (kprobe_opcode_t *)_text) + scan_end = (kprobe_opcode_t *)_text; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + else { + preempt_disable(); + mod = __module_address((unsigned long)addr); + if (mod && within_module_init((unsigned long)addr, mod) && + !within_module_init((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->module_init; + else if (mod && within_module_core((unsigned long)addr, mod) && + !within_module_core((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->module_core; + preempt_enable(); + } +#endif + decoded = arm_probe_decode_insn(insn, asi); + + if (decoded == INSN_REJECTED || + is_probed_address_atomic(scan_start, scan_end)) + return INSN_REJECTED; + + return decoded; +} diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h new file mode 100644 index 000000000000..d438289646a6 --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -0,0 +1,35 @@ +/* + * arch/arm64/kernel/probes/decode-insn.h + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_ARM64_H +#define _ARM_KERNEL_KPROBES_ARM64_H + +/* + * ARM strongly recommends a limit of 128 bytes between LoadExcl and + * StoreExcl instructions in a single thread of execution. So keep the + * max atomic context size as 32. + */ +#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t)) + +enum kprobe_insn { + INSN_REJECTED, + INSN_GOOD_NO_SLOT, + INSN_GOOD, +}; + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi); + +#endif /* _ARM_KERNEL_KPROBES_ARM64_H */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c new file mode 100644 index 000000000000..4ea8433011d0 --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes.c @@ -0,0 +1,657 @@ +/* + * arch/arm64/kernel/probes/kprobes.c + * + * Kprobes support for ARM64 + * + * Copyright (C) 2013 Linaro Limited. + * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ */
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+#include <linux/stringify.h>
+#include <asm/traps.h>
+#include <asm/ptrace.h>
+#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
+#include <asm/system_misc.h>
+#include <asm/insn.h>
+#include <asm/uaccess.h>
+#include <asm/irq.h>
+#include <asm/sections.h>
+
+#include "decode-insn.h"
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static void __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
+
+static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
+{
+	/* prepare insn slot */
+	p->ainsn.insn[0] = cpu_to_le32(p->opcode);
+
+	flush_icache_range((uintptr_t) (p->ainsn.insn),
+			   (uintptr_t) (p->ainsn.insn) +
+			   MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+
+	/*
+	 * The return address needs restoring after stepping out of line (xol).
+	 */
+	p->ainsn.restore = (unsigned long) p->addr +
+		sizeof(kprobe_opcode_t);
+}
+
+static void __kprobes arch_prepare_simulate(struct kprobe *p)
+{
+	/* This instruction is not executed xol; no need to adjust the PC */
+	p->ainsn.restore = 0;
+}
+
+static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (p->ainsn.handler)
+		p->ainsn.handler((u32)p->opcode, (long)p->addr, regs);
+
+	/* single step simulated, now go for post processing */
+	post_kprobe_handler(kcb, regs);
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	unsigned long probe_addr = (unsigned long)p->addr;
+	extern char __start_rodata[];
+	extern char __end_rodata[];
+
+	if (probe_addr & 0x3)
+		return -EINVAL;
+
+	/* copy instruction */
+	p->opcode = le32_to_cpu(*p->addr);
+
+	if (in_exception_text(probe_addr))
+		return -EINVAL;
+	if (probe_addr >= (unsigned long) __start_rodata &&
+	    probe_addr <= (unsigned long) __end_rodata)
+		return -EINVAL;
+
+	/* decode instruction */
+	switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) {
+	case INSN_REJECTED:	/* insn not supported */
+		return -EINVAL;
+
+	case INSN_GOOD_NO_SLOT;	/* insn needs simulation */
+		p->ainsn.insn = NULL;
+		break;
+
+	case INSN_GOOD:	/* instruction uses slot */
+		p->ainsn.insn = get_insn_slot();
+		if (!p->ainsn.insn)
+			return -ENOMEM;
+		break;
+	}
+
+	/* prepare the instruction */
+	if (p->ainsn.insn)
+		arch_prepare_ss_slot(p);
+	else
+		arch_prepare_simulate(p);
+
+	return 0;
+}
+
+static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+{
+	void *addrs[1];
+	u32 insns[1];
+
+	addrs[0] = (void *)addr;
+	insns[0] = (u32)opcode;
+
+	return aarch64_insn_patch_text(addrs, insns, 1);
+}
+
+/* arm kprobe: install breakpoint in text */
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	patch_text(p->addr, BRK64_OPCODE_KPROBES);
+}
+
+/* disarm kprobe: remove breakpoint from text */
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	patch_text(p->addr, p->opcode);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
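+/*
+ * Illustrative sketch of how the prev_kprobe slot is used: it forms a
+ * one-level stack for the recursion path, pushed in reenter_kprobe()
+ * via setup_singlestep() and popped again in post_kprobe_handler() or
+ * kprobe_fault_handler():
+ *
+ *	save_previous_kprobe(kcb);
+ *	set_current_kprobe(p);
+ *	kcb->kprobe_status = KPROBE_REENTER;
+ *	...single-step or simulate the nested probe...
+ *	restore_previous_kprobe(kcb);
+ *
+ * so only one level of kprobe nesting can be live per CPU.
+ */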
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+	__this_cpu_write(current_kprobe, p);
+}
+
+/*
+ * The D-flag (Debug mask) is set (masked) upon debug exception entry.
+ * Kprobes needs to clear (unmask) the D-flag ONLY in the case of a
+ * recursive probe, i.e. when a probe is hit from kprobe handler context
+ * while executing the pre/post handlers. In that case we return with
+ * the D-flag clear so that single-stepping can be carried out.
+ *
+ * Leave the D-flag set in all other cases.
+ */
+static void __kprobes
+spsr_set_debug_flag(struct pt_regs *regs, int mask)
+{
+	unsigned long spsr = regs->pstate;
+
+	if (mask)
+		spsr |= PSR_D_BIT;
+	else
+		spsr &= ~PSR_D_BIT;
+
+	regs->pstate = spsr;
+}
+
+/*
+ * Interrupts need to be disabled before single-step mode is set, and not
+ * re-enabled until after single-step mode ends.
+ * Without disabling interrupts on the local CPU, an interrupt could fire
+ * between the exception return and the start of the out-of-line single
+ * step, and we would then wrongly single-step into the interrupt handler.
+ */
+static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
+						struct pt_regs *regs)
+{
+	kcb->saved_irqflag = regs->pstate;
+	regs->pstate |= PSR_I_BIT;
+}
+
+static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
+						struct pt_regs *regs)
+{
+	if (kcb->saved_irqflag & PSR_I_BIT)
+		regs->pstate |= PSR_I_BIT;
+	else
+		regs->pstate &= ~PSR_I_BIT;
+}
+
+static void __kprobes
+set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr)
+{
+	kcb->ss_ctx.ss_pending = true;
+	kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t);
+}
+
+static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
+{
+	kcb->ss_ctx.ss_pending = false;
+	kcb->ss_ctx.match_addr = 0;
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p,
+				       struct pt_regs *regs,
+				       struct kprobe_ctlblk *kcb, int reenter)
+{
+	unsigned long slot;
+
+	if (reenter) {
+		save_previous_kprobe(kcb);
+		set_current_kprobe(p);
+		kcb->kprobe_status = KPROBE_REENTER;
+	} else {
+		kcb->kprobe_status = KPROBE_HIT_SS;
+	}
+
+	if (p->ainsn.insn) {
+		/* prepare for single stepping */
+		slot = (unsigned long)p->ainsn.insn;
+
+		set_ss_context(kcb, slot);	/* mark pending ss */
+
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			spsr_set_debug_flag(regs, 0);
+		else
+			WARN_ON(regs->pstate & PSR_D_BIT);
+
+		/* IRQs and single stepping do not mix well.
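+		 * PSTATE.I is saved and set in kprobes_save_local_irqflag()
+		 * just below, and only restored by
+		 * kprobe_single_step_handler() once the slot has executed.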
*/ + kprobes_save_local_irqflag(kcb, regs); + kernel_enable_single_step(regs); + instruction_pointer_set(regs, slot); + } else { + /* insn simulation */ + arch_simulate_insn(p, regs); + } +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + + return 1; +} + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + + if (!cur) + return; + + /* return addr restore if non-branching insn */ + if (cur->ainsn.restore != 0) + instruction_pointer_set(regs, cur->ainsn.restore); + + /* restore back original saved kprobe variables and continue */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + return; + } + /* call post handler */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) { + /* post_handler can hit breakpoint and single step + * again, so we enable D-flag for recursive exception. + */ + cur->post_handler(cur, regs, 0); + } + + reset_current_kprobe(); +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + instruction_pointer_set(regs, (unsigned long) cur->addr); + if (!instruction_pointer(regs)) + BUG(); + + kernel_disable_single_step(); + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, fsr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. 
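+		 *
+		 * (Roughly, fixup_exception() looks the faulting PC up in the
+		 * exception table and, if an entry exists, redirects regs->pc
+		 * to the matching fixup stub:
+		 *
+		 *	fixup = search_exception_tables(instruction_pointer(regs));
+		 *	if (fixup)
+		 *		regs->pc = <address of the fixup landing pad>;
+		 *
+		 * so a faulting copy_from_user() in a handler resumes at its
+		 * error path instead of taking the kernel down.)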
+ */ + if (fixup_exception(regs)) + return 1; + } + return 0; +} + +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +static void __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p, *cur_kprobe; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + kcb = get_kprobe_ctlblk(); + cur_kprobe = kprobe_running(); + + p = get_kprobe((kprobe_opcode_t *) addr); + + if (p) { + if (cur_kprobe) { + if (reenter_kprobe(p, regs, kcb)) + return; + } else { + /* Probe hit */ + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry, + * so get out doing nothing more here. + * + * pre_handler can hit a breakpoint and can step thru + * before return, keep PSTATE D-flag enabled until + * pre_handler return back. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) { + setup_singlestep(p, regs, kcb, 0); + return; + } + } + } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) == + BRK64_OPCODE_KPROBES) && cur_kprobe) { + /* We probably hit a jprobe. Call its break handler. */ + if (cur_kprobe->break_handler && + cur_kprobe->break_handler(cur_kprobe, regs)) { + setup_singlestep(cur_kprobe, regs, kcb, 0); + return; + } + } + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * Return back to original instruction, and continue. + */ +} + +static int __kprobes +kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + if ((kcb->ss_ctx.ss_pending) + && (kcb->ss_ctx.match_addr == addr)) { + clear_ss_context(kcb); /* clear pending ss */ + return DBG_HOOK_HANDLED; + } + /* not ours, kprobes should ignore it */ + return DBG_HOOK_ERROR; +} + +int __kprobes +kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + int retval; + + /* return error if this is not our step */ + retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); + + if (retval == DBG_HOOK_HANDLED) { + kprobes_restore_local_irqflag(kcb, regs); + kernel_disable_single_step(); + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + post_kprobe_handler(kcb, regs); + } + + return retval; +} + +int __kprobes +kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) +{ + kprobe_handler(regs); + return DBG_HOOK_HANDLED; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + kcb->jprobe_saved_regs = *regs; + /* + * Since we can't be sure where in the stack frame "stacked" + * pass-by-value arguments are stored we just don't try to + * duplicate any of the stack. Do not use jprobes on functions that + * use more than 64 bytes (after padding each to an 8 byte boundary) + * of arguments, or pass individual arguments larger than 16 bytes. 
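+	 *
+	 * For example (illustrative), a function such as
+	 *
+	 *	long f(u64 a0, u64 a1, u64 a2, u64 a3,
+	 *	       u64 a4, u64 a5, u64 a6, u64 a7, u64 a8);
+	 *
+	 * receives a8 on the stack per the AAPCS64, which is exactly what
+	 * is not duplicated here, so it must not be jprobed.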
+ */ + + instruction_pointer_set(regs, (unsigned long) jp->entry); + preempt_disable(); + pause_graph_tracing(); + return 1; +} + +void __kprobes jprobe_return(void) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + /* + * Jprobe handler return by entering break exception, + * encoded same as kprobe, but with following conditions + * -a special PC to identify it from the other kprobes. + * -restore stack addr to original saved pt_regs + */ + asm volatile(" mov sp, %0 \n" + "jprobe_return_break: brk %1 \n" + : + : "r" (kcb->jprobe_saved_regs.sp), + "I" (BRK64_ESR_KPROBES) + : "memory"); + + unreachable(); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + long stack_addr = kcb->jprobe_saved_regs.sp; + long orig_sp = kernel_stack_pointer(regs); + struct jprobe *jp = container_of(p, struct jprobe, kp); + extern const char jprobe_return_break[]; + + if (instruction_pointer(regs) != (u64) jprobe_return_break) + return 0; + + if (orig_sp != stack_addr) { + struct pt_regs *saved_regs = + (struct pt_regs *)kcb->jprobe_saved_regs.sp; + pr_err("current sp %lx does not match saved sp %lx\n", + orig_sp, stack_addr); + pr_err("Saved registers for jprobe %p\n", jp); + show_regs(saved_regs); + pr_err("Current registers\n"); + show_regs(regs); + BUG(); + } + unpause_graph_tracing(); + *regs = kcb->jprobe_saved_regs; + preempt_enable_no_resched(); + return 1; +} + +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + extern char __idmap_text_start[], __idmap_text_end[]; + + if ((addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end) || + (addr >= (unsigned long)__idmap_text_start && + addr < (unsigned long)__idmap_text_end) || + !!search_exception_tables(addr)) + return true; + + + return false; +} + +void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = + (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * return probes installed on them, and/or more than one + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always pushed into the head of the list + * - when multiple return probes are registered for the same + * function, the (chronologically) first instance's ret_addr + * will be the real return address, and all the rest will + * point to kretprobe_trampoline. + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. 
Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (ri->rp && ri->rp->handler) { + __this_cpu_write(current_kprobe, &ri->rp->kp); + get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + __this_cpu_write(current_kprobe, NULL); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + return (void *)orig_ret_address; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->regs[30]; + + /* replace return addr (x30) with trampoline */ + regs->regs[30] = (long)&kretprobe_trampoline; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} + +int __init arch_init_kprobes(void) +{ + return 0; +} diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S new file mode 100644 index 000000000000..5d6e7f14638c --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -0,0 +1,81 @@ +/* + * trampoline entry and return code for kretprobes. + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + + .text + + .macro save_all_base_regs + stp x0, x1, [sp, #S_X0] + stp x2, x3, [sp, #S_X2] + stp x4, x5, [sp, #S_X4] + stp x6, x7, [sp, #S_X6] + stp x8, x9, [sp, #S_X8] + stp x10, x11, [sp, #S_X10] + stp x12, x13, [sp, #S_X12] + stp x14, x15, [sp, #S_X14] + stp x16, x17, [sp, #S_X16] + stp x18, x19, [sp, #S_X18] + stp x20, x21, [sp, #S_X20] + stp x22, x23, [sp, #S_X22] + stp x24, x25, [sp, #S_X24] + stp x26, x27, [sp, #S_X26] + stp x28, x29, [sp, #S_X28] + add x0, sp, #S_FRAME_SIZE + stp lr, x0, [sp, #S_LR] + /* + * Construct a useful saved PSTATE + */ + mrs x0, nzcv + mrs x1, daif + orr x0, x0, x1 + mrs x1, CurrentEL + orr x0, x0, x1 + mrs x1, SPSel + orr x0, x0, x1 + stp xzr, x0, [sp, #S_PC] + .endm + + .macro restore_all_base_regs + ldr x0, [sp, #S_PSTATE] + and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT) + msr nzcv, x0 + ldp x0, x1, [sp, #S_X0] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldp x8, x9, [sp, #S_X8] + ldp x10, x11, [sp, #S_X10] + ldp x12, x13, [sp, #S_X12] + ldp x14, x15, [sp, #S_X14] + ldp x16, x17, [sp, #S_X16] + ldp x18, x19, [sp, #S_X18] + ldp x20, x21, [sp, #S_X20] + ldp x22, x23, [sp, #S_X22] + ldp x24, x25, [sp, #S_X24] + ldp x26, x27, [sp, #S_X26] + ldp x28, x29, [sp, #S_X28] + .endm + +ENTRY(kretprobe_trampoline) + sub sp, sp, #S_FRAME_SIZE + + save_all_base_regs + + mov x0, sp + bl trampoline_probe_handler + /* + * Replace trampoline address in lr with actual orig_ret_addr return + * address. 
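+	 * After restore_all_base_regs reloads x0-x29 below, lr still
+	 * holds orig_ret_addr, so the final ret returns straight to the
+	 * probed function's caller.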
+ */ + mov lr, x0 + + restore_all_base_regs + + add sp, sp, #S_FRAME_SIZE + ret + +ENDPROC(kretprobe_trampoline) diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c new file mode 100644 index 000000000000..8977ce9d009d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -0,0 +1,217 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> + +#include "simulate-insn.h" + +#define sign_extend(x, signbit) \ + ((x) | (0 - ((x) & (1 << (signbit))))) + +#define bbl_displacement(insn) \ + sign_extend(((insn) & 0x3ffffff) << 2, 27) + +#define bcond_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define cbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define tbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x3fff) << 2, 15) + +#define ldr_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = val; +} + +static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = lower_32_bits(val); +} + +static inline u64 get_x_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return regs->regs[reg]; + else + return 0; +} + +static inline u32 get_w_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return lower_32_bits(regs->regs[reg]); + else + return 0; +} + +static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? + (get_x_reg(regs, xn) == 0) : (get_w_reg(regs, xn) == 0); +} + +static bool __kprobes check_cbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? 
+ (get_x_reg(regs, xn) != 0) : (get_w_reg(regs, xn) != 0); +} + +static bool __kprobes check_tbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) == 0; +} + +static bool __kprobes check_tbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) != 0; +} + +/* + * instruction simulation functions + */ +void __kprobes +simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs) +{ + long imm, xn, val; + + xn = opcode & 0x1f; + imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3); + imm = sign_extend(imm, 20); + if (opcode & 0x80000000) + val = (imm<<12) + (addr & 0xfffffffffffff000); + else + val = imm + addr; + + set_x_reg(regs, xn, val); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = bbl_displacement(opcode); + + /* Link register is x30 */ + if (opcode & (1 << 31)) + set_x_reg(regs, 30, addr + 4); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (aarch32_opcode_cond_checks[opcode & 0xf](regs->pstate & 0xffffffff)) + disp = bcond_displacement(opcode); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs) +{ + int xn = (opcode >> 5) & 0x1f; + + /* update pc first in case we're doing a "blr lr" */ + instruction_pointer_set(regs, get_x_reg(regs, xn)); + + /* Link register is x30 */ + if (((opcode >> 21) & 0x3) == 1) + set_x_reg(regs, 30, addr + 4); +} + +void __kprobes +simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_cbnz(opcode, regs)) + disp = cbz_displacement(opcode); + } else { + if (check_cbz(opcode, regs)) + disp = cbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_tbnz(opcode, regs)) + disp = tbz_displacement(opcode); + } else { + if (check_tbz(opcode, regs)) + disp = tbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + u64 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (u64 *) (addr + disp); + + if (opcode & (1 << 30)) /* x0-x30 */ + set_x_reg(regs, xn, *load_addr); + else /* w0-w30 */ + set_w_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + s32 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (s32 *) (addr + disp); + + set_x_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h new file mode 100644 index 000000000000..050bde683c2d --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.h @@ -0,0 +1,28 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.h + * + * Copyright (C) 
2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_SIMULATE_INSN_H +#define _ARM_KERNEL_KPROBES_SIMULATE_INSN_H + +void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs); +void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs); +void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs); + +#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 236120fc4dbc..5dd9b572259f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -87,6 +87,16 @@ void arch_cpu_idle(void) trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } +void arch_cpu_idle_enter(void) +{ + idle_notifier_call_chain(IDLE_START); +} + +void arch_cpu_idle_exit(void) +{ + idle_notifier_call_chain(IDLE_END); +} + #ifdef CONFIG_HOTPLUG_CPU void arch_cpu_idle_dead(void) { @@ -182,7 +192,7 @@ static void show_data(unsigned long addr, int nbytes, const char *name) * don't attempt to dump non-kernel addresses or * values that are probably just small negative numbers */ - if (addr < PAGE_OFFSET || addr > -256UL) + if (addr < KIMAGE_VADDR || addr > -256UL) return; printk("\n%s: %#lx:\n", name, addr); @@ -207,29 +217,23 @@ static void show_data(unsigned long addr, int nbytes, const char *name) if (probe_kernel_address(p, data)) { printk(" ********"); } else { - printk(" %08x", data); + pr_cont(" %08x", data); } ++p; } - printk("\n"); + pr_cont("\n"); } } static void show_extra_register_data(struct pt_regs *regs, int nbytes) { mm_segment_t fs; - unsigned int i; fs = get_fs(); set_fs(KERNEL_DS); show_data(regs->pc - nbytes, nbytes * 2, "PC"); show_data(regs->regs[30] - nbytes, nbytes * 2, "LR"); show_data(regs->sp - nbytes, nbytes * 2, "SP"); - for (i = 0; i < 30; i++) { - char name[4]; - snprintf(name, sizeof(name), "X%u", i); - show_data(regs->regs[i] - nbytes, nbytes * 2, name); - } set_fs(fs); } @@ -260,7 +264,7 @@ void __show_regs(struct pt_regs *regs) printk("\n"); } if (!user_mode(regs)) - show_extra_register_data(regs, 128); + show_extra_register_data(regs, 64); printk("\n"); } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index dbff1ab8e92f..2f8094ab1cef 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -21,7 +21,6 @@ #include <linux/delay.h> #include <linux/mm.h> #include <linux/psci.h> -#include <linux/slab.h> #include <uapi/linux/psci.h> @@ -29,76 +28,10 @@ #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/smp_plat.h> -#include <asm/suspend.h> - -static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); - -static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu) -{ - int i, ret, count = 0; - u32 *psci_states; - struct device_node 
*state_node, *cpu_node; - - cpu_node = of_get_cpu_node(cpu, NULL); - if (!cpu_node) - return -ENODEV; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - /* Count idle states */ - while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", - count))) { - count++; - of_node_put(state_node); - } - - if (!count) - return -ENODEV; - - psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); - if (!psci_states) - return -ENOMEM; - - for (i = 0; i < count; i++) { - u32 state; - - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); - - ret = of_property_read_u32(state_node, - "arm,psci-suspend-param", - &state); - if (ret) { - pr_warn(" * %s missing arm,psci-suspend-param property\n", - state_node->full_name); - of_node_put(state_node); - goto free_mem; - } - - of_node_put(state_node); - pr_debug("psci-power-state %#x index %d\n", state, i); - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - ret = -EINVAL; - goto free_mem; - } - psci_states[i] = state; - } - /* Idle states parsed correctly, initialize per-cpu pointer */ - per_cpu(psci_power_state, cpu) = psci_states; - return 0; - -free_mem: - kfree(psci_states); - return ret; -} static int __init cpu_psci_cpu_init(unsigned int cpu) { + pr_info("Initializing psci_cpu_init\n"); return 0; } @@ -166,7 +99,7 @@ static int cpu_psci_cpu_kill(unsigned int cpu) do { err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed (polled %d ms)\n", cpu, + pr_debug("CPU%d killed (polled %d ms)\n", cpu, jiffies_to_msecs(jiffies - start)); return 0; } @@ -180,38 +113,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu) } #endif -static int psci_suspend_finisher(unsigned long index) -{ - u32 *state = __this_cpu_read(psci_power_state); - - return psci_ops.cpu_suspend(state[index - 1], - virt_to_phys(cpu_resume)); -} - -static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index) -{ - int ret; - u32 *state = __this_cpu_read(psci_power_state); - /* - * idle state index 0 corresponds to wfi, should never be called - * from the cpu_suspend operations - */ - if (WARN_ON_ONCE(!index)) - return -EINVAL; - - if (!psci_power_state_loses_context(state[index - 1])) - ret = psci_ops.cpu_suspend(state[index - 1], 0); - else - ret = cpu_suspend(index, psci_suspend_finisher); - - return ret; -} - const struct cpu_operations cpu_psci_ops = { .name = "psci", #ifdef CONFIG_CPU_IDLE - .cpu_init_idle = cpu_psci_cpu_init_idle, - .cpu_suspend = cpu_psci_cpu_suspend, + .cpu_init_idle = psci_cpu_init_idle, + .cpu_suspend = psci_cpu_suspend_enter, #endif .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index beff0fb11b6b..c67bd311e815 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -49,6 +49,106 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} +#define GPR_OFFSET_NAME(r) \ + {.name = "x" #r, .offset = offsetof(struct pt_regs, regs[r])} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + 
GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + {.name = "lr", .offset = offsetof(struct pt_regs, regs[30])}, + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(pc), + REG_OFFSET_NAME(pstate), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -1254,13 +1354,13 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs) { - /* Do the secure computing check first; failures should be fast. */ - if (secure_computing() == -1) - return -1; - if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + /* Do the secure computing after ptrace; failures should be fast. 
*/ + if (secure_computing(NULL) == -1) + return -1; + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, regs->syscallno); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 4bc5bc9463b8..01f259ec5700 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -44,6 +44,7 @@ #include <linux/of_platform.h> #include <linux/efi.h> #include <linux/psci.h> +#include <linux/dma-mapping.h> #include <linux/mm.h> #include <asm/acpi.h> @@ -65,8 +66,15 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +unsigned int boot_reason; +EXPORT_SYMBOL(boot_reason); + +unsigned int cold_boot; +EXPORT_SYMBOL(cold_boot); + phys_addr_t __fdt_pointer __initdata; +const char *machine_name; /* * Standard memory resources */ @@ -176,7 +184,6 @@ static void __init smp_build_mpidr_hash(void) */ if (mpidr_hash_size() > 4 * num_possible_cpus()) pr_warn("Large number of MPIDR hash buckets detected\n"); - __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } static void __init setup_machine_fdt(phys_addr_t dt_phys) @@ -194,7 +201,11 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) cpu_relax(); } - dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name()); + machine_name = of_flat_dt_get_machine_name(); + if (machine_name) { + dump_stack_set_arch_desc("%s (DT)", machine_name); + pr_info("Machine: %s\n", machine_name); + } } static void __init request_standard_resources(void) @@ -290,6 +301,8 @@ static inline void __init relocate_initrd(void) u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; +void __init __weak init_random_pool(void) { } + void __init setup_arch(char **cmdline_p) { pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); @@ -368,6 +381,7 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + init_random_pool(); if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" @@ -401,7 +415,13 @@ static int __init topology_init(void) return 0; } -subsys_initcall(topology_init); +postcore_initcall(topology_init); + +void arch_setup_pdev_archdata(struct platform_device *pdev) +{ + pdev->archdata.dma_mask = DMA_BIT_MASK(32); + pdev->dev.dma_mask = &pdev->archdata.dma_mask; +} /* * Dump out kernel offset information on panic. 
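(Illustration only, not part of the patch: the regs helpers added to ptrace.c above exist to support kprobes-style argument fetching. A minimal sketch of such a consumer, using the hypothetical name fetch_arg():)

static unsigned long fetch_arg(struct pt_regs *regs, const char *name,
			       unsigned int n)
{
	int offs = regs_query_register_offset(name);	/* e.g. "x2", "lr" */

	if (offs >= 0)
		return *(unsigned long *)((char *)regs + offs);

	/*
	 * Not a register name: fall back to the n-th word on the kernel
	 * stack; regs_get_kernel_stack_nth() bounds-checks the slot and
	 * returns 0 when it lies outside the current stack.
	 */
	return regs_get_kernel_stack_nth(regs, n);
}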
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a8eafdbc7cb8..0bed9a899850 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -25,6 +25,7 @@ #include <linux/uaccess.h> #include <linux/tracehook.h> #include <linux/ratelimit.h> +#include <linux/syscalls.h> #include <asm/debug-monitors.h> #include <asm/elf.h> @@ -402,6 +403,9 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int thread_flags) { + /* Check valid user FS if needed */ + addr_limit_user_check(); + if (thread_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 107335637390..666363d127e5 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -28,42 +28,7 @@ #include <asm/signal32.h> #include <asm/uaccess.h> #include <asm/unistd.h> - -struct compat_sigcontext { - /* We always set these two fields to 0 */ - compat_ulong_t trap_no; - compat_ulong_t error_code; - - compat_ulong_t oldmask; - compat_ulong_t arm_r0; - compat_ulong_t arm_r1; - compat_ulong_t arm_r2; - compat_ulong_t arm_r3; - compat_ulong_t arm_r4; - compat_ulong_t arm_r5; - compat_ulong_t arm_r6; - compat_ulong_t arm_r7; - compat_ulong_t arm_r8; - compat_ulong_t arm_r9; - compat_ulong_t arm_r10; - compat_ulong_t arm_fp; - compat_ulong_t arm_ip; - compat_ulong_t arm_sp; - compat_ulong_t arm_lr; - compat_ulong_t arm_pc; - compat_ulong_t arm_cpsr; - compat_ulong_t fault_address; -}; - -struct compat_ucontext { - compat_ulong_t uc_flags; - compat_uptr_t uc_link; - compat_stack_t uc_stack; - struct compat_sigcontext uc_mcontext; - compat_sigset_t uc_sigmask; - int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))]; - compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8))); -}; +#include <asm/vdso.h> struct compat_vfp_sigframe { compat_ulong_t magic; @@ -91,16 +56,6 @@ struct compat_aux_sigframe { unsigned long end_magic; } __attribute__((__aligned__(8))); -struct compat_sigframe { - struct compat_ucontext uc; - compat_ulong_t retcode[2]; -}; - -struct compat_rt_sigframe { - struct compat_siginfo info; - struct compat_sigframe sig; -}; - #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) @@ -484,14 +439,27 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = ptr_to_compat(ka->sa.sa_restorer); } else { /* Set up sigreturn pointer */ +#ifdef CONFIG_VDSO32 + void *vdso_base = current->mm->context.vdso; + void *trampoline = + (ka->sa.sa_flags & SA_SIGINFO + ? (thumb + ? VDSO_SYMBOL(vdso_base, compat_rt_sigreturn_thumb) + : VDSO_SYMBOL(vdso_base, compat_rt_sigreturn_arm)) + : (thumb + ? VDSO_SYMBOL(vdso_base, compat_sigreturn_thumb) + : VDSO_SYMBOL(vdso_base, compat_sigreturn_arm))); + + retcode = ptr_to_compat(trampoline) + thumb; +#else + void *sigreturn_base = current->mm->context.vdso; unsigned int idx = thumb << 1; if (ka->sa.sa_flags & SA_SIGINFO) idx += 3; - retcode = AARCH32_VECTORS_BASE + - AARCH32_KERN_SIGRET_CODE_OFFSET + - (idx << 2) + thumb; + retcode = ptr_to_compat(sigreturn_base) + (idx << 2) + thumb; +#endif } regs->regs[0] = usig; diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S new file mode 100644 index 000000000000..6ecda4d84cd5 --- /dev/null +++ b/arch/arm64/kernel/sigreturn32.S @@ -0,0 +1,67 @@ +/* + * sigreturn trampolines for AArch32. 
+ * + * Copyright (C) 2005-2011 Nicolas Pitre <nico@fluxnic.net> + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * AArch32 sigreturn code + * + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. + */ + +#include <asm/unistd.h> + + .globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + + /* + * ARM Code + */ + // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 + // svc #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef + + /* + * Thumb code + */ + // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x27 + // svc #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0xdf + + /* + * ARM code + */ + // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 + // svc #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef + + /* + * Thumb code + */ + // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x27 + // svc #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0xdf + + .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index f586f7c875e2..100f92f13113 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -49,39 +49,32 @@ orr \dst, \dst, \mask // dst|=(aff3>>rs3) .endm /* - * Save CPU state for a suspend and execute the suspend finisher. - * On success it will return 0 through cpu_resume - ie through a CPU - * soft/hard reboot from the reset vector. - * On failure it returns the suspend finisher return value or force - * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher - * is not allowed to return, if it does this must be considered failure). - * It saves callee registers, and allocates space on the kernel stack - * to save the CPU specific registers + some other data for resume. + * Save CPU state in the provided sleep_stack_data area, and publish its + * location for cpu_resume()'s use in sleep_save_stash. * - * x0 = suspend finisher argument - * x1 = suspend finisher function pointer + * cpu_resume() will restore this saved state, and return. Because the + * link-register is saved and restored, it will appear to return from this + * function. So that the caller can tell the suspend/resume paths apart, + * __cpu_suspend_enter() will always return a non-zero value, whereas the + * path through cpu_resume() will return 0. + * + * x0 = struct sleep_stack_data area */ ENTRY(__cpu_suspend_enter) - stp x29, lr, [sp, #-96]!
- stp x19, x20, [sp,#16] - stp x21, x22, [sp,#32] - stp x23, x24, [sp,#48] - stp x25, x26, [sp,#64] - stp x27, x28, [sp,#80] - /* - * Stash suspend finisher and its argument in x20 and x19 - */ - mov x19, x0 - mov x20, x1 + stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] + stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] + stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] + stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48] + stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64] + stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80] + + /* save the sp in cpu_suspend_ctx */ mov x2, sp - sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx - mov x0, sp - /* - * x0 now points to struct cpu_suspend_ctx allocated on the stack - */ - str x2, [x0, #CPU_CTX_SP] - ldr x1, =sleep_save_sp - ldr x1, [x1, #SLEEP_SAVE_SP_VIRT] + str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] + + /* find the mpidr_hash */ + ldr x1, =sleep_save_stash + ldr x1, [x1] mrs x7, mpidr_el1 ldr x9, =mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] @@ -93,70 +86,36 @@ ENTRY(__cpu_suspend_enter) ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 add x1, x1, x8, lsl #3 - bl __cpu_suspend_save - /* - * Grab suspend finisher in x20 and its argument in x19 - */ - mov x0, x19 - mov x1, x20 - /* - * We are ready for power down, fire off the suspend finisher - * in x1, with argument in x0 - */ - blr x1 - /* - * Never gets here, unless suspend finisher fails. - * Successful cpu_suspend should return from cpu_resume, returning - * through this code path is considered an error - * If the return value is set to 0 force x0 = -EOPNOTSUPP - * to make sure a proper error condition is propagated - */ - cmp x0, #0 - mov x3, #-EOPNOTSUPP - csel x0, x3, x0, eq - add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 + + str x0, [x1] + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS + stp x29, lr, [sp, #-16]! + bl cpu_do_suspend + ldp x29, lr, [sp], #16 + mov x0, #1 ret ENDPROC(__cpu_suspend_enter) .ltorg -/* - * x0 must contain the sctlr value retrieved from restored context - */ - .pushsection ".idmap.text", "ax" -ENTRY(cpu_resume_mmu) - ldr x3, =cpu_resume_after_mmu - msr sctlr_el1, x0 // restore sctlr_el1 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. 
- */ - ic iallu - dsb nsh - isb - br x3 // global jump to virtual address -ENDPROC(cpu_resume_mmu) - .popsection -cpu_resume_after_mmu: - mov x0, #0 // return zero on success - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 - ret -ENDPROC(cpu_resume_after_mmu) - + .pushsection ".idmap.text", "ax" ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly + /* enable the MMU early - so we can access sleep_save_stash by va */ + adr_l lr, __enable_mmu /* __cpu_setup will return here */ + adr_l x27, _resume_switched /* __enable_mmu will branch here */ + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + b __cpu_setup +ENDPROC(cpu_resume) + +_resume_switched: + ldr x8, =_cpu_resume + br x8 +ENDPROC(_resume_switched) + .ltorg + .popsection + +ENTRY(_cpu_resume) mrs x1, mpidr_el1 adrp x8, mpidr_hash add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address @@ -166,17 +125,29 @@ ENTRY(cpu_resume) ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 /* x7 contains hash index, let's use it to grab context pointer */ - ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS + ldr_l x0, sleep_save_stash ldr x0, [x0, x7, lsl #3] + add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] - /* load physical address of identity map page table in x1 */ - adrp x1, idmap_pg_dir mov sp, x2 /* - * cpu_do_resume expects x0 to contain context physical address - * pointer and x1 to contain physical address of 1:1 page tables + * cpu_do_resume expects x0 to contain context address pointer */ - bl cpu_do_resume // PC relative jump, MMU off - b cpu_resume_mmu // Resume MMU, never returns -ENDPROC(cpu_resume) + bl cpu_do_resume + +#ifdef CONFIG_KASAN + mov x0, sp + bl kasan_unpoison_task_stack_below +#endif + + ldp x19, x20, [x29, #16] + ldp x21, x22, [x29, #32] + ldp x23, x24, [x29, #48] + ldp x25, x26, [x29, #64] + ldp x27, x28, [x29, #80] + ldp x29, lr, [x29] + mov x0, #0 + ret +ENDPROC(_cpu_resume) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a90c1f184792..16f97cdaaeae 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -53,6 +53,8 @@ #include <asm/tlbflush.h> #include <asm/ptrace.h> #include <asm/virt.h> +#include <asm/edac.h> +#include <soc/qcom/minidump.h> #define CREATE_TRACE_POINTS #include <trace/events/ipi.h> @@ -73,7 +75,8 @@ enum ipi_msg_type { IPI_CPU_STOP, IPI_TIMER, IPI_IRQ_WORK, - IPI_WAKEUP + IPI_WAKEUP, + IPI_CPU_BACKTRACE, }; /* @@ -150,6 +153,8 @@ asmlinkage notrace void secondary_start_kernel(void) cpu = task_cpu(current); set_my_cpu_offset(per_cpu_offset(cpu)); + pr_debug("CPU%u: Booted secondary processor\n", cpu); + /* * All kernel threads share the same mm context; grab a * reference and switch to it. @@ -184,16 +189,16 @@ asmlinkage notrace void secondary_start_kernel(void) /* * Enable GIC and timers. */ - notify_cpu_starting(cpu); - smp_store_cpu_info(cpu); + notify_cpu_starting(cpu); + /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online * before we continue. 
*/ - pr_info("CPU%u: Booted secondary processor [%08x]\n", + pr_debug("CPU%u: Booted secondary processor [%08x]\n", cpu, read_cpuid_id()); set_cpu_online(cpu, true); complete(&cpu_running); @@ -278,7 +283,7 @@ void __cpu_die(unsigned int cpu) pr_crit("CPU%u: cpu didn't die\n", cpu); return; } - pr_notice("CPU%u: shutdown\n", cpu); + pr_debug("CPU%u: shutdown\n", cpu); /* * Now that the dying CPU is beyond the point of no return w.r.t. @@ -300,7 +305,7 @@ void __cpu_die(unsigned int cpu) * of the other hotplug-cpu capable cores, so presumably coming * out of idle fixes this. */ -void cpu_die(void) +void __ref cpu_die(void) { unsigned int cpu = smp_processor_id(); @@ -318,7 +323,16 @@ void cpu_die(void) */ cpu_ops[cpu]->cpu_die(cpu); - BUG(); + /* + * Do not return to the idle loop - jump back to the secondary + * cpu initialisation. There's some initialisation which needs + * to be repeated to undo the effects of taking the CPU offline. + */ + + asm volatile("mov sp, %0\n" + "mov x29, #0\n" + "b secondary_start_kernel" + : : "r" (task_stack_page(current) + THREAD_START_SP)); } #endif @@ -487,6 +501,18 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, #else #define acpi_table_parse_madt(...) do { } while (0) #endif +void (*__smp_cross_call)(const struct cpumask *, unsigned int); +DEFINE_PER_CPU(bool, pending_ipi); + +void smp_cross_call_common(const struct cpumask *cpumask, unsigned int func) +{ + unsigned int cpu; + + for_each_cpu(cpu, cpumask) + per_cpu(pending_ipi, cpu) = true; + + __smp_cross_call(cpumask, func); +} /* * Enumerate the possible CPU set from the device tree and build the @@ -635,8 +661,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } -void (*__smp_cross_call)(const struct cpumask *, unsigned int); - void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) { __smp_cross_call = fn; @@ -649,11 +673,17 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), - S(IPI_WAKEUP, "CPU wake-up interrupts"), + S(IPI_WAKEUP, "CPU wakeup interrupts"), + S(IPI_CPU_BACKTRACE, "CPU backtrace"), }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) { + unsigned int cpu; + + for_each_cpu(cpu, target) + per_cpu(pending_ipi, cpu) = true; + trace_ipi_raise(target, ipi_types[ipinr]); __smp_cross_call(target, ipinr); } @@ -685,12 +715,12 @@ u64 smp_irq_stat_cpu(unsigned int cpu) void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - smp_cross_call(mask, IPI_CALL_FUNC); + smp_cross_call_common(mask, IPI_CALL_FUNC); } void arch_send_call_function_single_ipi(int cpu) { - smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); + smp_cross_call_common(cpumask_of(cpu), IPI_CALL_FUNC); } #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL @@ -710,27 +740,100 @@ void arch_irq_work_raise(void) static DEFINE_RAW_SPINLOCK(stop_lock); +DEFINE_PER_CPU(struct pt_regs, regs_before_stop); + /* * ipi_cpu_stop - handle IPI from smp_send_stop() */ -static void ipi_cpu_stop(unsigned int cpu) +static void ipi_cpu_stop(unsigned int cpu, struct pt_regs *regs) { if (system_state == SYSTEM_BOOTING || system_state == SYSTEM_RUNNING) { + per_cpu(regs_before_stop, cpu) = *regs; raw_spin_lock(&stop_lock); pr_crit("CPU%u: stopping\n", cpu); + show_regs(regs); dump_stack(); + dump_stack_minidump(regs->sp); + arm64_check_cache_ecc(NULL); raw_spin_unlock(&stop_lock); } set_cpu_online(cpu, false); + flush_cache_all(); 
local_irq_disable(); while (1) cpu_relax(); } +static cpumask_t backtrace_mask; +static DEFINE_RAW_SPINLOCK(backtrace_lock); + +/* "in progress" flag of arch_trigger_all_cpu_backtrace */ +static unsigned long backtrace_flag; + +static void smp_send_all_cpu_backtrace(void) +{ + unsigned int this_cpu = smp_processor_id(); + int i; + + if (test_and_set_bit(0, &backtrace_flag)) + /* + * If there is already a trigger_all_cpu_backtrace() in progress + * (backtrace_flag == 1), don't output double cpu dump infos. + */ + return; + + cpumask_copy(&backtrace_mask, cpu_online_mask); + cpumask_clear_cpu(this_cpu, &backtrace_mask); + + pr_info("Backtrace for cpu %d (current):\n", this_cpu); + dump_stack(); + + pr_info("\nsending IPI to all other CPUs:\n"); + if (!cpumask_empty(&backtrace_mask)) + smp_cross_call_common(&backtrace_mask, IPI_CPU_BACKTRACE); + + /* Wait for up to 10 seconds for all other CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpumask_empty(&backtrace_mask)) + break; + mdelay(1); + } + + clear_bit(0, &backtrace_flag); + smp_mb__after_atomic(); +} + +/* + * ipi_cpu_backtrace - handle IPI from smp_send_all_cpu_backtrace() + */ +static void ipi_cpu_backtrace(unsigned int cpu, struct pt_regs *regs) +{ + if (cpumask_test_cpu(cpu, &backtrace_mask)) { + raw_spin_lock(&backtrace_lock); + pr_warn("IPI backtrace for cpu %d\n", cpu); + show_regs(regs); + raw_spin_unlock(&backtrace_lock); + cpumask_clear_cpu(cpu, &backtrace_mask); + } +} + +#ifdef CONFIG_SMP +void arch_trigger_all_cpu_backtrace(void) +{ + smp_send_all_cpu_backtrace(); +} +#else +void arch_trigger_all_cpu_backtrace(void) +{ + dump_stack(); +} +#endif + + /* * Main handler for inter-processor interrupts */ @@ -757,7 +860,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) case IPI_CPU_STOP: irq_enter(); - ipi_cpu_stop(cpu); + ipi_cpu_stop(cpu, regs); irq_exit(); break; @@ -777,6 +880,10 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; #endif + case IPI_CPU_BACKTRACE: + ipi_cpu_backtrace(cpu, regs); + break; + #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL case IPI_WAKEUP: WARN_ONCE(!acpi_parking_protocol_valid(cpu), @@ -792,18 +899,21 @@ void handle_IPI(int ipinr, struct pt_regs *regs) if ((unsigned)ipinr < NR_IPI) trace_ipi_exit_rcuidle(ipi_types[ipinr]); + + per_cpu(pending_ipi, cpu) = false; set_irq_regs(old_regs); } void smp_send_reschedule(int cpu) { - smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); + BUG_ON(cpu_is_offline(cpu)); + smp_cross_call_common(cpumask_of(cpu), IPI_RESCHEDULE); } #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void tick_broadcast(const struct cpumask *mask) { - smp_cross_call(mask, IPI_TIMER); + smp_cross_call_common(mask, IPI_TIMER); } #endif @@ -828,7 +938,7 @@ void smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - smp_cross_call(&mask, IPI_CPU_STOP); + smp_cross_call_common(&mask, IPI_CPU_STOP); } /* Wait up to one second for other CPUs to stop */ diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 2ccb883353d9..303d571702ea 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -30,7 +30,8 @@ #include <asm/smp_plat.h> extern void secondary_holding_pen(void); -volatile unsigned long secondary_holding_pen_release = INVALID_HWID; +volatile unsigned long __section(".mmuoff.data.read") +secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; diff --git a/arch/arm64/kernel/stacktrace.c 
b/arch/arm64/kernel/stacktrace.c index 75a856568813..0edcd34b45d2 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -15,6 +15,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/kasan.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/ftrace.h> @@ -63,10 +64,14 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) if (fp < low || fp > high || fp & 0xf) return -EINVAL; + kasan_disable_current(); + frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); frame->pc = *(unsigned long *)(fp + 8); + kasan_enable_current(); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk && tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { @@ -157,7 +162,8 @@ static int save_trace(struct stackframe *frame, void *d) return trace->nr_entries >= trace->max_entries; } -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +static noinline void __save_stack_trace(struct task_struct *tsk, + struct stack_trace *trace, unsigned int nosched) { struct stack_trace_data data; struct stackframe frame; @@ -167,17 +173,18 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) data.trace = trace; data.skip = trace->skip; + data.no_sched_functions = nosched; if (tsk != current) { - data.no_sched_functions = 1; frame.fp = thread_saved_fp(tsk); frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } else { - data.no_sched_functions = 0; + /* We don't want this function nor the caller */ + data.skip += 2; frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; - frame.pc = (unsigned long)save_stack_trace_tsk; + frame.pc = (unsigned long)__save_stack_trace; } #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = tsk->curr_ret_stack; @@ -189,10 +196,17 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) put_task_stack(tsk); } +EXPORT_SYMBOL(save_stack_trace_tsk); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + __save_stack_trace(tsk, trace, 1); +} void save_stack_trace(struct stack_trace *trace) { - save_stack_trace_tsk(current, trace); + __save_stack_trace(current, trace, 0); } + EXPORT_SYMBOL_GPL(save_stack_trace); #endif diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index e7a96462ca2d..468b939f3471 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -13,30 +13,11 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> -extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long)); /* - * This is called by __cpu_suspend_enter() to save the state, and do whatever - * flushing is required to ensure that when the CPU goes to sleep we have - * the necessary data available when the caches are not searched. - * - * ptr: CPU context virtual address - * save_ptr: address of the location where the context physical address - * must be saved + * This is allocated by cpu_suspend_init(), and used to store a pointer to + * the 'struct sleep_stack_data' that contains a particular CPU's state. */ -void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr, - phys_addr_t *save_ptr) -{ - *save_ptr = virt_to_phys(ptr); - - cpu_do_suspend(ptr); - /* - * Only flush the context that must be retrieved with the MMU - * off. VA primitives ensure the flush is applied to all - * cache levels so context is pushed to DRAM.
- */ - __flush_dcache_area(ptr, sizeof(*ptr)); - __flush_dcache_area(save_ptr, sizeof(*save_ptr)); -} +unsigned long *sleep_save_stash; /* * This hook is provided so that cpu_suspend code can restore HW @@ -54,6 +35,24 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) hw_breakpoint_restore = hw_bp_restore; } +void notrace __cpu_suspend_exit(void) +{ + /* + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. + */ + cpu_uninstall_idmap(); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); +} + /* * cpu_suspend * @@ -63,8 +62,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - int ret; + int ret = 0; unsigned long flags; + struct sleep_stack_data state; /* * From this point debug exceptions are disabled to prevent @@ -80,20 +80,9 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ pause_graph_tracing(); - /* - * mm context saved on the stack, it will be restored when - * the cpu comes out of reset through the identity mapped - * page tables, so that the thread address space is properly - * set-up on function return. - */ - ret = __cpu_suspend_enter(arg, fn); - if (ret == 0) { - /* - * We are resuming from reset with the idmap active in TTBR0_EL1. - * We must uninstall the idmap and restore the expected MMU - * state before we can possibly return to userspace. - */ - cpu_uninstall_idmap(); + if (__cpu_suspend_enter(&state)) { + /* Call the suspend finisher */ + ret = fn(arg); /* * PSTATE was not saved over suspend/resume, re-enable any @@ -108,8 +97,10 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) * before debug exceptions are possibly reenabled * through local_dbg_restore. */ - if (hw_breakpoint_restore) - hw_breakpoint_restore(NULL); + if (!ret) + ret = -EOPNOTSUPP; + } else { + __cpu_suspend_exit(); } unpause_graph_tracing(); @@ -124,22 +115,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) return ret; } -struct sleep_save_sp sleep_save_sp; - static int __init cpu_suspend_init(void) { - void *ctx_ptr; - /* ctx_ptr is an array of physical addresses */ - ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash), + GFP_KERNEL); - if (WARN_ON(!ctx_ptr)) + if (WARN_ON(!sleep_save_stash)) return -ENOMEM; - sleep_save_sp.save_ptr_stash = ctx_ptr; - sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); - __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); - return 0; } early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 7758f7ff131b..d0e5fe5fbf22 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -19,14 +19,36 @@ #include <linux/nodemask.h> #include <linux/of.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/sched_energy.h> #include <asm/cputype.h> #include <asm/topology.h> +/* + * cpu power table + * This per cpu data structure describes the relative capacity of each core. 
+ * On a heterogeneous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_power field so the scheduler can + * take this difference into account during load balance. A per cpu structure + * is preferred because each CPU updates its own cpu_power field during the + * load balance except for idle cores. One idle core is selected to run the + * rebalance_domains for all idle cores and the cpu_power can be updated + * during this sequence. + */ static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_power_scale(unsigned int cpu, unsigned long power) +{ + per_cpu(cpu_scale, cpu) = power; +} + unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu) { #ifdef CONFIG_CPU_FREQ @@ -181,6 +203,46 @@ static int __init parse_cluster(struct device_node *cluster, int depth) return 0; } +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of the relative efficiency of each processor. + * The efficiency value must fit in 20 bits and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of a CPU. + * Processors that are not defined in the table + * use the default SCHED_CAPACITY_SCALE value for cpu_scale. + */ +static const struct cpu_efficiency table_efficiency[] = { + { NULL, }, +}; + +static unsigned long *__cpu_capacity; +#define cpu_capacity(cpu) __cpu_capacity[cpu] + +static unsigned long middle_capacity = 1; + +static DEFINE_PER_CPU(unsigned long, cpu_efficiency) = SCHED_CAPACITY_SCALE; + +unsigned long arch_get_cpu_efficiency(int cpu) +{ + return per_cpu(cpu_efficiency, cpu); +} +EXPORT_SYMBOL(arch_get_cpu_efficiency); + +/* + * Iterate over all CPU descriptors in the DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} + min{eff_i}) / 2. + * This is later used to scale the cpu_power field such that an + * 'average' CPU is of middle power. Also see the comments near + * table_efficiency[] and update_cpu_power().
+ */ static int __init parse_dt_topology(void) { struct device_node *cn, *map; @@ -220,6 +282,107 @@ out: return ret; } +static void __init parse_dt_cpu_power(void) +{ + const struct cpu_efficiency *cpu_eff; + struct device_node *cn; + unsigned long min_capacity = ULONG_MAX; + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int cpu; + + __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity), + GFP_NOWAIT); + + for_each_possible_cpu(cpu) { + const u32 *rate; + int len; + u32 efficiency; + + /* Too early to use cpu->of_node */ + cn = of_get_cpu_node(cpu, NULL); + if (!cn) { + pr_err("Missing device node for CPU %d\n", cpu); + continue; + } + + /* + * The CPU efficiency value passed from the device tree + * overrides the value defined in the table_efficiency[] + */ + if (of_property_read_u32(cn, "efficiency", &efficiency) < 0) { + + for (cpu_eff = table_efficiency; + cpu_eff->compatible; cpu_eff++) + + if (of_device_is_compatible(cn, + cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) { + pr_warn("%s: Unknown CPU type\n", + cn->full_name); + continue; + } + + efficiency = cpu_eff->efficiency; + } + + per_cpu(cpu_efficiency, cpu) = efficiency; + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s: Missing clock-frequency property\n", + cn->full_name); + continue; + } + + capacity = ((be32_to_cpup(rate)) >> 20) * efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if (capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity(cpu) = capacity; + } + + /* If min and max capacities are equal we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_CAPACITY_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (min_capacity == max_capacity) + return; + else if (4 * max_capacity < (3 * (max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_CAPACITY_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_CAPACITY_SHIFT-1)) + 1; +} + +/* + * Look for a custom capacity of a CPU in the cpu_topo_data table during + * boot. The update of all CPUs is in O(n^2) for a heterogeneous system but + * the function returns directly for an SMP system.
+ */ +static void update_cpu_power(unsigned int cpu) +{ + if (!cpu_capacity(cpu)) + return; + + set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); + + pr_info("CPU%u: update cpu_power %lu\n", + cpu, arch_scale_freq_power(NULL, cpu)); +} + /* * cpu topology table */ @@ -281,14 +444,14 @@ static void update_cpu_capacity(unsigned int cpu) { unsigned long capacity = SCHED_CAPACITY_SCALE; - if (cpu_core_energy(cpu)) { + if (sched_energy_aware && cpu_core_energy(cpu)) { int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1; capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap; } set_capacity_scale(cpu, capacity); - pr_info("CPU%d: update cpu_capacity %lu\n", + pr_debug("CPU%d: update cpu_capacity %lu\n", cpu, arch_scale_cpu_capacity(NULL, cpu)); } @@ -353,6 +516,7 @@ void store_cpu_topology(unsigned int cpuid) topology_populated: update_siblings_masks(cpuid); + update_cpu_power(cpuid); update_cpu_capacity(cpuid); } @@ -374,18 +538,33 @@ static void __init reset_cpu_topology(void) } } +static void __init reset_cpu_power(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + set_power_scale(cpu, SCHED_CAPACITY_SCALE); +} + void __init init_cpu_topology(void) { + int cpu; + reset_cpu_topology(); /* * Discard anything that was parsed if we hit an error so we * don't use partial information. */ - if (of_have_populated_dt() && parse_dt_topology()) + if (of_have_populated_dt() && parse_dt_topology()) { reset_cpu_topology(); - else + } else { set_sched_topology(arm64_topology); + for_each_possible_cpu(cpu) + update_siblings_masks(cpu); + } + reset_cpu_power(); + parse_dt_cpu_power(); init_sched_energy_costs(); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 47454ee143df..416aea2c6719 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -43,6 +43,10 @@ #include <asm/stacktrace.h> #include <asm/exception.h> #include <asm/system_misc.h> +#include <asm/esr.h> +#include <asm/edac.h> + +#include <trace/events/exception.h> static const char *handler[]= { "Synchronous Abort", @@ -151,7 +155,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) unsigned long irq_stack_ptr; int skip; - pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + pr_debug("%s(regs = %pK tsk = %pK)\n", __func__, regs, tsk); if (!tsk) tsk = current; @@ -267,9 +271,6 @@ static int __die(const char *str, int err, struct pt_regs *regs) end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { - dump_mem(KERN_EMERG, "Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk), - compat_user_mode(regs)); dump_backtrace(regs, tsk); dump_instr(KERN_EMERG, regs); } @@ -277,40 +278,73 @@ static int __die(const char *str, int err, struct pt_regs *regs) return ret; } -static DEFINE_RAW_SPINLOCK(die_lock); +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; -/* - * This function is protected against re-entrancy. - */ -void die(const char *str, struct pt_regs *regs, int err) +static unsigned long oops_begin(void) { - int ret; + int cpu; unsigned long flags; - raw_spin_lock_irqsave(&die_lock, flags); - oops_enter(); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. 
should stop eventually */; + else + arch_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; console_verbose(); bust_spinlocks(1); - ret = __die(str, err, regs); + return flags; +} +static void oops_end(unsigned long flags, struct pt_regs *regs, int notify) +{ if (regs && kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); + die_owner = -1; add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); oops_exit(); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); + if (notify != NOTIFY_STOP) + do_exit(SIGSEGV); +} - raw_spin_unlock_irqrestore(&die_lock, flags); +/* + * This function is protected against re-entrancy. + */ +void die(const char *str, struct pt_regs *regs, int err) +{ + enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE; + unsigned long flags = oops_begin(); + int ret; - if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + if (!user_mode(regs)) + bug_type = report_bug(regs->pc, regs); + if (bug_type != BUG_TRAP_TYPE_NONE) + str = "Oops - BUG"; + + ret = __die(str, err, regs); + + oops_end(flags, regs, ret); } void arm64_notify_die(const char *str, struct pt_regs *regs, @@ -400,6 +434,8 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; + trace_undef_instr(regs, (void *)pc); + if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: undefined instruction: pc=%p\n", current->comm, task_pid_nr(current), pc); @@ -518,6 +554,12 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) pr_crit("Bad mode in %s handler detected, code 0x%08x -- %s\n", handler[reason], esr, esr_get_class_string(esr)); + if (esr >> ESR_ELx_EC_SHIFT == ESR_ELx_EC_SERROR) { + pr_crit("System error detected. ESR.ISS = %08x\n", + esr & 0xffffff); + arm64_check_cache_ecc(NULL); + } + local_irq_disable(); panic("bad mode"); } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index a6f1df69c0c3..91f541a1bdb1 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -1,5 +1,7 @@ /* - * VDSO implementation for AArch64 and vector page setup for AArch32. + * Additional userspace pages setup for AArch64 and AArch32. + * - AArch64: vDSO pages setup, vDSO data page update. + * - AArch32: sigreturn and kuser helpers pages setup. * * Copyright (C) 2012 ARM Limited * @@ -36,9 +38,11 @@ #include <asm/vdso.h> #include <asm/vdso_datapage.h> -extern char vdso_start[], vdso_end[]; -static unsigned long vdso_pages; -static struct page **vdso_pagelist; +struct vdso_mappings { + unsigned long num_code_pages; + struct vm_special_mapping data_mapping; + struct vm_special_mapping code_mapping; +}; /* * The vDSO data page. @@ -53,149 +57,258 @@ struct vdso_data *vdso_data = &vdso_data_store.data; /* * Create and map the vectors page for AArch32 tasks. */ -static struct page *vectors_page[1]; +#if !defined(CONFIG_VDSO32) || defined(CONFIG_KUSER_HELPERS) +static struct page *vectors_page[] __ro_after_init; +static const struct vm_special_mapping compat_vdso_spec[] = { + { + /* Must be named [sigpage] for compatibility with arm. 
*/ + .name = "[sigpage]", + .pages = &vectors_page[0], + }, +#ifdef CONFIG_KUSER_HELPERS + { + .name = "[kuserhelpers]", + .pages = &vectors_page[1], + }, +#endif +}; +static struct page *vectors_page[ARRAY_SIZE(compat_vdso_spec)] __ro_after_init; +#endif static int __init alloc_vectors_page(void) { +#ifdef CONFIG_KUSER_HELPERS extern char __kuser_helper_start[], __kuser_helper_end[]; - extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; - - int kuser_sz = __kuser_helper_end - __kuser_helper_start; - int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; - unsigned long vpage; + size_t kuser_sz = __kuser_helper_end - __kuser_helper_start; + unsigned long kuser_vpage; +#endif - vpage = get_zeroed_page(GFP_ATOMIC); +#ifndef CONFIG_VDSO32 + extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; + size_t sigret_sz = + __aarch32_sigret_code_end - __aarch32_sigret_code_start; + unsigned long sigret_vpage; - if (!vpage) + sigret_vpage = get_zeroed_page(GFP_ATOMIC); + if (!sigret_vpage) return -ENOMEM; +#endif + +#ifdef CONFIG_KUSER_HELPERS + kuser_vpage = get_zeroed_page(GFP_ATOMIC); + if (!kuser_vpage) { +#ifndef CONFIG_VDSO32 + free_page(sigret_vpage); +#endif + return -ENOMEM; + } +#endif - /* kuser helpers */ - memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start, - kuser_sz); - +#ifndef CONFIG_VDSO32 /* sigreturn code */ - memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, - __aarch32_sigret_code_start, sigret_sz); + memcpy((void *)sigret_vpage, __aarch32_sigret_code_start, sigret_sz); + flush_icache_range(sigret_vpage, sigret_vpage + PAGE_SIZE); + vectors_page[0] = virt_to_page(sigret_vpage); +#endif - flush_icache_range(vpage, vpage + PAGE_SIZE); - vectors_page[0] = virt_to_page(vpage); +#ifdef CONFIG_KUSER_HELPERS + /* kuser helpers */ + memcpy((void *)kuser_vpage + 0x1000 - kuser_sz, __kuser_helper_start, + kuser_sz); + flush_icache_range(kuser_vpage, kuser_vpage + PAGE_SIZE); + vectors_page[1] = virt_to_page(kuser_vpage); +#endif return 0; } arch_initcall(alloc_vectors_page); +#ifndef CONFIG_VDSO32 int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long addr = AARCH32_VECTORS_BASE; - static const struct vm_special_mapping spec = { - .name = "[vectors]", - .pages = vectors_page, - - }; + unsigned long addr; void *ret; down_write(&mm->mmap_sem); - current->mm->context.vdso = (void *)addr; + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = ERR_PTR(addr); + goto out; + } - /* Map vectors page at the high address. */ ret = _install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, - &spec); + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &compat_vdso_spec[0]); + if (IS_ERR(ret)) + goto out; + + current->mm->context.vdso = (void *)addr; +#ifdef CONFIG_KUSER_HELPERS + /* Map the kuser helpers at the ABI-defined high address. 
*/ + ret = _install_special_mapping(mm, AARCH32_KUSER_HELPERS_BASE, + PAGE_SIZE, + VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, + &compat_vdso_spec[1]); +#endif +out: up_write(&mm->mmap_sem); return PTR_ERR_OR_ZERO(ret); } +#endif /* !CONFIG_VDSO32 */ #endif /* CONFIG_COMPAT */ -static struct vm_special_mapping vdso_spec[2]; - -static int __init vdso_init(void) +static int __init vdso_mappings_init(const char *name, + const char *code_start, + const char *code_end, + struct vdso_mappings *mappings) { - int i; + unsigned long i, vdso_pages; + struct page **vdso_pagelist; unsigned long pfn; - if (memcmp(vdso_start, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); + if (memcmp(code_start, "\177ELF", 4)) { + pr_err("%s is not a valid ELF object!\n", name); return -EINVAL; } - vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; - pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); - - /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), - GFP_KERNEL); + vdso_pages = (code_end - code_start) >> PAGE_SHIFT; + pr_info("%s: %ld pages (%ld code @ %p, %ld data @ %p)\n", + name, vdso_pages + 1, vdso_pages, code_start, 1L, + vdso_data); + + /* + * Allocate space for storing pointers to the vDSO code pages + the + * data page. The pointers must have the same lifetime as the mappings, + * which are static, so there is no need to keep track of the pointer + * array to free it. + */ + vdso_pagelist = kmalloc_array(vdso_pages + 1, sizeof(struct page *), + GFP_KERNEL); if (vdso_pagelist == NULL) return -ENOMEM; /* Grab the vDSO data page. */ vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); - /* Grab the vDSO code pages. 
*/ - pfn = sym_to_pfn(vdso_start); + pfn = sym_to_pfn(code_start); for (i = 0; i < vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(pfn + i); /* Populate the special mapping structures */ - vdso_spec[0] = (struct vm_special_mapping) { + mappings->data_mapping = (struct vm_special_mapping) { .name = "[vvar]", - .pages = vdso_pagelist, + .pages = &vdso_pagelist[0], }; - vdso_spec[1] = (struct vm_special_mapping) { + mappings->code_mapping = (struct vm_special_mapping) { .name = "[vdso]", .pages = &vdso_pagelist[1], }; + mappings->num_code_pages = vdso_pages; return 0; } + +#ifdef CONFIG_COMPAT +#ifdef CONFIG_VDSO32 + +static struct vdso_mappings vdso32_mappings __ro_after_init; + +static int __init vdso32_init(void) +{ + extern char vdso32_start[], vdso32_end[]; + + return vdso_mappings_init("vdso32", vdso32_start, vdso32_end, + &vdso32_mappings); +} +arch_initcall(vdso32_init); + +#endif /* CONFIG_VDSO32 */ +#endif /* CONFIG_COMPAT */ + +static struct vdso_mappings vdso_mappings __ro_after_init; + +static int __init vdso_init(void) +{ + extern char vdso_start[], vdso_end[]; + + return vdso_mappings_init("vdso", vdso_start, vdso_end, + &vdso_mappings); +} + arch_initcall(vdso_init); -int arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp) +static int vdso_setup(struct mm_struct *mm, + const struct vdso_mappings *mappings) { - struct mm_struct *mm = current->mm; unsigned long vdso_base, vdso_text_len, vdso_mapping_len; void *ret; - vdso_text_len = vdso_pages << PAGE_SHIFT; + vdso_text_len = mappings->num_code_pages << PAGE_SHIFT; /* Be sure to map the data page */ vdso_mapping_len = vdso_text_len + PAGE_SIZE; - down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - ret = ERR_PTR(vdso_base); - goto up_fail; - } + if (IS_ERR_VALUE(vdso_base)) + return PTR_ERR_OR_ZERO(ERR_PTR(vdso_base)); ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, VM_READ|VM_MAYREAD, - &vdso_spec[0]); + &mappings->data_mapping); if (IS_ERR(ret)) - goto up_fail; + return PTR_ERR_OR_ZERO(ret); vdso_base += PAGE_SIZE; - mm->context.vdso = (void *)vdso_base; ret = _install_special_mapping(mm, vdso_base, vdso_text_len, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &vdso_spec[1]); - if (IS_ERR(ret)) - goto up_fail; + &mappings->code_mapping); + if (!IS_ERR(ret)) + mm->context.vdso = (void *)vdso_base; + return PTR_ERR_OR_ZERO(ret); +} + +#ifdef CONFIG_COMPAT +#ifdef CONFIG_VDSO32 +int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + void *ret; + down_write(&mm->mmap_sem); + + ret = ERR_PTR(vdso_setup(mm, &vdso32_mappings)); +#ifdef CONFIG_KUSER_HELPERS + if (!IS_ERR(ret)) + /* Map the kuser helpers at the ABI-defined high address. 
*/ + ret = _install_special_mapping(mm, AARCH32_KUSER_HELPERS_BASE, + PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYEXEC, + &compat_vdso_spec[1]); +#endif up_write(&mm->mmap_sem); - return 0; -up_fail: - mm->context.vdso = NULL; + return PTR_ERR_OR_ZERO(ret); +} +#endif /* CONFIG_VDSO32 */ +#endif /* CONFIG_COMPAT */ + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + int ret; + + down_write(&mm->mmap_sem); + ret = vdso_setup(mm, &vdso_mappings); up_write(&mm->mmap_sem); - return PTR_ERR(ret); + return ret; } /* @@ -216,16 +329,20 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { + struct timespec btm = ktime_to_timespec(tk->offs_boot); + /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; vdso_data->raw_time_sec = tk->raw_sec; vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; + vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec; vdso_data->cs_mono_mult = tk->tkr_mono.mult; vdso_data->cs_raw_mult = tk->tkr_raw.mult; /* tkr_mono.shift == tkr_raw.shift */ vdso_data->cs_shift = tk->tkr_mono.shift; + vdso_data->btm_sec = btm.tv_sec; + vdso_data->btm_nsec = btm.tv_nsec; } smp_wmb(); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 62c84f7cb01b..4adcb532ac6a 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -5,18 +5,32 @@ # Heavily based on the vDSO Makefiles for other archs. # -obj-vdso := gettimeofday.o note.o sigreturn.o +obj-vdso-s := note.o sigreturn.o +obj-vdso-c := vgettimeofday.o # Build rules -targets := $(obj-vdso) vdso.so vdso.so.dbg -obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) +targets := $(obj-vdso-s) $(obj-vdso-c) vdso.so vdso.so.dbg +obj-vdso-s := $(addprefix $(obj)/, $(obj-vdso-s)) +obj-vdso-c := $(addprefix $(obj)/, $(obj-vdso-c)) +obj-vdso := $(obj-vdso-c) $(obj-vdso-s) -ccflags-y := -shared -fno-common -fno-builtin +ccflags-y := -shared -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING -ffixed-x18 ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +# Force -O2 to avoid libgcc dependencies +CFLAGS_REMOVE_vgettimeofday.o = -pg -Os +CFLAGS_vgettimeofday.o = -O2 -fPIC +ifneq ($(cc-name),clang) +CFLAGS_vgettimeofday.o += -mcmodel=tiny +endif + # Disable gcov profiling for VDSO code GCOV_PROFILE := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n # Workaround for bare-metal (ELF) toolchains that neglect to pass -shared # down to collect2, resulting in silent corruption of the vDSO image. 
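(Illustration only, not part of the patch: userspace locates the [vdso] mapping installed by arch_setup_additional_pages() through the auxiliary vector; the C library then resolves __kernel_clock_gettime and friends from its dynamic symbol table. A standalone check:)

#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	/* AT_SYSINFO_EHDR points at the ELF header of the [vdso] pages. */
	unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

	printf("[vdso] mapped at %#lx\n", vdso);
	return vdso ? 0 : 1;
}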
@@ -49,12 +63,17 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files -$(obj-vdso): %.o: %.S FORCE +$(obj-vdso-s): %.o: %.S FORCE $(call if_changed_dep,vdsoas) +$(obj-vdso-c): %.o: %.c FORCE + $(call if_changed_dep,vdsocc) + # Actual build commands quiet_cmd_vdsold = VDSOL $@ cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ +quiet_cmd_vdsocc = VDSOC $@ + cmd_vdsocc = ${CC} $(c_flags) -c -o $@ $< quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/arm64/kernel/vdso/compiler.h b/arch/arm64/kernel/vdso/compiler.h new file mode 100644 index 000000000000..fb27545640f2 --- /dev/null +++ b/arch/arm64/kernel/vdso/compiler.h @@ -0,0 +1,70 @@ +/* + * Userspace implementations of fallback calls + * + * Copyright (C) 2017 Cavium, Inc. + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * Rewritten into C by: Andrew Pinski <apinski@cavium.com> + */ + +#ifndef __VDSO_COMPILER_H +#define __VDSO_COMPILER_H + +#include <asm/processor.h> /* for cpu_relax() */ +#include <asm/sysreg.h> /* for read_sysreg() */ +#include <asm/unistd.h> +#include <linux/compiler.h> +#include <linux/hrtimer.h> /* for LOW_RES_NSEC and MONOTONIC_RES_NSEC */ + +#ifdef CONFIG_ARM_ARCH_TIMER +#define ARCH_PROVIDES_TIMER +#endif + +#define DEFINE_FALLBACK(name, type_arg1, name_arg1, type_arg2, name_arg2) \ +static notrace long name##_fallback(type_arg1 _##name_arg1, \ + type_arg2 _##name_arg2) \ +{ \ + register type_arg1 name_arg1 asm("x0") = _##name_arg1; \ + register type_arg2 name_arg2 asm("x1") = _##name_arg2; \ + register long ret asm ("x0"); \ + register long nr asm("x8") = __NR_##name; \ + \ + asm volatile( \ + " svc #0\n" \ + : "=r" (ret) \ + : "r" (name_arg1), "r" (name_arg2), "r" (nr) \ + : "memory"); \ + \ + return ret; \ +} + +/* + * AArch64 implementation of arch_counter_get_cntvct() suitable for vdso + */ +static __always_inline notrace u64 arch_vdso_read_counter(void) +{ + /* Read the virtual counter. */ + isb(); + return read_sysreg(cntvct_el0); +} + +/* Rename exported vdso functions */ +#define __vdso_clock_gettime __kernel_clock_gettime +#define __vdso_gettimeofday __kernel_gettimeofday +#define __vdso_clock_getres __kernel_clock_getres +#define __vdso_time __kernel_time + +#endif /* __VDSO_COMPILER_H */ diff --git a/arch/arm64/kernel/vdso/datapage.h b/arch/arm64/kernel/vdso/datapage.h new file mode 100644 index 000000000000..be86a6074cf8 --- /dev/null +++ b/arch/arm64/kernel/vdso/datapage.h @@ -0,0 +1,59 @@ +/* + * Userspace implementations of __get_datapage + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __VDSO_DATAPAGE_H +#define __VDSO_DATAPAGE_H + +#include <linux/bitops.h> +#include <linux/types.h> +#include <asm/vdso_datapage.h> + +/* + * We use the hidden visibility to prevent the compiler from generating a GOT + * relocation. Not only is going through a GOT useless (the entry couldn't and + * mustn't be overridden by another library), it does not even work: the linker + * cannot generate an absolute address to the data page. + * + * With the hidden visibility, the compiler simply generates a PC-relative + * relocation (R_ARM_REL32), and this is what we need. + */ +extern const struct vdso_data _vdso_data __attribute__((visibility("hidden"))); + +static inline const struct vdso_data *__get_datapage(void) +{ + const struct vdso_data *ret; + /* + * This simply puts &_vdso_data into ret. The reason why we don't use + * `ret = &_vdso_data` is that the compiler tends to optimise this in a + * very suboptimal way: instead of keeping &_vdso_data in a register, + * it goes through a relocation almost every time _vdso_data must be + * accessed (even in subfunctions). This is both time and space + * consuming: each relocation uses a word in the code section, and it + * has to be loaded at runtime. + * + * This trick hides the assignment from the compiler. Since it cannot + * track where the pointer comes from, it will only use one relocation + * where __get_datapage() is called, and then keep the result in a + * register. + */ + asm("" : "=r"(ret) : "0"(&_vdso_data)); + return ret; +} + +/* We can only guarantee 56 bits of precision. */ +#define ARCH_CLOCK_FIXED_MASK GENMASK_ULL(55, 0) + +#endif /* __VDSO_DATAPAGE_H */ diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index b3e6c4d5b75c..3dc1198b5ec9 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -94,6 +94,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_time; local: *; }; } diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c new file mode 100644 index 000000000000..b73d4011993d --- /dev/null +++ b/arch/arm64/kernel/vdso/vgettimeofday.c @@ -0,0 +1,3 @@ +#include "compiler.h" +#include "datapage.h" +#include "../../../../lib/vdso/vgettimeofday.c" diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore new file mode 100644 index 000000000000..4fea950fa5ed --- /dev/null +++ b/arch/arm64/kernel/vdso32/.gitignore @@ -0,0 +1,2 @@ +vdso.lds +vdso.so.raw diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile new file mode 100644 index 000000000000..807d08e28c27 --- /dev/null +++ b/arch/arm64/kernel/vdso32/Makefile @@ -0,0 +1,178 @@ +# +# Building a vDSO image for AArch32. +# +# Author: Kevin Brodsky <kevin.brodsky@arm.com> +# A mix between the arm64 and arm vDSO Makefiles. + +ifeq ($(cc-name),clang) + CC_ARM32 := $(CC) $(CLANG_TARGET_ARM32) -no-integrated-as + GCC_ARM32_TC := $(realpath $(dir $(shell which $(CROSS_COMPILE_ARM32)ld))/..) 
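+# GCC_ARM32_TC is the 32-bit GCC's installation root, inferred from the directory containing its linker; when it resolves, it is passed to clang via --gcc-toolchain below so that the matching binutils and libraries are used.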
+ifneq ($(GCC_ARM32_TC),) + CC_ARM32 += --gcc-toolchain=$(GCC_ARM32_TC) +endif +else + CC_ARM32 := $(CROSS_COMPILE_ARM32)$(cc-name) +endif + +# Same as cc-*option, but using CC_ARM32 instead of CC +cc32-option = $(call try-run,\ + $(CC_ARM32) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) +cc32-disable-warning = $(call try-run,\ + $(CC_ARM32) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) +cc32-ldoption = $(call try-run,\ + $(CC_ARM32) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) + +# We cannot use the global flags to compile the vDSO files, the main reason +# being that the 32-bit compiler may be older than the main (64-bit) compiler +# and therefore may not understand flags set using $(cc-option ...). Besides, +# arch-specific options should be taken from the arm Makefile instead of the +# arm64 one. +# As a result we set our own flags here. + +# From top-level Makefile +# NOSTDINC_FLAGS +VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(CC_ARM32) -print-file-name=include) +VDSO_CPPFLAGS += $(LINUXINCLUDE) +VDSO_CPPFLAGS += -D__KERNEL__ +VDSO_CPPFLAGS += $(call cc-option,-Qunused-arguments,) +VDSO_CPPFLAGS += $(ARCH_CPPFLAGS) $(KCPPFLAGS) + +# Common C and assembly flags +# From top-level Makefile +VDSO_CAFLAGS := $(VDSO_CPPFLAGS) +VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) +ifdef CONFIG_DEBUG_INFO +VDSO_CAFLAGS += -g +endif +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC_ARM32)), y) +VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO +endif + +# From arm Makefile +VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm) +VDSO_CAFLAGS += -mabi=aapcs-linux -mfloat-abi=soft +ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) +VDSO_CAFLAGS += -mbig-endian +else +VDSO_CAFLAGS += -mlittle-endian +endif + +# From arm vDSO Makefile +VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector +VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING + +# Try to compile for ARMv8. If the compiler is too old and doesn't support it, +# fall back to v7. There is no easy way to check for what architecture the code +# is being compiled, so define a macro specifying that (see arch/arm/Makefile). +VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\ + -march=armv7-a -D__LINUX_ARM_ARCH__=7) + +VDSO_CFLAGS := $(VDSO_CAFLAGS) +# KBUILD_CFLAGS from top-level Makefile +VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ + -fno-strict-aliasing -fno-common \ + -Werror-implicit-function-declaration \ + -Wno-format-security \ + -std=gnu89 +VDSO_CFLAGS += -O2 +# Some useful compiler-dependent flags from top-level Makefile +VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,) +VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) +VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow) +VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) +VDSO_CFLAGS += $(call cc32-option,-Werror=date-time) +VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types) + +# The 32-bit compiler does not provide 128-bit integers, which are used in +# some headers that are indirectly included from the vDSO code. +# This hack makes the compiler happy and should trigger a warning/error if +# variables of such type are referenced. 
+VDSO_CFLAGS += -D__uint128_t='void*' +# Silence some warnings coming from headers that operate on longs +# (on GCC 4.8 or older, there is unfortunately no way to silence this warning) +VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow) +VDSO_CFLAGS += -Wno-int-to-pointer-cast + +VDSO_AFLAGS := $(VDSO_CAFLAGS) +VDSO_AFLAGS += -D__ASSEMBLY__ + +VDSO_LDFLAGS := $(VDSO_CPPFLAGS) +# From arm vDSO Makefile +VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft +VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) + + +# Borrow vdsomunge.c from the arm vDSO +# We have to use a relative path because scripts/Makefile.host prefixes +# $(hostprogs-y) with $(obj) +munge := ../../../arm/vdso/vdsomunge +hostprogs-y := $(munge) + +c-obj-vdso := vgettimeofday.o +asm-obj-vdso := sigreturn.o + +# Build rules +targets := $(c-obj-vdso) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw +c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso)) +asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso)) +obj-vdso := $(c-obj-vdso) $(asm-obj-vdso) + +obj-y += vdso.o +extra-y += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# Force dependency (vdso.s includes vdso.so through incbin) +$(obj)/vdso.o: $(obj)/vdso.so + +include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +# Strip rule for vdso.so +$(obj)/vdso.so: OBJCOPYFLAGS := -S +$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE + $(call if_changed,objcopy) + +$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE + $(call if_changed,vdsomunge) + +# Link rule for the .so file, .lds has to be first +$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold) + +# Compilation rules for the vDSO sources +$(filter-out vgettimeofday.o, $(c-obj-vdso)): %.o: %.c FORCE + $(call if_changed_dep,vdsocc) +$(asm-obj-vdso): %.o: %.S FORCE + $(call if_changed_dep,vdsoas) + +# Actual build commands +quiet_cmd_vdsold = VDSOL32 $@ + cmd_vdsold = $(CC_ARM32) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ +quiet_cmd_vdsocc = VDSOC32 $@ + cmd_vdsocc = $(CC_ARM32) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< +quiet_cmd_vdsoas = VDSOA32 $@ + cmd_vdsoas = $(CC_ARM32) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $< + +quiet_cmd_vdsomunge = MUNGE $@ + cmd_vdsomunge = $(obj)/$(munge) $< $@ + +# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO) +gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ +# The AArch64 nm should be able to read an AArch32 binary + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +# Install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/arm64/kernel/vdso32/compiler.h b/arch/arm64/kernel/vdso32/compiler.h new file mode 100644 index 000000000000..19a43fc37bb9 --- /dev/null +++ b/arch/arm64/kernel/vdso32/compiler.h @@ -0,0 +1,122 @@ +/* + * Userspace implementations of fallback calls + * + * Copyright (C) 2017 Cavium, Inc.
+ * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * Rewritten into C by: Andrew Pinski <apinski@cavium.com> + */ + +#ifndef __VDSO_COMPILER_H +#define __VDSO_COMPILER_H + +#include <generated/autoconf.h> +#undef CONFIG_64BIT +#include <asm/barrier.h> /* for isb() & dmb() */ +#include <asm/param.h> /* for HZ */ +#include <asm/unistd32.h> +#include <linux/compiler.h> + +#ifdef CONFIG_ARM_ARCH_TIMER +#define ARCH_PROVIDES_TIMER +#endif + +/* cannot include linux/time.h because of too much architectural cruft */ +#ifndef NSEC_PER_SEC +#define NSEC_PER_SEC 1000000000L +#endif + +/* cannot include linux/jiffies.h because of too much architectural cruft */ +#ifndef TICK_NSEC +#define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ) +#endif + +/* cannot include linux/hrtimer.h because of too much architectural cruft */ +#ifndef LOW_RES_NSEC +#define LOW_RES_NSEC TICK_NSEC +#ifdef ARCH_PROVIDES_TIMER +#ifdef CONFIG_HIGH_RES_TIMERS +# define HIGH_RES_NSEC 1 +# define MONOTONIC_RES_NSEC HIGH_RES_NSEC +#else +# define MONOTONIC_RES_NSEC LOW_RES_NSEC +#endif +#endif +#endif + +#define DEFINE_FALLBACK(name, type_arg1, name_arg1, type_arg2, name_arg2) \ +static notrace long name##_fallback(type_arg1 _##name_arg1, \ + type_arg2 _##name_arg2) \ +{ \ + register type_arg1 name_arg1 asm("r0") = _##name_arg1; \ + register type_arg2 name_arg2 asm("r1") = _##name_arg2; \ + register long ret asm ("r0"); \ + register long nr asm("r7") = __NR_##name; \ + \ + asm volatile( \ + " swi #0\n" \ + : "=r" (ret) \ + : "r" (name_arg1), "r" (name_arg2), "r" (nr) \ + : "memory"); \ + \ + return ret; \ +} + +/* + * AArch32 implementation of arch_counter_get_cntvct() suitable for vdso + */ +static __always_inline notrace u64 arch_vdso_read_counter(void) +{ + u64 res; + + /* Read the virtual counter. */ + isb(); + asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res)); + + return res; +} + +/* + * Cannot include asm/processor.h to pick this up because of all the + * architectural components also included, so we open-code a copy.
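(The open-coded version below matches the arm64 definition, a "yield" hint with a memory clobber; the vDSO needs it essentially for the spin in its sequence-count retry loop while a data page update is in flight.)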
+ */ +static inline void cpu_relax(void) +{ + asm volatile("yield" ::: "memory"); +} + +#undef smp_rmb +#if __LINUX_ARM_ARCH__ >= 8 +#define smp_rmb() dmb(ishld) /* ok on ARMv8 */ +#else +#define smp_rmb() dmb(ish) /* ishld does not exist on ARMv7 */ +#endif + +/* Avoid unresolved references emitted by GCC */ + +void __aeabi_unwind_cpp_pr0(void) +{ +} + +void __aeabi_unwind_cpp_pr1(void) +{ +} + +void __aeabi_unwind_cpp_pr2(void) +{ +} + +#endif /* __VDSO_COMPILER_H */ diff --git a/arch/arm64/kernel/vdso32/datapage.h b/arch/arm64/kernel/vdso32/datapage.h new file mode 100644 index 000000000000..fe3e216d94d1 --- /dev/null +++ b/arch/arm64/kernel/vdso32/datapage.h @@ -0,0 +1 @@ +#include "../vdso/datapage.h" diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S new file mode 100644 index 000000000000..14e5f9ca34f9 --- /dev/null +++ b/arch/arm64/kernel/vdso32/sigreturn.S @@ -0,0 +1,76 @@ +/* + * Sigreturn trampolines for returning from a signal when the SA_RESTORER + * flag is not set. + * + * Copyright (C) 2016 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Based on glibc's arm sa_restorer. While this is not strictly necessary, we + * provide both A32 and T32 versions, in accordance with the arm sigreturn + * code. + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/unistd32.h> + +.macro sigreturn_trampoline name, syscall, regs_offset + /* + * We provide directives for enabling stack unwinding through the + * trampoline. On arm, CFI directives are only used for debugging (and + * the vDSO is stripped of debug information), so only the arm-specific + * unwinding directives are useful here. + */ + .fnstart + .save {r0-r15} + .pad #\regs_offset + /* + * It is necessary to start the unwind tables at least one instruction + * before the trampoline, as the unwinder will assume that the signal + * handler has been called from the trampoline, that is just before + * where the signal handler returns (mov r7, ...). + */ + nop +ENTRY(\name) + mov r7, #\syscall + svc #0 + .fnend + /* + * We would like to use ENDPROC, but the macro uses @ which is a + * comment symbol for arm assemblers, so directly use .type with % + * instead. 
+ */ + .type \name, %function +END(\name) +.endm + + .text + + .arm + sigreturn_trampoline __kernel_sigreturn_arm, \ + __NR_sigreturn, \ + COMPAT_SIGFRAME_REGS_OFFSET + + sigreturn_trampoline __kernel_rt_sigreturn_arm, \ + __NR_rt_sigreturn, \ + COMPAT_RT_SIGFRAME_REGS_OFFSET + + .thumb + sigreturn_trampoline __kernel_sigreturn_thumb, \ + __NR_sigreturn, \ + COMPAT_SIGFRAME_REGS_OFFSET + + sigreturn_trampoline __kernel_rt_sigreturn_thumb, \ + __NR_rt_sigreturn, \ + COMPAT_RT_SIGFRAME_REGS_OFFSET diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S new file mode 100644 index 000000000000..fe19ff70eb76 --- /dev/null +++ b/arch/arm64/kernel/vdso32/vdso.S @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/page.h> + + .globl vdso32_start, vdso32_end + .section .rodata + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/arm64/kernel/vdso32/vdso.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S new file mode 100644 index 000000000000..f95cb1c431fb --- /dev/null +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -0,0 +1,95 @@ +/* + * Adapted from arm64 version. + * + * GNU linker script for the VDSO library. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * Heavily based on the vDSO linker scripts for other archs. + */ + +#include <linux/const.h> +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") +OUTPUT_ARCH(arm) + +SECTIONS +{ + PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE); + . 
= VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + .text : { *(.text*) } :text =0xe7f001f2 + + .got : { *(.got) } + .rel.plt : { *(.rel.plt) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ +} + +VERSION +{ + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_clock_getres; + __vdso_time; + __kernel_sigreturn_arm; + __kernel_sigreturn_thumb; + __kernel_rt_sigreturn_arm; + __kernel_rt_sigreturn_thumb; + local: *; + }; +} + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm; +VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb; +VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm; +VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb; diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c new file mode 100644 index 000000000000..b73d4011993d --- /dev/null +++ b/arch/arm64/kernel/vdso32/vgettimeofday.c @@ -0,0 +1,3 @@ +#include "compiler.h" +#include "datapage.h" +#include "../../../../lib/vdso/vgettimeofday.c" diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 6e4832def254..71c8076bbc60 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -11,6 +11,7 @@ #include <asm/memory.h> #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/cache.h> #include "image.h" @@ -46,6 +47,16 @@ jiffies = jiffies_64; *(.idmap.text) \ VMLINUX_SYMBOL(__idmap_text_end) = .; +#ifdef CONFIG_HIBERNATION +#define HIBERNATE_TEXT \ + . = ALIGN(SZ_4K); \ + VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + *(.hibernate_exit.text) \ + VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; +#else +#define HIBERNATE_TEXT +#endif + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ @@ -74,14 +85,19 @@ PECOFF_FILE_ALIGNMENT = 0x200; #endif #if defined(CONFIG_DEBUG_ALIGN_RODATA) -#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT); -#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO -#elif defined(CONFIG_DEBUG_RODATA) -#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT); -#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO +/* + * 4 KB granule: 1 level 2 entry + * 16 KB granule: 128 level 3 entries, with contiguous bit + * 64 KB granule: 32 level 3 entries, with contiguous bit + */ +#define SEGMENT_ALIGN SZ_2M #else -#define ALIGN_DEBUG_RO -#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min); +/* + * 4 KB granule: 16 level 3 entries, with contiguous bit + * 16 KB granule: 4 level 3 entries, without contiguous bit + * 64 KB granule: 1 level 3 entry + */ +#define SEGMENT_ALIGN SZ_64K #endif SECTIONS @@ -99,6 +115,7 @@ SECTIONS *(.discard) *(.discard.*) *(.interp .dynamic) + *(.dynsym .dynstr .hash) } . 
= KIMAGE_VADDR + TEXT_OFFSET; @@ -107,19 +124,21 @@ SECTIONS _text = .; HEAD_TEXT } - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; *(.exception.text) __exception_text_end = .; IRQENTRY_TEXT + ENTRY_TEXT SOFTIRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT + KPROBES_TEXT HYPERVISOR_TEXT IDMAP_TEXT + HIBERNATE_TEXT TRAMP_TEXT *(.fixup) *(.gnu.warning) @@ -127,14 +146,14 @@ SECTIONS *(.got) /* Global offset table */ } - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + . = ALIGN(SEGMENT_ALIGN); _etext = .; /* End of text section */ RO_DATA(PAGE_SIZE) /* everything from this point to */ EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES - ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + . = ALIGN(SEGMENT_ALIGN); __init_begin = .; INIT_TEXT_SECTION(8) @@ -166,27 +185,37 @@ SECTIONS *(.altinstr_replacement) } .rela : ALIGN(8) { - __reloc_start = .; *(.rela .rela*) - __reloc_end = .; - } - .dynsym : ALIGN(8) { - __dynsym_start = .; - *(.dynsym) - } - .dynstr : { - *(.dynstr) - } - .hash : { - *(.hash) } - . = ALIGN(PAGE_SIZE); + __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR); + __rela_size = SIZEOF(.rela); + + . = ALIGN(SEGMENT_ALIGN); __init_end = .; _data = .; _sdata = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + + /* + * Data written with the MMU off but read with the MMU on requires + * cache lines to be invalidated, discarding up to a Cache Writeback + * Granule (CWG) of data from the cache. Keep the section that + * requires this type of maintenance to be in its own Cache Writeback + * Granule (CWG) area so the cache maintenance operations don't + * interfere with adjacent data. + */ + .mmuoff.data.write : ALIGN(SZ_2K) { + __mmuoff_data_start = .; + *(.mmuoff.data.write) + } + . = ALIGN(SZ_2K); + .mmuoff.data.read : { + *(.mmuoff.data.read) + __mmuoff_data_end = .; + } + PECOFF_EDATA_PADDING _edata = .; @@ -223,6 +252,10 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "HYP init code too big or misaligned") ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "ID map text too big or misaligned") +#ifdef CONFIG_HIBERNATION +ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) + <= SZ_4K, "Hibernate exit text too big or misaligned") +#endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, |
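
The SZ_2K alignment of the .mmuoff.data sections above reflects the architectural maximum Cache Writeback Granule. For reference, a sketch of how that granule can be read back at runtime, in the spirit of the cache_type_cwg() helper in asm/cache.h (the CTR_EL0 field encoding is architectural, but treat the helper names and the fallback policy as assumptions):

#include <linux/sizes.h>
#include <linux/types.h>
#include <asm/cputype.h>	/* read_cpuid_cachetype() reads CTR_EL0 */

#define CTR_CWG_SHIFT	24
#define CTR_CWG_MASK	15

/*
 * CTR_EL0.CWG encodes the Cache Writeback Granule as log2(words),
 * so the granule in bytes is 4 << CWG. A value of 0 means the
 * granule is not reported, in which case software must assume the
 * architectural maximum of 2 KB, which is exactly what the SZ_2K
 * alignment of .mmuoff.data.write/.read is sized for.
 */
static inline u32 cache_type_cwg(void)
{
	return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK;
}

static inline unsigned int cwg_bytes(void)
{
	u32 cwg = cache_type_cwg();

	return cwg ? 4U << cwg : SZ_2K;
}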
