55 files changed, 1225 insertions, 605 deletions
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 58b71ddf9b60..ba4b6acfc545 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -56,3 +56,4 @@ stable kernels. | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | +| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index 1ebef5c69fb6..8531a7a79e33 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -24,6 +24,7 @@ CONFIG_DM_VERITY=y CONFIG_DM_VERITY_FEC=y CONFIG_EMBEDDED=y CONFIG_FB=y +CONFIG_HARDENED_USERCOPY=y CONFIG_HIGH_RES_TIMERS=y CONFIG_INET6_AH=y CONFIG_INET6_ESP=y diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a5019f2afa9c..350c05eacb0d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -715,6 +715,14 @@ config SETEND_EMULATION If unsure, say Y endif +config ARM64_SW_TTBR0_PAN + bool "Emulate Priviledged Access Never using TTBR0_EL1 switching" + help + Enabling this option prevents the kernel from accessing + user-space memory directly by pointing TTBR0_EL1 to a reserved + zeroed area and reserved ASID. The user access routines + restore the valid TTBR0_EL1 temporarily. + menu "ARMv8.1 architectural features" config ARM64_HW_AFDBM diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index beccbdefa106..8746ff6abd77 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -95,13 +95,11 @@ void apply_alternatives(void *start, size_t length); * The code that follows this macro will be assembled and linked as * normal. There are no restrictions on this code. */ -.macro alternative_if_not cap, enable = 1 - .if \enable +.macro alternative_if_not cap .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: - .endif .endm /* @@ -118,27 +116,27 @@ void apply_alternatives(void *start, size_t length); * alternative sequence it is defined in (branches into an * alternative sequence are not fixed up). */ -.macro alternative_else, enable = 1 - .if \enable +.macro alternative_else 662: .pushsection .altinstr_replacement, "ax" 663: - .endif .endm /* * Complete an alternative code sequence. */ -.macro alternative_endif, enable = 1 - .if \enable +.macro alternative_endif 664: .popsection .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) - .endif .endm #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +.macro user_alt, label, oldinstr, newinstr, cond +9999: alternative_insn "\oldinstr", "\newinstr", \cond + _ASM_EXTABLE 9999b, \label +.endm /* * Generate the assembly for UAO alternatives with exception table entries. diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 70f7b9e04598..aeb4554b3af3 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -1,5 +1,5 @@ /* - * Based on arch/arm/include/asm/assembler.h + * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S * * Copyright (C) 1996-2000 Russell King * Copyright (C) 2012 ARM Ltd. 
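A rough C sketch of the mechanism described by the ARM64_SW_TTBR0_PAN help text above, written against the uaccess_ttbr0_* helpers this same series adds to uaccess.h further down; the sw_pan_* wrapper names are illustrative only and are not part of the patch:

	/*
	 * Illustrative only.  With ARM64_SW_TTBR0_PAN, TTBR0_EL1 normally
	 * points at an empty, zeroed table (reserved_ttbr0, placed right
	 * after swapper_pg_dir) with the ASID field left at zero, so any
	 * kernel access to a user address faults.  The uaccess routines
	 * reopen the window by restoring the thread's saved TTBR0 value.
	 */
	static inline void sw_pan_close_uaccess_window(void)
	{
		/* reserved_ttbr0 sits SWAPPER_DIR_SIZE bytes after swapper_pg_dir */
		write_sysreg(read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE, ttbr0_el1);
		isb();
	}

	static inline void sw_pan_open_uaccess_window(void)
	{
		/*
		 * Saved per-thread value: virt_to_phys(mm->pgd) | ASID << 48.
		 * The real helper also disables IRQs around this to avoid
		 * racing with an ASID roll-over.
		 */
		write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
		isb();
	}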
@@ -23,6 +23,10 @@ #ifndef __ASM_ASSEMBLER_H #define __ASM_ASSEMBLER_H +#include <asm/asm-offsets.h> +#include <asm/cpufeature.h> +#include <asm/page.h> +#include <asm/pgtable-hwdef.h> #include <asm/ptrace.h> #include <asm/thread_info.h> @@ -49,6 +53,15 @@ msr daifclr, #2 .endm + .macro save_and_disable_irq, flags + mrs \flags, daif + msr daifset, #2 + .endm + + .macro restore_irq, flags + msr daif, \flags + .endm + /* * Enable and disable debug exceptions. */ @@ -212,6 +225,111 @@ lr .req x30 // link register .endm /* + * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) + */ + .macro vma_vm_mm, rd, rn + ldr \rd, [\rn, #VMA_VM_MM] + .endm + +/* + * mmid - get context id from mm pointer (mm->context.id) + */ + .macro mmid, rd, rn + ldr \rd, [\rn, #MM_CONTEXT_ID] + .endm + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register. + */ + .macro dcache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + ubfm \tmp, \tmp, #16, #19 // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * icache_line_size - get the minimum I-cache line size from the CTR register. + */ + .macro icache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map + */ + .macro tcr_set_idmap_t0sz, valreg, tmpreg +#ifndef CONFIG_ARM64_VA_BITS_48 + ldr_l \tmpreg, idmap_t0sz + bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH +#endif + .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: + .if (\op == cvau || \op == cvac) +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .else + dc \op, \kaddr + .endif + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm + +/* + * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present + */ + .macro reset_pmuserenr_el0, tmpreg + mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx \tmpreg, \tmpreg, #8, #4 + cmp \tmpreg, #1 // Skip if no PMU present + b.lt 9000f + msr pmuserenr_el0, xzr // Disable PMU access from EL0 +9000: + .endm + +/* + * copy_page - copy src to dest using temp registers t1-t8 + */ + .macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req +9998: ldp \t1, \t2, [\src] + ldp \t3, \t4, [\src, #16] + ldp \t5, \t6, [\src, #32] + ldp \t7, \t8, [\src, #48] + add \src, \src, #64 + stnp \t1, \t2, [\dest] + stnp \t3, \t4, [\dest, #16] + stnp \t5, \t6, [\dest, #32] + stnp \t7, \t8, [\dest, #48] + add \dest, \dest, #64 + tst \src, #(PAGE_SIZE - 1) + b.ne 9998b + .endm + +/* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. 
*/ @@ -233,4 +351,48 @@ lr .req x30 // link register .long \sym\()_hi32 .endm + /* + * mov_q - move an immediate constant into a 64-bit register using + * between 2 and 4 movz/movk instructions (depending on the + * magnitude and sign of the operand) + */ + .macro mov_q, reg, val + .if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff) + movz \reg, :abs_g1_s:\val + .else + .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff) + movz \reg, :abs_g2_s:\val + .else + movz \reg, :abs_g3:\val + movk \reg, :abs_g2_nc:\val + .endif + movk \reg, :abs_g1_nc:\val + .endif + movk \reg, :abs_g0_nc:\val + .endm + +/* + * Return the current thread_info. + */ + .macro get_thread_info, rd + mrs \rd, sp_el0 + .endm + +/* + * Errata workaround post TTBR0_EL1 update. + */ + .macro post_ttbr0_update_workaround +#ifdef CONFIG_CAVIUM_ERRATUM_27456 +alternative_if_not ARM64_WORKAROUND_CAVIUM_27456 + nop + nop + nop +alternative_else + ic iallu + dsb nsh + isb +alternative_endif +#endif + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 3e9e7c67bfbc..4b763d7eed36 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -33,9 +33,10 @@ #define ARM64_HAS_NO_HW_PREFETCH 8 #define ARM64_HAS_UAO 9 #define ARM64_ALT_PAN_NOT_UAO 10 -#define ARM64_WORKAROUND_CAVIUM_27456 11 +#define ARM64_HAS_VIRT_HOST_EXTN 11 +#define ARM64_WORKAROUND_CAVIUM_27456 12 -#define ARM64_NCAPS 12 +#define ARM64_NCAPS 13 #ifndef __ASSEMBLY__ @@ -188,6 +189,12 @@ static inline bool system_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); } +static inline bool system_uses_ttbr0_pan(void) +{ + return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && + !cpus_have_cap(ARM64_HAS_PAN); +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index ef572206f1c3..932f5a56d1a6 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -1,8 +1,11 @@ #ifndef _ASM_EFI_H #define _ASM_EFI_H +#include <asm/cpufeature.h> #include <asm/io.h> +#include <asm/mmu_context.h> #include <asm/neon.h> +#include <asm/tlbflush.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -10,6 +13,8 @@ extern void efi_init(void); #define efi_init() #endif +int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); + #define efi_call_virt(f, ...) \ ({ \ efi_##f##_t *__f; \ @@ -63,6 +68,34 @@ extern void efi_init(void); * Services are enabled and the EFI_RUNTIME_SERVICES bit set. */ +static inline void efi_set_pgd(struct mm_struct *mm) +{ + __switch_mm(mm); + + if (system_uses_ttbr0_pan()) { + if (mm != current->active_mm) { + /* + * Update the current thread's saved ttbr0 since it is + * restored as part of a return from exception. Set + * the hardware TTBR0_EL1 using cpu_switch_mm() + * directly to enable potential errata workarounds. + */ + update_saved_ttbr0(current, mm); + cpu_switch_mm(mm->pgd, mm); + } else { + /* + * Defer the switch to the current thread's TTBR0_EL1 + * until uaccess_enable(). Restore the current + * thread's saved ttbr0 corresponding to its active_mm + * (if different from init_mm). 
+ */ + cpu_set_reserved_ttbr0(); + if (current->active_mm != &init_mm) + update_saved_ttbr0(current, current->active_mm); + } + } +} + void efi_virtmap_load(void); void efi_virtmap_unload(void); diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 77eeb2cc648f..f772e15c4766 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -74,6 +74,7 @@ #define ESR_ELx_EC_SHIFT (26) #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define ESR_ELx_IL (UL(1) << 25) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index f2585cdd32c2..71dfa3b42313 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -27,9 +27,9 @@ #include <asm/sysreg.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ +do { \ + uaccess_enable(); \ asm volatile( \ - ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ @@ -44,11 +44,11 @@ " .popsection\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ - : "memory") + : "memory"); \ + uaccess_disable(); \ +} while (0) static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) @@ -118,8 +118,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + uaccess_enable(); asm volatile("// futex_atomic_cmpxchg_inatomic\n" -ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" @@ -134,10 +134,10 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " .popsection\n" _ASM_EXTABLE(1b, 4b) _ASM_EXTABLE(2b, 4b) -ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) : "memory"); + uaccess_disable(); *uval = val; return ret; diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 5c6375d8528b..7803343e5881 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -19,6 +19,8 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H +#include <asm/pgtable.h> +#include <asm/sparsemem.h> /* * The linear mapping and the start of memory are both 2M aligned (per @@ -53,6 +55,12 @@ #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +#define RESERVED_TTBR0_SIZE (PAGE_SIZE) +#else +#define RESERVED_TTBR0_SIZE (0) +#endif + /* Initial memory map size */ #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_BLOCK_SHIFT SECTION_SHIFT @@ -86,10 +94,24 @@ * (64k granule), or a multiple that can be mapped using contiguous bits * in the page tables: 32 * PMD_SIZE (16k granule) */ -#ifdef CONFIG_ARM64_64K_PAGES -#define ARM64_MEMSTART_ALIGN SZ_512M +#if defined(CONFIG_ARM64_4K_PAGES) +#define ARM64_MEMSTART_SHIFT PUD_SHIFT +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ARM64_MEMSTART_SHIFT (PMD_SHIFT + 5) +#else +#define ARM64_MEMSTART_SHIFT PMD_SHIFT +#endif + +/* + * sparsemem vmemmap imposes an additional requirement on the 
alignment of + * memstart_addr, due to the fact that the base of the vmemmap region + * has a direct correspondence, and needs to appear sufficiently aligned + * in the virtual address space. + */ +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS +#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) #else -#define ARM64_MEMSTART_ALIGN SZ_1G +#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT) #endif #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 12f8a00fb3f1..ba1b3409d7ed 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -193,7 +193,11 @@ static inline void *phys_to_virt(phys_addr_t x) #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define _virt_addr_is_linear(kaddr) (((u64)(kaddr)) >= PAGE_OFFSET) +#define virt_addr_valid(kaddr) (_virt_addr_is_linear(kaddr) && \ + _virt_addr_valid(kaddr)) #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index a00f7cf35bbd..4a32fd5f101d 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -23,6 +23,7 @@ #include <linux/sched.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/proc-fns.h> #include <asm-generic/mm_hooks.h> #include <asm/cputype.h> @@ -113,7 +114,7 @@ static inline void cpu_uninstall_idmap(void) local_flush_tlb_all(); cpu_set_default_tcr_t0sz(); - if (mm != &init_mm) + if (mm != &init_mm && !system_uses_ttbr0_pan()) cpu_switch_mm(mm->pgd, mm); } @@ -173,20 +174,26 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } -/* - * This is the actual mm switch as far as the scheduler - * is concerned. No registers are touched. We avoid - * calling the CPU specific function when the mm hasn't - * actually changed. - */ -static inline void -switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +static inline void update_saved_ttbr0(struct task_struct *tsk, + struct mm_struct *mm) { - unsigned int cpu = smp_processor_id(); + if (system_uses_ttbr0_pan()) { + BUG_ON(mm->pgd == swapper_pg_dir); + task_thread_info(tsk)->ttbr0 = + virt_to_phys(mm->pgd) | ASID(mm) << 48; + } +} +#else +static inline void update_saved_ttbr0(struct task_struct *tsk, + struct mm_struct *mm) +{ +} +#endif - if (prev == next) - return; +static inline void __switch_mm(struct mm_struct *next) +{ + unsigned int cpu = smp_processor_id(); /* * init_mm.pgd does not contain any user mappings and it is always @@ -200,7 +207,23 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, check_and_switch_context(next, cpu); } +static inline void +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + if (prev != next) + __switch_mm(next); + + /* + * Update the saved TTBR0_EL1 of the scheduled-in task as the previous + * value may have not been initialised yet (activate_mm caller) or the + * ASID has changed since the last run (following the context switch + * of another thread of the same process). 
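update_saved_ttbr0() above packs the ASID into the top 16 bits of the saved TTBR0_EL1 value, matching the bfi in cpu_do_switch_mm(). A worked example with made-up values:

	static inline u64 example_saved_ttbr0(void)
	{
		u64 pgd_phys = 0x0000000081234000UL;	/* virt_to_phys(mm->pgd), hypothetical */
		u64 asid     = 0x2a;			/* ASID(mm), hypothetical */

		/* bits 47:0 hold the table base, bits 63:48 the ASID */
		return pgd_phys | (asid << 48);		/* == 0x002a000081234000 */
	}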
+ */ + update_saved_ttbr0(tsk, next); +} + #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm(prev, next, NULL) +#define activate_mm(prev,next) switch_mm(prev, next, current) #endif diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 7f94755089e2..b7b4ff488951 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -21,6 +21,8 @@ #include <uapi/asm/ptrace.h> +#define _PSR_PAN_BIT 22 + /* Current Exception Level values, as contained in CurrentEL */ #define CurrentEL_EL1 (1 << 2) #define CurrentEL_EL2 (2 << 2) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b9fd8ec79033..1a78d6e2a78b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,8 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include <linux/stringify.h> + #include <asm/opcodes.h> /* @@ -215,6 +217,8 @@ #else +#include <linux/types.h> + asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" " .equ .L__reg_num_x\\num, \\num\n" @@ -239,6 +243,23 @@ static inline void config_sctlr_el1(u32 clear, u32 set) val |= set; asm volatile("msr sctlr_el1, %0" : : "r" (val)); } + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. + */ +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr " __stringify(r) ", %0" \ + : : "r" (__val)); \ +} while (0) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index abd64bd1f6d9..b3325a9cb90f 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -47,6 +47,9 @@ typedef unsigned long mm_segment_t; struct thread_info { unsigned long flags; /* low level flags */ mm_segment_t addr_limit; /* address limit */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + u64 ttbr0; /* saved TTBR0_EL1 */ +#endif struct task_struct *task; /* main task structure */ int preempt_count; /* 0 => preemptable, <0 => bug */ int cpu; /* cpu */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index c3d445b42351..c37c064d7cdd 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -18,6 +18,8 @@ #ifndef __ASM_UACCESS_H #define __ASM_UACCESS_H +#ifndef __ASSEMBLY__ + /* * User space memory access functions */ @@ -26,6 +28,7 @@ #include <asm/alternative.h> #include <asm/cpufeature.h> +#include <asm/kernel-pgtable.h> #include <asm/ptrace.h> #include <asm/sysreg.h> #include <asm/errno.h> @@ -124,6 +127,85 @@ static inline void set_fs(mm_segment_t fs) " .popsection\n" /* + * User access enabling/disabling. + */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +static inline void uaccess_ttbr0_disable(void) +{ + unsigned long ttbr; + + /* reserved_ttbr0 placed at the end of swapper_pg_dir */ + ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; + write_sysreg(ttbr, ttbr0_el1); + isb(); +} + +static inline void uaccess_ttbr0_enable(void) +{ + unsigned long flags; + + /* + * Disable interrupts to avoid preemption between reading the 'ttbr0' + * variable and the MSR. A context switch could trigger an ASID + * roll-over and an update of 'ttbr0'. 
+ */ + local_irq_save(flags); + write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); + isb(); + local_irq_restore(flags); +} +#else +static inline void uaccess_ttbr0_disable(void) +{ +} + +static inline void uaccess_ttbr0_enable(void) +{ +} +#endif + +#define __uaccess_disable(alt) \ +do { \ + if (system_uses_ttbr0_pan()) \ + uaccess_ttbr0_disable(); \ + else \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \ + CONFIG_ARM64_PAN)); \ +} while (0) + +#define __uaccess_enable(alt) \ +do { \ + if (system_uses_ttbr0_pan()) \ + uaccess_ttbr0_enable(); \ + else \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \ + CONFIG_ARM64_PAN)); \ +} while (0) + +static inline void uaccess_disable(void) +{ + __uaccess_disable(ARM64_HAS_PAN); +} + +static inline void uaccess_enable(void) +{ + __uaccess_enable(ARM64_HAS_PAN); +} + +/* + * These functions are no-ops when UAO is present. + */ +static inline void uaccess_disable_not_uao(void) +{ + __uaccess_disable(ARM64_ALT_PAN_NOT_UAO); +} + +static inline void uaccess_enable_not_uao(void) +{ + __uaccess_enable(ARM64_ALT_PAN_NOT_UAO); +} + +/* * The "__xxx" versions of the user access functions do not verify the address * space - it must have been done previously with a separate "access_ok()" * call. @@ -150,8 +232,7 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ @@ -172,9 +253,8 @@ do { \ default: \ BUILD_BUG(); \ } \ + uaccess_disable_not_uao(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ } while (0) #define __get_user(x, ptr) \ @@ -219,8 +299,7 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ @@ -241,8 +320,7 @@ do { \ default: \ BUILD_BUG(); \ } \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ + uaccess_disable_not_uao(); \ } while (0) #define __put_user(x, ptr) \ @@ -327,4 +405,73 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); +#else /* __ASSEMBLY__ */ + +#include <asm/alternative.h> +#include <asm/assembler.h> +#include <asm/kernel-pgtable.h> + +/* + * User access enabling/disabling macros. + */ + .macro uaccess_ttbr0_disable, tmp1 + mrs \tmp1, ttbr1_el1 // swapper_pg_dir + add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir + msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 + isb + .endm + + .macro uaccess_ttbr0_enable, tmp1 + get_thread_info \tmp1 + ldr \tmp1, [\tmp1, #TI_TTBR0] // load saved TTBR0_EL1 + msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 + isb + .endm + +/* + * These macros are no-ops when UAO is present. 
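For callers, the net effect of the C helpers added earlier in this hunk is that the standard accessors manage the PAN/TTBR0 window themselves, while open-coded user accesses (the futex and SWP emulation changes below) bracket their own asm with uaccess_enable()/uaccess_disable(). A minimal, hypothetical caller for illustration:

	/*
	 * Hypothetical caller: no explicit PAN or TTBR0 handling is needed
	 * here, because __get_user()/__put_user() now call
	 * uaccess_enable_not_uao()/uaccess_disable_not_uao() internally.
	 */
	static int example_read_user_u32(u32 __user *uptr, u32 *out)
	{
		if (!access_ok(VERIFY_READ, uptr, sizeof(*uptr)))
			return -EFAULT;
		return __get_user(*out, uptr);
	}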
+ */ + .macro uaccess_disable_not_uao, tmp1 +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +alternative_if_not ARM64_HAS_PAN + uaccess_ttbr0_disable \tmp1 +alternative_else + nop + nop + nop + nop +alternative_endif +#endif +alternative_if_not ARM64_ALT_PAN_NOT_UAO + nop +alternative_else + SET_PSTATE_PAN(1) +alternative_endif + .endm + + .macro uaccess_enable_not_uao, tmp1, tmp2 +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption + uaccess_ttbr0_enable \tmp1 + restore_irq \tmp2 +alternative_else + nop + nop + nop + nop + nop + nop + nop +alternative_endif +#endif +alternative_if_not ARM64_ALT_PAN_NOT_UAO + nop +alternative_else + SET_PSTATE_PAN(0) +alternative_endif + .endm + +#endif /* __ASSEMBLY__ */ + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c37202c0c838..562f7ddb158b 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -281,9 +281,9 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) * Error-checking SWP macros implemented using ldxr{b}/stxr{b} */ #define __user_swpX_asm(data, addr, res, temp, B) \ +do { \ + uaccess_enable(); \ __asm__ __volatile__( \ - ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ "0: ldxr"B" %w2, [%3]\n" \ "1: stxr"B" %w0, %w1, [%3]\n" \ " cbz %w0, 2f\n" \ @@ -299,11 +299,11 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) " .popsection" \ _ASM_EXTABLE(0b, 4b) \ _ASM_EXTABLE(1b, 4b) \ - ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp) \ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ - : "memory") + : "memory"); \ + uaccess_disable(); \ +} while (0) #define __user_swp_asm(data, addr, res, temp) \ __user_swpX_asm(data, addr, res, temp, "") diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 087cf9a65359..d0ec987dba5b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -36,6 +36,9 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + DEFINE(TI_TTBR0, offsetof(struct thread_info, ttbr0)); +#endif DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); BLANK(); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7566cad9fa1d..40ee3f2933e7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -43,6 +43,7 @@ unsigned int compat_elf_hwcap2 __read_mostly; #endif DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +EXPORT_SYMBOL(cpu_hwcaps); #define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4eeb17198cfa..b6abc852f2a1 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,317 +11,34 @@ * */ -#include <linux/atomic.h> #include <linux/dmi.h> #include <linux/efi.h> -#include <linux/export.h> -#include <linux/memblock.h> -#include <linux/mm_types.h> -#include <linux/bootmem.h> -#include <linux/of.h> -#include <linux/of_fdt.h> -#include <linux/preempt.h> -#include <linux/rbtree.h> -#include <linux/rwsem.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/spinlock.h> +#include 
<linux/init.h> -#include <asm/cacheflush.h> #include <asm/efi.h> -#include <asm/tlbflush.h> -#include <asm/mmu_context.h> -#include <asm/mmu.h> -#include <asm/pgtable.h> -struct efi_memory_map memmap; - -static u64 efi_system_table; - -static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; - -static struct mm_struct efi_mm = { - .mm_rb = RB_ROOT, - .pgd = efi_pgd, - .mm_users = ATOMIC_INIT(2), - .mm_count = ATOMIC_INIT(1), - .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), - .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), - .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), -}; - -static int __init is_normal_ram(efi_memory_desc_t *md) -{ - if (md->attribute & EFI_MEMORY_WB) - return 1; - return 0; -} - -/* - * Translate a EFI virtual address into a physical address: this is necessary, - * as some data members of the EFI system table are virtually remapped after - * SetVirtualAddressMap() has been called. - */ -static phys_addr_t efi_to_phys(unsigned long addr) +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { - efi_memory_desc_t *md; - - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (md->virt_addr == 0) - /* no virtual mapping has been installed by the stub */ - break; - if (md->virt_addr <= addr && - (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT)) - return md->phys_addr + addr - md->virt_addr; - } - return addr; -} - -static int __init uefi_init(void) -{ - efi_char16_t *c16; - void *config_tables; - u64 table_size; - char vendor[100] = "unknown"; - int i, retval; - - efi.systab = early_memremap(efi_system_table, - sizeof(efi_system_table_t)); - if (efi.systab == NULL) { - pr_warn("Unable to map EFI system table.\n"); - return -ENOMEM; - } - - set_bit(EFI_BOOT, &efi.flags); - set_bit(EFI_64BIT, &efi.flags); + pteval_t prot_val; /* - * Verify the EFI Table + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. */ - if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { - pr_err("System table signature incorrect\n"); - retval = -EINVAL; - goto out; - } - if ((efi.systab->hdr.revision >> 16) < 2) - pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff); - - /* Show what we know for posterity */ - c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), - sizeof(vendor) * sizeof(efi_char16_t)); - if (c16) { - for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = c16[i]; - vendor[i] = '\0'; - early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); - } - - pr_info("EFI v%u.%.02u by %s\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff, vendor); - - table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; - config_tables = early_memremap(efi_to_phys(efi.systab->tables), - table_size); - if (config_tables == NULL) { - pr_warn("Unable to map EFI config table array.\n"); - retval = -ENOMEM; - goto out; - } - retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, - sizeof(efi_config_table_64_t), NULL); - - early_memunmap(config_tables, table_size); -out: - early_memunmap(efi.systab, sizeof(efi_system_table_t)); - return retval; -} - -/* - * Return true for RAM regions we want to permanently reserve. 
- */ -static __init int is_reserve_region(efi_memory_desc_t *md) -{ - switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: - case EFI_BOOT_SERVICES_CODE: - case EFI_BOOT_SERVICES_DATA: - case EFI_CONVENTIONAL_MEMORY: - case EFI_PERSISTENT_MEMORY: - return 0; - default: - break; - } - return is_normal_ram(md); -} - -static __init void reserve_regions(void) -{ - efi_memory_desc_t *md; - u64 paddr, npages, size; - - if (efi_enabled(EFI_DBG)) - pr_info("Processing EFI memory map:\n"); - - for_each_efi_memory_desc(&memmap, md) { - paddr = md->phys_addr; - npages = md->num_pages; - - if (efi_enabled(EFI_DBG)) { - char buf[64]; - - pr_info(" 0x%012llx-0x%012llx %s", - paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, - efi_md_typeattr_format(buf, sizeof(buf), md)); - } - - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (is_normal_ram(md)) - early_init_dt_add_memory_arch(paddr, size); - - if (is_reserve_region(md)) { - memblock_reserve(paddr, size); - if (efi_enabled(EFI_DBG)) - pr_cont("*"); - } - - if (efi_enabled(EFI_DBG)) - pr_cont("\n"); - } - - set_bit(EFI_MEMMAP, &efi.flags); -} - -void __init efi_init(void) -{ - struct efi_fdt_params params; - - /* Grab UEFI information placed in FDT by stub */ - if (!efi_get_fdt_params(¶ms)) - return; - - efi_system_table = params.system_table; - - memblock_reserve(params.mmap & PAGE_MASK, - PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); - memmap.phys_map = params.mmap; - memmap.map = early_memremap(params.mmap, params.mmap_size); - if (memmap.map == NULL) { - /* - * If we are booting via UEFI, the UEFI memory map is the only - * description of memory we have, so there is little point in - * proceeding if we cannot access it. - */ - panic("Unable to map EFI memory map.\n"); - } - memmap.map_end = memmap.map + params.mmap_size; - memmap.desc_size = params.desc_size; - memmap.desc_version = params.desc_ver; - - if (uefi_init() < 0) - return; - - reserve_regions(); - early_memunmap(memmap.map, params.mmap_size); -} - -static bool __init efi_virtmap_init(void) -{ - efi_memory_desc_t *md; - - init_new_context(NULL, &efi_mm); - - for_each_efi_memory_desc(&memmap, md) { - pgprot_t prot; - - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (md->virt_addr == 0) - return false; - - pr_info(" EFI remap 0x%016llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - /* - * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be - * executable, everything else can be mapped with the XN bits - * set. - */ - if (!is_normal_ram(md)) - prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE || - !PAGE_ALIGNED(md->phys_addr)) - prot = PAGE_KERNEL_EXEC; - else - prot = PAGE_KERNEL; - - create_pgd_mapping(&efi_mm, md->phys_addr, md->virt_addr, - md->num_pages << EFI_PAGE_SHIFT, - __pgprot(pgprot_val(prot) | PTE_NG)); - } - return true; -} - -/* - * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., - * non-early mapping of the UEFI system table and virtual mappings for all - * EFI_MEMORY_RUNTIME regions. 
- */ -static int __init arm64_enable_runtime_services(void) -{ - u64 mapsize; - - if (!efi_enabled(EFI_BOOT)) { - pr_info("EFI services will not be available.\n"); - return 0; - } - - if (efi_runtime_disabled()) { - pr_info("EFI runtime services will be disabled.\n"); - return 0; - } - - pr_info("Remapping and enabling EFI services.\n"); - - mapsize = memmap.map_end - memmap.map; - memmap.map = (__force void *)ioremap_cache(memmap.phys_map, - mapsize); - if (!memmap.map) { - pr_err("Failed to remap EFI memory map\n"); - return -ENOMEM; - } - memmap.map_end = memmap.map + mapsize; - efi.memmap = &memmap; - - efi.systab = (__force void *)ioremap_cache(efi_system_table, - sizeof(efi_system_table_t)); - if (!efi.systab) { - pr_err("Failed to remap EFI System Table\n"); - return -ENOMEM; - } - set_bit(EFI_SYSTEM_TABLES, &efi.flags); - - if (!efi_virtmap_init()) { - pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); - return -ENOMEM; - } - - /* Set up runtime services function pointers */ - efi_native_runtime_setup(); - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - - efi.runtime_version = efi.systab->hdr.revision; - + if ((md->attribute & EFI_MEMORY_WB) == 0) + prot_val = PROT_DEVICE_nGnRE; + else if (md->type == EFI_RUNTIME_SERVICES_CODE || + !PAGE_ALIGNED(md->phys_addr)) + prot_val = pgprot_val(PAGE_KERNEL_EXEC); + else + prot_val = pgprot_val(PAGE_KERNEL); + + create_pgd_mapping(mm, md->phys_addr, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + __pgprot(prot_val | PTE_NG)); return 0; } -early_initcall(arm64_enable_runtime_services); static int __init arm64_dmi_init(void) { @@ -337,23 +54,6 @@ static int __init arm64_dmi_init(void) } core_initcall(arm64_dmi_init); -static void efi_set_pgd(struct mm_struct *mm) -{ - switch_mm(NULL, mm, NULL); -} - -void efi_virtmap_load(void) -{ - preempt_disable(); - efi_set_pgd(&efi_mm); -} - -void efi_virtmap_unload(void) -{ - efi_set_pgd(current->active_mm); - preempt_enable(); -} - /* * UpdateCapsule() depends on the system being shutdown via * ResetSystem(). diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 588c8e1778d4..eb185c93dfc6 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -29,7 +29,9 @@ #include <asm/esr.h> #include <asm/irq.h> #include <asm/memory.h> +#include <asm/ptrace.h> #include <asm/thread_info.h> +#include <asm/uaccess.h> #include <asm/unistd.h> /* @@ -104,10 +106,39 @@ str x20, [sp, #S_ORIG_ADDR_LIMIT] mov x20, #TASK_SIZE_64 str x20, [tsk, #TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Set the TTBR0 PAN bit in SPSR. When the exception is taken from + * EL0, there is no need to check the state of TTBR0_EL1 since + * accesses are always enabled. + * Note that the meaning of this bit differs from the ARMv8.1 PAN + * feature as all TTBR0_EL1 accesses are disabled, not just those to + * user mappings. 
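The entry code below keys off the ASID field of TTBR0_EL1 to decide whether the interrupted context had the uaccess window open. As a sketch, the control flow in C (not code from the patch; it only applies when the CPU lacks hardware PAN), where taken_from_el1 and saved_spsr are stand-ins for the \el macro parameter and the SPSR_EL1 value stashed in pt_regs:

	if (taken_from_el1) {
		saved_spsr |= PSR_PAN_BIT;		/* assume window was closed */
		if (read_sysreg(ttbr0_el1) & (0xffffUL << 48)) {
			/* non-reserved ASID installed: window was open */
			saved_spsr &= ~PSR_PAN_BIT;
			uaccess_ttbr0_disable();	/* close it for the handler */
		}
	} else {
		/* entry from EL0: always enter the kernel with the window closed */
		uaccess_ttbr0_disable();
	}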
+ */ +alternative_if_not ARM64_HAS_PAN + nop +alternative_else + b 1f // skip TTBR0 PAN +alternative_endif + + .if \el != 0 + mrs x21, ttbr0_el1 + tst x21, #0xffff << 48 // Check for the reserved ASID + orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR + b.eq 1f // TTBR0 access already disabled + and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR + .endif + + uaccess_ttbr0_disable x21 +1: +#endif + stp x22, x23, [sp, #S_PC] /* @@ -139,11 +170,49 @@ /* Restore the task's original addr_limit. */ ldr x20, [sp, #S_ORIG_ADDR_LIMIT] str x20, [tsk, #TI_ADDR_LIMIT] + + /* No need to restore UAO, it will be restored from SPSR_EL1 */ .endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter + .endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR + * PAN bit checking. + */ +alternative_if_not ARM64_HAS_PAN + nop +alternative_else + b 2f // skip TTBR0 PAN +alternative_endif + + .if \el != 0 + tbnz x22, #_PSR_PAN_BIT, 1f // Skip re-enabling TTBR0 access if previously disabled + .endif + + uaccess_ttbr0_enable x0 + + .if \el == 0 + /* + * Enable errata workarounds only if returning to user. The only + * workaround currently required for TTBR0_EL1 changes are for the + * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache + * corruption). + */ + post_ttbr0_update_workaround + .endif +1: + .if \el != 0 + and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit + .endif +2: +#endif + + .if \el == 0 ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 #ifdef CONFIG_ARM64_ERRATUM_845719 @@ -165,6 +234,7 @@ alternative_else alternative_endif #endif .endif + msr elr_el1, x21 // set up the return data msr spsr_el1, x22 ldp x0, x1, [sp, #16 * 0] @@ -187,10 +257,6 @@ alternative_endif eret // return to kernel .endm - .macro get_thread_info, rd - mrs \rd, sp_el0 - .endm - .macro irq_stack_entry mov x19, sp // preserve the original sp @@ -290,7 +356,7 @@ END(vectors) * Invalid mode handlers */ .macro inv_entry, el, reason, regsize = 64 - kernel_entry el, \regsize + kernel_entry \el, \regsize mov x0, sp mov x1, #\reason mrs x2, esr_el1 @@ -349,6 +415,8 @@ el1_sync: lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 b.eq el1_da + cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 + b.eq el1_ia cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el1_undef cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception @@ -360,6 +428,11 @@ el1_sync: cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 b.ge el1_dbg b el1_inv + +el1_ia: + /* + * Fall through to the Data abort case + */ el1_da: /* * Data abort handling @@ -419,7 +492,6 @@ el1_irq: bl trace_hardirqs_off #endif - get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT @@ -545,7 +617,7 @@ el0_ia: enable_dbg_and_irq ct_user_exit mov x0, x26 - orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts + mov x1, x25 mov x2, sp bl do_mem_abort b ret_to_user diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 54fd1e2590a2..9bfa58fea8ce 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -321,14 +321,14 @@ __create_page_tables: * dirty cache lines being evicted. */ mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE bl __inval_cache_range /* * Clear the idmap and swapper page tables. 
*/ mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE + add x6, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -406,7 +406,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE dmb sy bl __inval_cache_range diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 29b8c247d56f..6591bf23422b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -347,6 +347,15 @@ void __init setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Make sure init_thread_info.ttbr0 always generates translation + * faults in case uaccess_enable() is inadvertently called by the init + * thread. + */ + init_thread_info.ttbr0 = virt_to_phys(empty_zero_page); +#endif + #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c5392081b49b..29d7b68f63f0 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -64,8 +64,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, /* * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. + * to safely read from kernel space. */ fs = get_fs(); set_fs(KERNEL_DS); @@ -111,21 +110,12 @@ static void dump_backtrace_entry(unsigned long where) print_ip_sym(where); } -static void dump_instr(const char *lvl, struct pt_regs *regs) +static void __dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); - mm_segment_t fs; char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { unsigned int val, bad; @@ -139,8 +129,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } printk("%sCode: %s\n", lvl, str); +} - set_fs(fs); +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + if (!user_mode(regs)) { + mm_segment_t fs = get_fs(); + set_fs(KERNEL_DS); + __dump_instr(lvl, regs); + set_fs(fs); + } else { + __dump_instr(lvl, regs); + } } static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) @@ -465,7 +465,7 @@ static const char *esr_class_str[] = { const char *esr_get_class_string(u32 esr) { - return esr_class_str[esr >> ESR_ELx_EC_SHIFT]; + return esr_class_str[ESR_ELx_EC(esr)]; } /* diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 14813a6ca13b..f1d6c49dcc5f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -114,14 +114,14 @@ SECTIONS *(.got) /* Global offset table */ } + ALIGN_DEBUG_RO_MIN(PAGE_SIZE) _etext = .; /* End of text section */ - RO_DATA(PAGE_SIZE) - EXCEPTION_TABLE(8) + RO_DATA(PAGE_SIZE) /* everything from this point to */ + EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES ALIGN_DEBUG_RO_MIN(PAGE_SIZE) - _etext = .; /* End of text and rodata section */ __init_begin = .; INIT_TEXT_SECTION(8) @@ -185,6 +185,11 @@ SECTIONS swapper_pg_dir = .; . 
+= SWAPPER_DIR_SIZE; +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + _end = .; STABS_DEBUG diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 5d1cad3ce6d6..08b5f18ba604 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -17,10 +17,10 @@ */ #include <linux/linkage.h> -#include <asm/alternative.h> #include <asm/assembler.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> +#include <asm/uaccess.h> .text @@ -33,8 +33,7 @@ * Alignment fixed up by hardware. */ ENTRY(__clear_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x2, x3 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -54,8 +53,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x2 ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 0b90497d4424..6505ec81f1da 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -16,11 +16,11 @@ #include <linux/linkage.h> -#include <asm/alternative.h> #include <asm/assembler.h> #include <asm/cache.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> +#include <asm/uaccess.h> /* * Copy from user space to a kernel buffer (alignment handled by the hardware) @@ -67,12 +67,10 @@ end .req x5 ENTRY(__arch_copy_from_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x3 mov x0, #0 // Nothing to copy ret ENDPROC(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index f7292dd08c84..9b04ff3ab610 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -18,11 +18,11 @@ #include <linux/linkage.h> -#include <asm/alternative.h> #include <asm/assembler.h> #include <asm/cache.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> +#include <asm/uaccess.h> /* * Copy from user space to user space (alignment handled by the hardware) @@ -68,12 +68,10 @@ end .req x5 ENTRY(__copy_in_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x3 mov x0, #0 ret ENDPROC(__copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 7a7efe255034..8077e4f34d56 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -16,11 +16,11 @@ #include <linux/linkage.h> -#include <asm/alternative.h> #include <asm/assembler.h> #include <asm/cache.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> +#include <asm/uaccess.h> /* * Copy to user space from a kernel buffer (alignment handled by the hardware) @@ -66,12 +66,10 @@ end .req x5 ENTRY(__arch_copy_to_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" 
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x3 mov x0, #0 ret ENDPROC(__arch_copy_to_user) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 6df07069a025..07d7352d7c38 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -24,8 +24,6 @@ #include <asm/cpufeature.h> #include <asm/alternative.h> -#include "proc-macros.S" - /* * flush_icache_range(start,end) * @@ -54,7 +52,7 @@ ENTRY(__flush_cache_user_range) sub x3, x2, #1 bic x4, x0, x3 1: -USER(9f, dc cvau, x4 ) // clean D line to PoU +user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 1b diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 7275628ba59f..25128089c386 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -182,7 +182,12 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: - cpu_switch_mm(mm->pgd, mm); + /* + * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when + * emulating PAN. + */ + if (!system_uses_ttbr0_pan()) + cpu_switch_mm(mm->pgd, mm); } static int asids_init(void) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index edc3e20f7ba9..9cedb10b1107 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -131,6 +131,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma, } #endif +static bool is_el1_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; +} + /* * The kernel tried to access some page that wasn't present. */ @@ -139,8 +144,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, { /* * Are we prepared to handle this kernel fault? + * We are almost certainly not prepared to handle instruction faults. 
*/ - if (fixup_exception(regs)) + if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; /* @@ -202,8 +208,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADMAP 0x010000 #define VM_FAULT_BADACCESS 0x020000 -#define ESR_LNX_EXEC (1 << 24) - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) @@ -242,12 +246,24 @@ out: return fault; } -static inline int permission_fault(unsigned int esr) +static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs) { - unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT; + unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; - return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); + if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR) + return false; + + if (system_uses_ttbr0_pan()) + return fsc_type == ESR_ELx_FSC_FAULT && + (regs->pstate & PSR_PAN_BIT); + else + return fsc_type == ESR_ELx_FSC_PERM; +} + +static bool is_el0_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; } static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, @@ -276,17 +292,21 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; - if (esr & ESR_LNX_EXEC) { + if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } - if (permission_fault(esr) && (addr < USER_DS)) { - if (get_fs() == KERNEL_DS) + if (addr < USER_DS && is_permission_fault(esr, regs)) { + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); + if (is_el1_instruction_abort(esr)) + die("Attempting to execute userspace memory", regs, esr); + if (!search_exception_tables(regs->pc)) die("Accessing user space memory outside uaccess.h routines", regs, esr); } @@ -438,7 +458,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } -static struct fault_info { +static const struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); int sig; int code; @@ -464,10 +484,10 @@ static struct fault_info { { do_bad, SIGBUS, 0, "unknown 17" }, { do_bad, SIGBUS, 0, "unknown 18" }, { do_bad, SIGBUS, 0, "unknown 19" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, { do_bad, SIGBUS, 0, "synchronous parity error" }, { do_bad, SIGBUS, 0, "unknown 25" }, { do_bad, SIGBUS, 0, "unknown 26" }, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 2c38be3df4c8..678878077996 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -131,7 +131,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) int pfn_valid(unsigned 
long pfn)
 {
-	return (pfn & PFN_MASK) == pfn && memblock_is_memory(pfn << PAGE_SHIFT);
+	return (pfn & PFN_MASK) == pfn && memblock_is_map_memory(pfn << PAGE_SHIFT);
 }
 EXPORT_SYMBOL(pfn_valid);
 #endif
@@ -192,8 +192,12 @@ void __init arm64_memblock_init(void)
 	 */
 	memblock_remove(max_t(u64, memstart_addr + linear_region_size,
 			__pa(_end)), ULLONG_MAX);
-	if (memblock_end_of_DRAM() > linear_region_size)
-		memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
+	if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {
+		/* ensure that memstart_addr remains sufficiently aligned */
+		memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,
+					 ARM64_MEMSTART_ALIGN);
+		memblock_remove(0, memstart_addr);
+	}
 
 	/*
 	 * Apply the memory limit if it was set. Since the kernel may be loaded
@@ -372,6 +376,7 @@ void __init mem_init(void)
 		  " vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n"
 		  " .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
 		  " .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
+		  " .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n"
 		  " .data : 0x%p" " - 0x%p" " (%6ld KB)\n"
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n"
@@ -387,6 +392,7 @@ void __init mem_init(void)
 		  MLG(VMALLOC_START, VMALLOC_END),
 		  MLK_ROUNDUP(__init_begin, __init_end),
 		  MLK_ROUNDUP(_text, _etext),
+		  MLK_ROUNDUP(__start_rodata, __init_begin),
 		  MLK_ROUNDUP(_sdata, _edata),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  MLG(VMEMMAP_START,
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6a19e1b73d6d..6d2e411b2535 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -386,14 +386,14 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
 	unsigned long kernel_start = __pa(_stext);
-	unsigned long kernel_end = __pa(_etext);
+	unsigned long kernel_end = __pa(__init_begin);
 
 	/*
 	 * Take care not to create a writable alias for the
 	 * read-only text and rodata sections of the kernel image.
 	 */
 
-	/* No overlap with the kernel text */
+	/* No overlap with the kernel text/rodata */
 	if (end < kernel_start || start >= kernel_end) {
 		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
 				     end - start, PAGE_KERNEL,
@@ -402,7 +402,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
 	}
 
 	/*
-	 * This block overlaps the kernel text mapping.
+	 * This block overlaps the kernel text/rodata mapping.
 	 * Map the portion(s) which don't overlap.
 	 */
 	if (start < kernel_start)
@@ -417,7 +417,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
 			     early_pgtable_alloc);
 
 	/*
-	 * Map the linear alias of the [_stext, _etext) interval as
+	 * Map the linear alias of the [_stext, __init_begin) interval as
 	 * read-only/non-executable. This makes the contents of the
 	 * region accessible to subsystems such as hibernate, but
 	 * protects it from inadvertent modification or execution.
@@ -438,6 +438,8 @@ static void __init map_mem(pgd_t *pgd)
 		if (start >= end)
 			break;
+		if (memblock_is_nomap(reg))
+			continue;
 
 		__map_memblock(pgd, start, end);
 	}
@@ -445,12 +447,18 @@ static void __init map_mem(pgd_t *pgd)
 
 void mark_rodata_ro(void)
 {
-	if (!IS_ENABLED(CONFIG_DEBUG_RODATA))
-		return;
+	unsigned long section_size;
+	section_size = (unsigned long)_etext - (unsigned long)_stext;
 	create_mapping_late(__pa(_stext), (unsigned long)_stext,
-			    (unsigned long)__init_begin - (unsigned long)_stext,
-			    PAGE_KERNEL_ROX);
+			    section_size, PAGE_KERNEL_ROX);
+	/*
+	 * mark .rodata as read only. Use __init_begin rather than __end_rodata
+	 * to cover NOTES and EXCEPTION_TABLE.
+	 */
+	section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
+	create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
+			    section_size, PAGE_KERNEL_RO);
 }
 
 void fixup_init(void)
@@ -489,9 +497,10 @@ static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
  */
 static void __init map_kernel(pgd_t *pgd)
 {
-	static struct vm_struct vmlinux_text, vmlinux_init, vmlinux_data;
+	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
 
 	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+	map_kernel_chunk(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata);
 	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, &vmlinux_init);
 	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
deleted file mode 100644
index 984edcda1850..000000000000
--- a/arch/arm64/mm/proc-macros.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Based on arch/arm/mm/proc-macros.S
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
-/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
-	.macro	vma_vm_mm, rd, rn
-	ldr	\rd, [\rn, #VMA_VM_MM]
-	.endm
-
-/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
-	.macro	mmid, rd, rn
-	ldr	\rd, [\rn, #MM_CONTEXT_ID]
-	.endm
-
-/*
- * dcache_line_size - get the minimum D-cache line size from the CTR register.
- */
-	.macro	dcache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
-
-/*
- * icache_line_size - get the minimum I-cache line size from the CTR register.
- */
-	.macro	icache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	and	\tmp, \tmp, #0xf		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
-
-/*
- * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
- */
-	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
-#ifndef CONFIG_ARM64_VA_BITS_48
-	ldr_l	\tmpreg, idmap_t0sz
-	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
-#endif
-	.endm
-
-/*
- * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
- */
-	.macro	reset_pmuserenr_el0, tmpreg
-	mrs	\tmpreg, id_aa64dfr0_el1	// Check ID_AA64DFR0_EL1 PMUVer
-	sbfx	\tmpreg, \tmpreg, #8, #4
-	cmp	\tmpreg, #1			// Skip if no PMU present
-	b.lt	9000f
-	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
-9000:
-	.endm
-
-/*
- * Macro to perform a data cache maintenance for the interval
- * [kaddr, kaddr + size)
- *
- *	op:		operation passed to dc instruction
- *	domain:		domain used in dsb instruciton
- *	kaddr:		starting virtual address of the region
- *	size:		size of the region
- *	Corrupts:	kaddr, size, tmp1, tmp2
- */
-	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
-	dcache_line_size \tmp1, \tmp2
-	add	\size, \kaddr, \size
-	sub	\tmp2, \tmp1, #1
-	bic	\kaddr, \kaddr, \tmp2
-9998:	dc	\op, \kaddr
-	add	\kaddr, \kaddr, \tmp1
-	cmp	\kaddr, \size
-	b.lo	9998b
-	dsb	\domain
-	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a92738e8b1eb..85a542b21575 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -23,13 +23,10 @@
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/hwcap.h>
-#include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
 
-#include "proc-macros.S"
-
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
 #elif defined(CONFIG_ARM64_16K_PAGES)
@@ -139,17 +136,8 @@ ENTRY(cpu_do_switch_mm)
 	bfi	x0, x1, #48, #16		// set the ASID
 	msr	ttbr0_el1, x0			// set TTBR0
 	isb
-alternative_if_not ARM64_WORKAROUND_CAVIUM_27456
-	ret
-	nop
-	nop
-	nop
-alternative_else
-	ic	iallu
-	dsb	nsh
-	isb
+	post_ttbr0_update_workaround
 	ret
-alternative_endif
 ENDPROC(cpu_do_switch_mm)
 
	.pushsection ".idmap.text", "ax"
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 8bbe9401f4f0..6d6e4af1a4bf 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -49,6 +49,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/uaccess.h>
 #include <xen/interface/xen.h>
 
@@ -89,6 +90,24 @@ ENTRY(privcmd_call)
	mov x2, x3
	mov x3, x4
	mov x4, x5
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	/*
+	 * Privcmd calls are issued by the userspace. The kernel needs to
+	 * enable access to TTBR0_EL1 as the hypervisor would issue stage 1
+	 * translations to user memory via AT instructions. Since AT
+	 * instructions are not affected by the PAN bit (ARMv8.1), we only
+	 * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
+	 * is enabled (it implies that hardware UAO and PAN disabled).
+	 */
+	uaccess_enable_not_uao x6, x7
+#endif
	hvc XEN_IMM
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	/*
+	 * Disable userspace access from kernel once the hyp call completed.
+	 */
+	uaccess_disable_not_uao x6
+#endif
	ret
 ENDPROC(privcmd_call);
diff --git a/arch/ia64/include/asm/early_ioremap.h b/arch/ia64/include/asm/early_ioremap.h
new file mode 100644
index 000000000000..eec9e1d1b833
--- /dev/null
+++ b/arch/ia64/include/asm/early_ioremap.h
@@ -0,0 +1,10 @@
+#ifndef _ASM_IA64_EARLY_IOREMAP_H
+#define _ASM_IA64_EARLY_IOREMAP_H
+
+extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size);
+#define early_memremap(phys_addr, size)		early_ioremap(phys_addr, size)
+
+extern void early_iounmap (volatile void __iomem *addr, unsigned long size);
+#define early_memunmap(addr, size)		early_iounmap(addr, size)
+
+#endif
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 8fdb9c7eeb66..5de673ac9cb1 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -20,6 +20,7 @@
  */
 
 #include <asm/unaligned.h>
+#include <asm/early_ioremap.h>
 
 /* We don't use IO slowdowns on the ia64, but.. */
 #define __SLOW_DOWN_IO	do { } while (0)
@@ -427,10 +428,6 @@ __writeq (unsigned long val, volatile void __iomem *addr)
 extern void __iomem * ioremap(unsigned long offset, unsigned long size);
 extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
 extern void iounmap (volatile void __iomem *addr);
-extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size);
-#define early_memremap(phys_addr, size)		early_ioremap(phys_addr, size)
-extern void early_iounmap (volatile void __iomem *addr, unsigned long size);
-#define early_memunmap(addr, size)		early_iounmap(addr, size)
 static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned long size)
 {
	return ioremap(phys_addr, size);
 }
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 57f52a2afa35..bcf0a9420619 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1003,7 +1003,7 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal)
 
 static struct binder_ref *binder_get_ref(struct binder_proc *proc,
-					 uint32_t desc)
+					 uint32_t desc, bool need_strong_ref)
 {
	struct rb_node *n = proc->refs_by_desc.rb_node;
	struct binder_ref *ref;
@@ -1011,12 +1011,16 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc,
	while (n) {
		ref = rb_entry(n, struct binder_ref, rb_node_desc);
 
-		if (desc < ref->desc)
+		if (desc < ref->desc) {
			n = n->rb_left;
-		else if (desc > ref->desc)
+		} else if (desc > ref->desc) {
			n = n->rb_right;
-		else
+		} else if (need_strong_ref && !ref->strong) {
+			binder_user_error("tried to use weak ref as strong ref\n");
+			return NULL;
+		} else {
			return ref;
+		}
	}
	return NULL;
 }
@@ -1286,7 +1290,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
		} break;
		case BINDER_TYPE_HANDLE:
		case BINDER_TYPE_WEAK_HANDLE: {
-			struct binder_ref *ref = binder_get_ref(proc, fp->handle);
+			struct binder_ref *ref = binder_get_ref(proc, fp->handle,
+						fp->type == BINDER_TYPE_HANDLE);
 
			if (ref == NULL) {
				pr_err("transaction release %d bad handle %d\n",
@@ -1381,7 +1386,7 @@ static void binder_transaction(struct binder_proc *proc,
		if (tr->target.handle) {
			struct binder_ref *ref;
 
-			ref = binder_get_ref(proc, tr->target.handle);
+			ref = binder_get_ref(proc, tr->target.handle, true);
			if (ref == NULL) {
				binder_user_error("%d:%d got transaction to invalid handle\n",
					proc->pid, thread->pid);
@@ -1578,7 +1583,9 @@ static void binder_transaction(struct binder_proc *proc,
				fp->type = BINDER_TYPE_HANDLE;
			else
				fp->type = BINDER_TYPE_WEAK_HANDLE;
+			fp->binder = 0;
			fp->handle = ref->desc;
+			fp->cookie = 0;
			binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE,
				       &thread->todo);
 
@@ -1590,7 +1597,8 @@ static void binder_transaction(struct binder_proc *proc,
		} break;
		case BINDER_TYPE_HANDLE:
		case BINDER_TYPE_WEAK_HANDLE: {
-			struct binder_ref *ref = binder_get_ref(proc, fp->handle);
+			struct binder_ref *ref = binder_get_ref(proc, fp->handle,
+						fp->type == BINDER_TYPE_HANDLE);
 
			if (ref == NULL) {
				binder_user_error("%d:%d got transaction with invalid handle, %d\n",
@@ -1625,7 +1633,9 @@ static void binder_transaction(struct binder_proc *proc,
					return_error = BR_FAILED_REPLY;
					goto err_binder_get_ref_for_node_failed;
				}
+				fp->binder = 0;
				fp->handle = new_ref->desc;
+				fp->cookie = 0;
				binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL);
				trace_binder_transaction_ref_to_ref(t, ref, new_ref);
@@ -1679,6 +1689,7 @@ static void binder_transaction(struct binder_proc *proc,
			binder_debug(BINDER_DEBUG_TRANSACTION,
				     " fd %d -> %d\n", fp->handle, target_fd);
			/* TODO: fput? */
+			fp->binder = 0;
			fp->handle = target_fd;
		} break;
@@ -1801,7 +1812,9 @@ static int binder_thread_write(struct binder_proc *proc,
						ref->desc);
				}
			} else
-				ref = binder_get_ref(proc, target);
+				ref = binder_get_ref(proc, target,
+						     cmd == BC_ACQUIRE ||
+						     cmd == BC_RELEASE);
			if (ref == NULL) {
				binder_user_error("%d:%d refcount change on invalid ref %d\n",
					proc->pid, thread->pid, target);
@@ -1997,7 +2010,7 @@ static int binder_thread_write(struct binder_proc *proc,
			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(binder_uintptr_t);
-			ref = binder_get_ref(proc, target);
+			ref = binder_get_ref(proc, target, false);
			if (ref == NULL) {
				binder_user_error("%d:%d %s invalid ref %d\n",
					proc->pid, thread->pid,
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index ec379a4164cc..f292917b00e7 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -18,3 +18,6 @@ obj-$(CONFIG_EFI_RUNTIME_MAP)		+= runtime-map.o
 obj-$(CONFIG_EFI_RUNTIME_WRAPPERS)	+= runtime-wrappers.o
 obj-$(CONFIG_EFI_STUB)			+= libstub/
 obj-$(CONFIG_EFI_FAKE_MEMMAP)		+= fake_mem.o
+
+arm-obj-$(CONFIG_EFI)			:= arm-init.o arm-runtime.o
+obj-$(CONFIG_ARM64)			+= $(arm-obj-y)
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
new file mode 100644
index 000000000000..9e15d571b53c
--- /dev/null
+++ b/drivers/firmware/efi/arm-init.c
@@ -0,0 +1,209 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 2.4
+ *
+ * Copyright (C) 2013 - 2015 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/efi.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/mm_types.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+#include <asm/efi.h>
+
+struct efi_memory_map memmap;
+
+u64 efi_system_table;
+
+static int __init is_normal_ram(efi_memory_desc_t *md)
+{
+	if (md->attribute & EFI_MEMORY_WB)
+		return 1;
+	return 0;
+}
+
+/*
+ * Translate a EFI virtual address into a physical address: this is necessary,
+ * as some data members of the EFI system table are virtually remapped after
+ * SetVirtualAddressMap() has been called.
+ */
+static phys_addr_t efi_to_phys(unsigned long addr)
+{
+	efi_memory_desc_t *md;
+
+	for_each_efi_memory_desc(&memmap, md) {
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
+		if (md->virt_addr == 0)
+			/* no virtual mapping has been installed by the stub */
+			break;
+		if (md->virt_addr <= addr &&
+		    (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT))
+			return md->phys_addr + addr - md->virt_addr;
+	}
+	return addr;
+}
+
+static int __init uefi_init(void)
+{
+	efi_char16_t *c16;
+	void *config_tables;
+	size_t table_size;
+	char vendor[100] = "unknown";
+	int i, retval;
+
+	efi.systab = early_memremap(efi_system_table,
+				    sizeof(efi_system_table_t));
+	if (efi.systab == NULL) {
+		pr_warn("Unable to map EFI system table.\n");
+		return -ENOMEM;
+	}
+
+	set_bit(EFI_BOOT, &efi.flags);
+	if (IS_ENABLED(CONFIG_64BIT))
+		set_bit(EFI_64BIT, &efi.flags);
+
+	/*
+	 * Verify the EFI Table
+	 */
+	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
+		pr_err("System table signature incorrect\n");
+		retval = -EINVAL;
+		goto out;
+	}
+	if ((efi.systab->hdr.revision >> 16) < 2)
+		pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n",
+			efi.systab->hdr.revision >> 16,
+			efi.systab->hdr.revision & 0xffff);
+
+	/* Show what we know for posterity */
+	c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor),
+			     sizeof(vendor) * sizeof(efi_char16_t));
+	if (c16) {
+		for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
+			vendor[i] = c16[i];
+		vendor[i] = '\0';
+		early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
+	}
+
+	pr_info("EFI v%u.%.02u by %s\n",
+		efi.systab->hdr.revision >> 16,
+		efi.systab->hdr.revision & 0xffff, vendor);
+
+	table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
+	config_tables = early_memremap(efi_to_phys(efi.systab->tables),
+				       table_size);
+	if (config_tables == NULL) {
+		pr_warn("Unable to map EFI config table array.\n");
+		retval = -ENOMEM;
+		goto out;
+	}
+	retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
+					 sizeof(efi_config_table_t), NULL);
+
+	early_memunmap(config_tables, table_size);
+out:
+	early_memunmap(efi.systab, sizeof(efi_system_table_t));
+	return retval;
+}
+
+/*
+ * Return true for RAM regions we want to permanently reserve.
+ */
+static __init int is_reserve_region(efi_memory_desc_t *md)
+{
+	switch (md->type) {
+	case EFI_LOADER_CODE:
+	case EFI_LOADER_DATA:
+	case EFI_BOOT_SERVICES_CODE:
+	case EFI_BOOT_SERVICES_DATA:
+	case EFI_CONVENTIONAL_MEMORY:
+	case EFI_PERSISTENT_MEMORY:
+		return 0;
+	default:
+		break;
+	}
+	return is_normal_ram(md);
+}
+
+static __init void reserve_regions(void)
+{
+	efi_memory_desc_t *md;
+	u64 paddr, npages, size;
+
+	if (efi_enabled(EFI_DBG))
+		pr_info("Processing EFI memory map:\n");
+
+	for_each_efi_memory_desc(&memmap, md) {
+		paddr = md->phys_addr;
+		npages = md->num_pages;
+
+		if (efi_enabled(EFI_DBG)) {
+			char buf[64];
+
+			pr_info(" 0x%012llx-0x%012llx %s",
+				paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
+				efi_md_typeattr_format(buf, sizeof(buf), md));
+		}
+
+		memrange_efi_to_native(&paddr, &npages);
+		size = npages << PAGE_SHIFT;
+
+		if (is_normal_ram(md))
+			early_init_dt_add_memory_arch(paddr, size);
+
+		if (is_reserve_region(md)) {
+			memblock_mark_nomap(paddr, size);
+			if (efi_enabled(EFI_DBG))
+				pr_cont("*");
+		}
+
+		if (efi_enabled(EFI_DBG))
+			pr_cont("\n");
+	}
+
+	set_bit(EFI_MEMMAP, &efi.flags);
+}
+
+void __init efi_init(void)
+{
+	struct efi_fdt_params params;
+
+	/* Grab UEFI information placed in FDT by stub */
+	if (!efi_get_fdt_params(&params))
+		return;
+
+	efi_system_table = params.system_table;
+
+	memmap.phys_map = params.mmap;
+	memmap.map = early_memremap(params.mmap, params.mmap_size);
+	if (memmap.map == NULL) {
+		/*
+		 * If we are booting via UEFI, the UEFI memory map is the only
+		 * description of memory we have, so there is little point in
+		 * proceeding if we cannot access it.
+		 */
+		panic("Unable to map EFI memory map.\n");
+	}
+	memmap.map_end = memmap.map + params.mmap_size;
+	memmap.desc_size = params.desc_size;
+	memmap.desc_version = params.desc_ver;
+
+	if (uefi_init() < 0)
+		return;
+
+	reserve_regions();
+	early_memunmap(memmap.map, params.mmap_size);
+	memblock_mark_nomap(params.mmap & PAGE_MASK,
+			    PAGE_ALIGN(params.mmap_size +
+				       (params.mmap & ~PAGE_MASK)));
+}
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
new file mode 100644
index 000000000000..6ae21e41a429
--- /dev/null
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -0,0 +1,135 @@
+/*
+ * Extensible Firmware Interface
+ *
+ * Based on Extensible Firmware Interface Specification version 2.4
+ *
+ * Copyright (C) 2013, 2014 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/mm_types.h>
+#include <linux/preempt.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <asm/cacheflush.h>
+#include <asm/efi.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+extern u64 efi_system_table;
+
+static struct mm_struct efi_mm = {
+	.mm_rb			= RB_ROOT,
+	.mm_users		= ATOMIC_INIT(2),
+	.mm_count		= ATOMIC_INIT(1),
+	.mmap_sem		= __RWSEM_INITIALIZER(efi_mm.mmap_sem),
+	.page_table_lock	= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
+	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
+};
+
+static bool __init efi_virtmap_init(void)
+{
+	efi_memory_desc_t *md;
+
+	efi_mm.pgd = pgd_alloc(&efi_mm);
+	init_new_context(NULL, &efi_mm);
+
+	for_each_efi_memory_desc(&memmap, md) {
+		phys_addr_t phys = md->phys_addr;
+		int ret;
+
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
+		if (md->virt_addr == 0)
+			return false;
+
+		ret = efi_create_mapping(&efi_mm, md);
+		if (!ret) {
+			pr_info(" EFI remap %pa => %p\n",
+				&phys, (void *)(unsigned long)md->virt_addr);
+		} else {
+			pr_warn(" EFI remap %pa: failed to create mapping (%d)\n",
+				&phys, ret);
+			return false;
+		}
+	}
+	return true;
+}
+
+/*
+ * Enable the UEFI Runtime Services if all prerequisites are in place, i.e.,
+ * non-early mapping of the UEFI system table and virtual mappings for all
+ * EFI_MEMORY_RUNTIME regions.
+ */
+static int __init arm_enable_runtime_services(void)
+{
+	u64 mapsize;
+
+	if (!efi_enabled(EFI_BOOT)) {
+		pr_info("EFI services will not be available.\n");
+		return 0;
+	}
+
+	if (efi_runtime_disabled()) {
+		pr_info("EFI runtime services will be disabled.\n");
+		return 0;
+	}
+
+	pr_info("Remapping and enabling EFI services.\n");
+
+	mapsize = memmap.map_end - memmap.map;
+	memmap.map = (__force void *)ioremap_cache(memmap.phys_map,
+						   mapsize);
+	if (!memmap.map) {
+		pr_err("Failed to remap EFI memory map\n");
+		return -ENOMEM;
+	}
+	memmap.map_end = memmap.map + mapsize;
+	efi.memmap = &memmap;
+
+	efi.systab = (__force void *)ioremap_cache(efi_system_table,
+						   sizeof(efi_system_table_t));
+	if (!efi.systab) {
+		pr_err("Failed to remap EFI System Table\n");
+		return -ENOMEM;
+	}
+	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
+	if (!efi_virtmap_init()) {
+		pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
+		return -ENOMEM;
+	}
+
+	/* Set up runtime services function pointers */
+	efi_native_runtime_setup();
+	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
+	efi.runtime_version = efi.systab->hdr.revision;
+
+	return 0;
+}
+early_initcall(arm_enable_runtime_services);
+
+void efi_virtmap_load(void)
+{
+	preempt_disable();
+	efi_set_pgd(&efi_mm);
+}
+
+void efi_virtmap_unload(void)
+{
+	efi_set_pgd(current->active_mm);
+	preempt_enable();
+}
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 3b52677f459a..c51f3b2fe3c0 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -25,6 +25,8 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 
+#include <asm/early_ioremap.h>
+
 struct efi __read_mostly efi = {
	.mps        = EFI_INVALID_TABLE_ADDR,
	.acpi       = EFI_INVALID_TABLE_ADDR,
diff --git a/drivers/md/dm-android-verity.c b/drivers/md/dm-android-verity.c
index 15ce2a81c1f4..bb6c1285e499 100644
--- a/drivers/md/dm-android-verity.c
+++ b/drivers/md/dm-android-verity.c
@@ -266,10 +266,7 @@ static inline int validate_fec_header(struct fec_header *header, u64 offset)
	    le32_to_cpu(header->version) != FEC_VERSION ||
	    le32_to_cpu(header->size) != sizeof(struct fec_header) ||
	    le32_to_cpu(header->roots) == 0 ||
-	    le32_to_cpu(header->roots) >= FEC_RSM ||
-	    offset < le32_to_cpu(header->fec_size) ||
-	    offset - le32_to_cpu(header->fec_size) !=
-	    le64_to_cpu(header->inp_size))
+	    le32_to_cpu(header->roots) >= FEC_RSM)
		return -EINVAL;
 
	return 0;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 1689075e2229..2986e270d19a 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -183,6 +183,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
			pr_debug("%s: %d bytes transferred: %d\n",
				 mmc_hostname(host),
				 mrq->data->bytes_xfered, mrq->data->error);
+#ifdef CONFIG_BLOCK
			if (mrq->lat_hist_enabled) {
				ktime_t completion;
				u_int64_t delta_us;
@@ -194,6 +195,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
					(mrq->data->flags & MMC_DATA_READ),
					delta_us);
			}
+#endif
			trace_mmc_blk_rw_end(cmd->opcode, cmd->arg, mrq->data);
		}
@@ -638,11 +640,13 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
	}
 
	if (!err && areq) {
+#ifdef CONFIG_BLOCK
		if (host->latency_hist_enabled) {
			areq->mrq->io_start = ktime_get();
			areq->mrq->lat_hist_enabled = 1;
		} else
			areq->mrq->lat_hist_enabled = 0;
+#endif
		trace_mmc_blk_rw_start(areq->mrq->cmd->opcode,
				       areq->mrq->cmd->arg,
				       areq->mrq->data);
@@ -2923,6 +2927,7 @@ static void __exit mmc_exit(void)
	destroy_workqueue(workqueue);
 }
 
+#ifdef CONFIG_BLOCK
 static ssize_t
 latency_hist_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -2970,6 +2975,7 @@ mmc_latency_hist_sysfs_exit(struct mmc_host *host)
 {
	device_remove_file(&host->class_dev, &dev_attr_latency_hist);
 }
+#endif
 
 subsys_initcall(mmc_init);
 module_exit(mmc_exit);
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 17068839c74b..443fdfc22d8a 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -392,7 +392,9 @@ int mmc_add_host(struct mmc_host *host)
	mmc_add_host_debugfs(host);
 #endif
 
+#ifdef CONFIG_BLOCK
	mmc_latency_hist_sysfs_init(host);
+#endif
 
	mmc_start_host(host);
	if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
@@ -422,7 +424,9 @@ void mmc_remove_host(struct mmc_host *host)
	mmc_remove_host_debugfs(host);
 #endif
 
+#ifdef CONFIG_BLOCK
	mmc_latency_hist_sysfs_exit(host);
+#endif
 
	device_del(&host->class_dev);
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index df560216d702..374f840f31a4 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -387,13 +387,22 @@ static void ion_handle_get(struct ion_handle *handle)
	kref_get(&handle->ref);
 }
 
-static int ion_handle_put(struct ion_handle *handle)
+static int ion_handle_put_nolock(struct ion_handle *handle)
+{
+	int ret;
+
+	ret = kref_put(&handle->ref, ion_handle_destroy);
+
+	return ret;
+}
+
+int ion_handle_put(struct ion_handle *handle)
 {
	struct ion_client *client = handle->client;
	int ret;
 
	mutex_lock(&client->lock);
-	ret = kref_put(&handle->ref, ion_handle_destroy);
+	ret = ion_handle_put_nolock(handle);
	mutex_unlock(&client->lock);
 
	return ret;
@@ -417,20 +426,30 @@ static struct ion_handle *ion_handle_lookup(struct ion_client *client,
	return ERR_PTR(-EINVAL);
 }
 
-static struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
+static struct ion_handle *ion_handle_get_by_id_nolock(struct ion_client *client,
						int id)
 {
	struct ion_handle *handle;
 
-	mutex_lock(&client->lock);
	handle = idr_find(&client->idr, id);
	if (handle)
		ion_handle_get(handle);
-	mutex_unlock(&client->lock);
 
	return handle ? handle : ERR_PTR(-EINVAL);
 }
 
+struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
+						int id)
+{
+	struct ion_handle *handle;
+
+	mutex_lock(&client->lock);
+	handle = ion_handle_get_by_id_nolock(client, id);
+	mutex_unlock(&client->lock);
+
+	return handle;
+}
+
 static bool ion_handle_validate(struct ion_client *client,
				struct ion_handle *handle)
 {
@@ -532,22 +551,28 @@ struct ion_handle *ion_alloc(struct ion_client *client, size_t len,
 }
 EXPORT_SYMBOL(ion_alloc);
 
-void ion_free(struct ion_client *client, struct ion_handle *handle)
+static void ion_free_nolock(struct ion_client *client, struct ion_handle *handle)
 {
	bool valid_handle;
 
	BUG_ON(client != handle->client);
 
-	mutex_lock(&client->lock);
	valid_handle = ion_handle_validate(client, handle);
	if (!valid_handle) {
		WARN(1, "%s: invalid handle passed to free.\n", __func__);
-		mutex_unlock(&client->lock);
		return;
	}
+	ion_handle_put_nolock(handle);
+}
+
+void ion_free(struct ion_client *client, struct ion_handle *handle)
+{
+	BUG_ON(client != handle->client);
+
+	mutex_lock(&client->lock);
+	ion_free_nolock(client, handle);
	mutex_unlock(&client->lock);
-	ion_handle_put(handle);
 }
 EXPORT_SYMBOL(ion_free);
@@ -1283,11 +1308,15 @@ static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
	{
		struct ion_handle *handle;
 
-		handle = ion_handle_get_by_id(client, data.handle.handle);
-		if (IS_ERR(handle))
+		mutex_lock(&client->lock);
+		handle = ion_handle_get_by_id_nolock(client, data.handle.handle);
+		if (IS_ERR(handle)) {
+			mutex_unlock(&client->lock);
			return PTR_ERR(handle);
-		ion_free(client, handle);
-		ion_handle_put(handle);
+		}
+		ion_free_nolock(client, handle);
+		ion_handle_put_nolock(handle);
+		mutex_unlock(&client->lock);
		break;
	}
	case ION_IOC_SHARE:
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 92e6726f6e37..21f198aa0961 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 static ssize_t
 read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 {
+	char *buf = file->private_data;
	ssize_t acc = 0;
	size_t size, tsz;
	size_t elf_buflen;
@@ -500,23 +501,20 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
			if (clear_user(buffer, tsz))
				return -EFAULT;
		} else if (is_vmalloc_or_module_addr((void *)start)) {
-			char * elf_buf;
-
-			elf_buf = kzalloc(tsz, GFP_KERNEL);
-			if (!elf_buf)
-				return -ENOMEM;
-			vread(elf_buf, (char *)start, tsz);
+			vread(buf, (char *)start, tsz);
			/* we have to zero-fill user buffer even if no read */
-			if (copy_to_user(buffer, elf_buf, tsz)) {
-				kfree(elf_buf);
+			if (copy_to_user(buffer, buf, tsz))
				return -EFAULT;
-			}
-			kfree(elf_buf);
		} else {
			if (kern_addr_valid(start)) {
				unsigned long n;
 
-				n = copy_to_user(buffer, (char *)start, tsz);
+				/*
+				 * Using bounce buffer to bypass the
+				 * hardened user copy kernel text checks.
+				 */
+				memcpy(buf, (char *) start, tsz);
+				n = copy_to_user(buffer, buf, tsz);
				/*
				 * We cannot distinguish between fault on source
				 * and fault on destination. When this happens
@@ -549,6 +547,11 @@ static int open_kcore(struct inode *inode, struct file *filp)
 {
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;
+
+	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!filp->private_data)
+		return -ENOMEM;
+
	if (kcore_need_update)
		kcore_update_ram();
	if (i_size_read(inode) != proc_root_kcore->size) {
@@ -559,10 +562,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
	return 0;
 }
 
+static int release_kcore(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
 
 static const struct file_operations proc_kcore_operations = {
	.read		= read_kcore,
	.open		= open_kcore,
+	.release	= release_kcore,
	.llseek		= default_llseek,
 };
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 24daf8fc4d7c..fec66f86eeff 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -25,6 +25,7 @@ enum {
	MEMBLOCK_NONE		= 0x0,	/* No special request */
	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
+	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
 };
 
 struct memblock_region {
@@ -82,6 +83,7 @@ bool memblock_overlaps_region(struct memblock_type *type,
 int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
+int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 ulong choose_memblock_flags(void);
 
 /* Low level functions */
@@ -184,6 +186,11 @@ static inline bool memblock_is_mirror(struct memblock_region *m)
	return m->flags & MEMBLOCK_MIRROR;
 }
 
+static inline bool memblock_is_nomap(struct memblock_region *m)
+{
+	return m->flags & MEMBLOCK_NOMAP;
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
			    unsigned long *end_pfn);
@@ -319,6 +326,7 @@ phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
 void memblock_enforce_memory_limit(phys_addr_t memory_limit);
 int memblock_is_memory(phys_addr_t addr);
+int memblock_is_map_memory(phys_addr_t addr);
 int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
 int memblock_is_reserved(phys_addr_t addr);
 bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 3349f0676acb..0860efd6e1be 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -137,7 +137,9 @@ struct mmc_request {
	void			(*done)(struct mmc_request *);/* completion function */
	struct mmc_host		*host;
	ktime_t			io_start;
+#ifdef CONFIG_BLOCK
	int			lat_hist_enabled;
+#endif
 };
 
 struct mmc_card;
diff --git a/kernel/audit.c b/kernel/audit.c
index 5ffcbd354a52..34f690b9213a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -870,6 +870,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
			return err;
	}
	if (s.mask & AUDIT_STATUS_PID) {
+		/* NOTE: we are using task_tgid_vnr() below because
+		 *       the s.pid value is relative to the namespace
+		 *       of the caller; at present this doesn't matter
+		 *       much since you can really only run auditd
+		 *       from the initial pid namespace, but something
+		 *       to keep in mind if this changes */
		int new_pid = s.pid;
 
		if ((!new_pid) && (task_tgid_vnr(current) != audit_pid))
@@ -1896,7 +1902,7 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
			 " euid=%u suid=%u fsuid=%u"
			 " egid=%u sgid=%u fsgid=%u tty=%s ses=%u",
			 task_ppid_nr(tsk),
-			 task_pid_nr(tsk),
+			 task_tgid_nr(tsk),
			 from_kuid(&init_user_ns, audit_get_loginuid(tsk)),
			 from_kuid(&init_user_ns, cred->uid),
			 from_kgid(&init_user_ns, cred->gid),
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 48f45987dc6c..63f0e495f517 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -458,7 +458,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 
		switch (f->type) {
		case AUDIT_PID:
-			pid = task_pid_nr(tsk);
+			pid = task_tgid_nr(tsk);
			result = audit_comparator(pid, f->op, f->val);
			break;
		case AUDIT_PPID:
@@ -1987,7 +1987,7 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
	if (!ab)
		return;
-	audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid);
+	audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid);
	audit_log_task_context(ab);
	audit_log_format(ab, " old-auid=%u auid=%u old-ses=%u ses=%u res=%d",
			 oldloginuid, loginuid, oldsessionid, sessionid, !rc);
@@ -2212,7 +2212,7 @@ void __audit_ptrace(struct task_struct *t)
 {
	struct audit_context *context = current->audit_context;
 
-	context->target_pid = task_pid_nr(t);
+	context->target_pid = task_tgid_nr(t);
	context->target_auid = audit_get_loginuid(t);
	context->target_uid = task_uid(t);
	context->target_sessionid = audit_get_sessionid(t);
@@ -2237,7 +2237,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
	if (audit_pid && t->tgid == audit_pid) {
		if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
-			audit_sig_pid = task_pid_nr(tsk);
+			audit_sig_pid = task_tgid_nr(tsk);
			if (uid_valid(tsk->loginuid))
				audit_sig_uid = tsk->loginuid;
			else
@@ -2337,7 +2337,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 void __audit_log_capset(const struct cred *new, const struct cred *old)
 {
	struct audit_context *context = current->audit_context;
-	context->capset.pid = task_pid_nr(current);
+	context->capset.pid = task_tgid_nr(current);
	context->capset.cap.effective   = new->cap_effective;
	context->capset.cap.inheritable = new->cap_effective;
	context->capset.cap.permitted   = new->cap_permitted;
@@ -2369,7 +2369,7 @@ static void audit_log_task(struct audit_buffer *ab)
			 from_kgid(&init_user_ns, gid),
			 sessionid);
	audit_log_task_context(ab);
-	audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
+	audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current));
	audit_log_untrustedstring(ab, get_task_comm(comm, current));
	audit_log_d_path_exe(ab, current->mm);
 }
diff --git a/mm/memblock.c b/mm/memblock.c
index d300f1329814..07ff069fef25 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -822,6 +822,17 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
	return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR);
 }
 
+/**
+ * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * Return 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
+{
+	return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP);
+}
 
 /**
  * __next_reserved_mem_region - next function for for_each_reserved_region()
@@ -913,6 +924,10 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
		if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
			continue;
 
+		/* skip nomap memory unless we were asked for it explicitly */
+		if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
+			continue;
+
		if (!type_b) {
			if (out_start)
				*out_start = m_start;
@@ -1022,6 +1037,10 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
		if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
			continue;
 
+		/* skip nomap memory unless we were asked for it explicitly */
+		if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
+			continue;
+
		if (!type_b) {
			if (out_start)
				*out_start = m_start;
@@ -1519,6 +1538,15 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
	return memblock_search(&memblock.memory, addr) != -1;
 }
 
+int __init_memblock memblock_is_map_memory(phys_addr_t addr)
+{
+	int i = memblock_search(&memblock.memory, addr);
+
+	if (i == -1)
+		return false;
+	return !memblock_is_nomap(&memblock.memory.regions[i]);
+}
+
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
			 unsigned long *start_pfn, unsigned long *end_pfn)
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index cccbf3068cdc..45d927ab807d 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -220,7 +220,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
	 */
	BUILD_BUG_ON(sizeof(a->u) > sizeof(void *)*2);
 
-	audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
+	audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current));
	audit_log_untrustedstring(ab, memcpy(comm, current->comm, sizeof(comm)));
 
	switch (a->type) {
@@ -294,7 +294,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
	case LSM_AUDIT_DATA_TASK: {
		struct task_struct *tsk = a->u.tsk;
		if (tsk) {
-			pid_t pid = task_pid_nr(tsk);
+			pid_t pid = task_tgid_nr(tsk);
			if (pid) {
				char comm[sizeof(tsk->comm)];
				audit_log_format(ab, " opid=%d ocomm=", pid);
