Diffstat (limited to 'arch/arm64/mm/fault.c')
-rw-r--r--  arch/arm64/mm/fault.c  107
1 file changed, 82 insertions(+), 25 deletions(-)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 4c1a118c1d09..fb987b0fa53e 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -29,7 +29,9 @@
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
+#include <linux/preempt.h>
+#include <asm/bug.h>
#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/debug-monitors.h>
@@ -64,21 +66,21 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
break;
pud = pud_offset(pgd, addr);
- printk(", *pud=%016llx", pud_val(*pud));
+ pr_cont(", *pud=%016llx", pud_val(*pud));
if (pud_none(*pud) || pud_bad(*pud))
break;
pmd = pmd_offset(pud, addr);
- printk(", *pmd=%016llx", pmd_val(*pmd));
+ pr_cont(", *pmd=%016llx", pmd_val(*pmd));
if (pmd_none(*pmd) || pmd_bad(*pmd))
break;
pte = pte_offset_map(pmd, addr);
- printk(", *pte=%016llx", pte_val(*pte));
+ pr_cont(", *pte=%016llx", pte_val(*pte));
pte_unmap(pte);
} while(0);
- printk("\n");
+ pr_cont("\n");
}
#ifdef CONFIG_ARM64_HW_AFDBM
@@ -105,32 +107,38 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
/* only preserve the access flags and write permission */
pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
- /*
- * PTE_RDONLY is cleared by default in the asm below, so set it in
- * back if necessary (read-only or clean PTE).
- */
+ /* set PTE_RDONLY if actual read-only or clean PTE */
if (!pte_write(entry) || !pte_sw_dirty(entry))
pte_val(entry) |= PTE_RDONLY;
/*
* Setting the flags must be done atomically to avoid racing with the
- * hardware update of the access/dirty state.
+ * hardware update of the access/dirty state. The PTE_RDONLY bit must
+ * be set to the most permissive (lowest value) of *ptep and entry
+ * (calculated as: a & b == ~(~a | ~b)).
*/
+ pte_val(entry) ^= PTE_RDONLY;
asm volatile("// ptep_set_access_flags\n"
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
- " and %0, %0, %3 // clear PTE_RDONLY\n"
+ " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n"
" orr %0, %0, %4 // set flags\n"
+ " eor %0, %0, %3 // negate final PTE_RDONLY\n"
" stxr %w1, %0, %2\n"
" cbnz %w1, 1b\n"
: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
- : "L" (~PTE_RDONLY), "r" (pte_val(entry)));
+ : "L" (PTE_RDONLY), "r" (pte_val(entry)));
flush_tlb_fix_spurious_fault(vma, address);
return 1;
}
#endif
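
For illustration only, and not part of the patch: a minimal userspace C sketch of the eor/orr/eor sequence above. With PTE_RDONLY pre-flipped in entry, the sequence keeps PTE_RDONLY set only when both *ptep and entry had it set (the a & b == ~(~a | ~b) identity from the comment), while still ORing in the remaining flag bits. PTE_RDONLY is assumed here to be a single bit (bit 7 on arm64); the definitions below are local stand-ins, not the kernel's.

/*
 * Sketch of the PTE_RDONLY "most permissive" update. Only the RDONLY bit
 * is treated specially; every other bit of entry is simply ORed in.
 */
#include <assert.h>
#include <stdint.h>

#define PTE_RDONLY (1ULL << 7)	/* assumed single-bit flag, bit 7 on arm64 */

static uint64_t set_flags(uint64_t old, uint64_t entry)
{
	entry ^= PTE_RDONLY;	/* pre-flip, as done in C before the asm */
	old ^= PTE_RDONLY;	/* eor %0, %0, %3: negate PTE_RDONLY in *ptep */
	old |= entry;		/* orr %0, %0, %4: set flags */
	old ^= PTE_RDONLY;	/* eor %0, %0, %3: negate final PTE_RDONLY */
	return old;
}

int main(void)
{
	for (int a = 0; a < 2; a++)
		for (int b = 0; b < 2; b++) {
			uint64_t old = a ? PTE_RDONLY : 0;
			uint64_t entry = b ? PTE_RDONLY : 0;

			/* RDONLY survives only if it was set in both inputs */
			assert((set_flags(old, entry) & PTE_RDONLY) ==
			       (old & entry & PTE_RDONLY));
		}
	return 0;
}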
+static bool is_el1_instruction_abort(unsigned int esr)
+{
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
+}
+
/*
* The kernel tried to access some page that wasn't present.
*/
@@ -139,8 +147,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
{
/*
* Are we prepared to handle this kernel fault?
+ * We are almost certainly not prepared to handle instruction faults.
*/
- if (fixup_exception(regs))
+ if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
return;
/*
@@ -202,8 +211,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
#define VM_FAULT_BADMAP 0x010000
#define VM_FAULT_BADACCESS 0x020000
-#define ESR_LNX_EXEC (1 << 24)
-
static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
unsigned int mm_flags, unsigned long vm_flags,
struct task_struct *tsk)
@@ -242,6 +249,26 @@ out:
return fault;
}
+static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs)
+{
+ unsigned int ec = ESR_ELx_EC(esr);
+ unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+ if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+ return false;
+
+ if (system_uses_ttbr0_pan())
+ return fsc_type == ESR_ELx_FSC_FAULT &&
+ (regs->pstate & PSR_PAN_BIT);
+ else
+ return fsc_type == ESR_ELx_FSC_PERM;
+}
+
+static bool is_el0_instruction_abort(unsigned int esr)
+{
+ return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
+}
+
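
As a rough illustration of the decoding these helpers rely on (not part of the patch): the exception class (EC) sits in ESR_ELx bits [31:26] and the fault status code (FSC) in bits [5:0], where the low two bits encode the translation-table level, so masking with 0x3c makes all permission faults compare equal. The sketch below mirrors the !system_uses_ttbr0_pan() branch of is_permission_fault() in plain userspace C; the constants are local stand-ins assumed to match the architectural encodings, not the kernel's ESR_ELx_* definitions.

/* Sketch of ESR_ELx decoding for same-EL permission faults. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EC_SHIFT	26
#define EC_IABT_CUR	0x21	/* instruction abort from EL1 */
#define EC_DABT_CUR	0x25	/* data abort from EL1 */
#define FSC_TYPE_MASK	0x3c	/* drop the translation-table level */
#define FSC_PERM	0x0c	/* permission fault, any level */

static bool is_permission_fault(uint32_t esr)
{
	uint32_t ec = esr >> EC_SHIFT;

	if (ec != EC_DABT_CUR && ec != EC_IABT_CUR)
		return false;
	return (esr & FSC_TYPE_MASK) == FSC_PERM;
}

int main(void)
{
	/* EL1 data abort, level 3 permission fault: FSC = 0x0f */
	uint32_t esr = ((uint32_t)EC_DABT_CUR << EC_SHIFT) | 0x0f;

	printf("permission fault: %d\n", is_permission_fault(esr)); /* prints 1 */
	return 0;
}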
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
@@ -268,19 +295,24 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
if (user_mode(regs))
mm_flags |= FAULT_FLAG_USER;
- if (esr & ESR_LNX_EXEC) {
+ if (is_el0_instruction_abort(esr)) {
vm_flags = VM_EXEC;
} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
- /*
- * PAN bit set implies the fault happened in kernel space, but not
- * in the arch's user access functions.
- */
- if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
- goto no_context;
+ if (addr < USER_DS && is_permission_fault(esr, regs)) {
+ /* regs->orig_addr_limit may be 0 if we entered from EL0 */
+ if (regs->orig_addr_limit == KERNEL_DS)
+ die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
+
+ if (is_el1_instruction_abort(esr))
+ die("Attempting to execute userspace memory", regs, esr);
+
+ if (!search_exception_tables(regs->pc))
+ die("Accessing user space memory outside uaccess.h routines", regs, esr);
+ }
/*
* As per x86, we may deadlock here. However, since the kernel only
@@ -311,8 +343,11 @@ retry:
* signal first. We do not need to release the mmap_sem because it
* would already be released in __lock_page_or_retry in mm/filemap.c.
*/
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+ if (!user_mode(regs))
+ goto no_context;
return 0;
+ }
/*
* Major/minor page fault accounting is only done on the initial
@@ -429,7 +464,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
return 1;
}
-static struct fault_info {
+static const struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
int sig;
int code;
@@ -442,7 +477,7 @@ static struct fault_info {
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
- { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
+ { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
{ do_bad, SIGBUS, 0, "unknown 8" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
@@ -606,8 +641,30 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
}
#ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(void *__unused)
+int cpu_enable_pan(void *__unused)
{
+ /*
+ * We modify PSTATE. This won't work from irq context as the PSTATE
+ * is discarded once we return from the exception.
+ */
+ WARN_ON_ONCE(in_interrupt());
+
config_sctlr_el1(SCTLR_EL1_SPAN, 0);
+ asm(SET_PSTATE_PAN(1));
+ return 0;
}
#endif /* CONFIG_ARM64_PAN */
+
+#ifdef CONFIG_ARM64_UAO
+/*
+ * Kernel threads have fs=KERNEL_DS by default, and don't need to call
+ * set_fs(); devtmpfs in particular relies on this behaviour.
+ * We need to enable the feature at runtime (instead of adding it to
+ * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
+ */
+int cpu_enable_uao(void *__unused)
+{
+ asm(SET_PSTATE_UAO(1));
+ return 0;
+}
+#endif /* CONFIG_ARM64_UAO */