diff options
Diffstat (limited to 'arch/x86/kernel')
26 files changed, 249 insertions, 116 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d6f375f1b928..89829c3d5a74 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -45,17 +45,6 @@ static int __init setup_noreplace_smp(char *str) } __setup("noreplace-smp", setup_noreplace_smp); -#ifdef CONFIG_PARAVIRT -static int __initdata_or_module noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - #define DPRINTK(fmt, args...) \ do { \ if (debug_alternative) \ @@ -587,9 +576,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; - if (noreplace_paravirt) - return; - for (p = start; p < end; p++) { unsigned int used; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a41e523536a2..592e260ba05b 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -91,8 +91,12 @@ out_data: return NULL; } -static void free_apic_chip_data(struct apic_chip_data *data) +static void free_apic_chip_data(unsigned int virq, struct apic_chip_data *data) { +#ifdef CONFIG_X86_IO_APIC + if (virq < nr_legacy_irqs()) + legacy_irq_data[virq] = NULL; +#endif if (data) { free_cpumask_var(data->domain); free_cpumask_var(data->old_domain); @@ -316,11 +320,7 @@ static void x86_vector_free_irqs(struct irq_domain *domain, apic_data = irq_data->chip_data; irq_domain_reset_irq_data(irq_data); raw_spin_unlock_irqrestore(&vector_lock, flags); - free_apic_chip_data(apic_data); -#ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs()) - legacy_irq_data[virq + i] = NULL; -#endif + free_apic_chip_data(virq + i, apic_data); } } } @@ -361,7 +361,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, err = assign_irq_vector_policy(virq + i, node, data, info); if (err) { irq_data->chip_data = NULL; - free_apic_chip_data(data); + free_apic_chip_data(virq + i, data); goto error; } } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8cacf62ec458..2bbc74f8a4a8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/utsname.h> #include <linux/cpu.h> +#include <linux/module.h> #include <asm/nospec-branch.h> #include <asm/cmdline.h> @@ -89,20 +90,42 @@ static const char *spectre_v2_strings[] = { }; #undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + +#ifdef RETPOLINE +static bool spectre_v2_bad_module; + +bool retpoline_module_ok(bool has_retpoline) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) + return true; + + pr_err("System may be vulnerable to spectre v2\n"); + spectre_v2_bad_module = true; + return false; +} + +static inline const char *spectre_v2_module_string(void) +{ + return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; +} +#else +static inline const char *spectre_v2_module_string(void) { return ""; } +#endif + static void __init spec2_print_if_insecure(const char *reason) { if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static void __init spec2_print_if_secure(const char *reason) { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s\n", reason); + pr_info("%s selected on command line.\n", reason); } static inline bool retp_compiler(void) @@ -117,42 +140,67 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) return len == arglen && !strncmp(arg, opt, len); } +static const struct { + const char *option; + enum spectre_v2_mitigation_cmd cmd; + bool secure; +} mitigation_options[] = { + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, +}; + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { char arg[20]; - int ret; - - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, - sizeof(arg)); - if (ret > 0) { - if (match_option(arg, ret, "off")) { - goto disable; - } else if (match_option(arg, ret, "on")) { - spec2_print_if_secure("force enabled on command line."); - return SPECTRE_V2_CMD_FORCE; - } else if (match_option(arg, ret, "retpoline")) { - spec2_print_if_insecure("retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE; - } else if (match_option(arg, ret, "retpoline,amd")) { - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); - return SPECTRE_V2_CMD_AUTO; - } - spec2_print_if_insecure("AMD retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_AMD; - } else if (match_option(arg, ret, "retpoline,generic")) { - spec2_print_if_insecure("generic retpoline selected on command line."); - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; - } else if (match_option(arg, ret, "auto")) { + int ret, i; + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; + + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_NONE; + else { + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); return SPECTRE_V2_CMD_AUTO; } } - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + !IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; -disable: - spec2_print_if_insecure("disabled on command line."); - return SPECTRE_V2_CMD_NONE; + } + + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + + if (mitigation_options[i].secure) + spec2_print_if_secure(mitigation_options[i].option); + else + spec2_print_if_insecure(mitigation_options[i].option); + + return cmd; } /* Check for Skylake-like CPUs (for RSB handling) */ @@ -190,10 +238,10 @@ static void __init spectre_v2_select_mitigation(void) return; case SPECTRE_V2_CMD_FORCE: - /* FALLTRHU */ case SPECTRE_V2_CMD_AUTO: - goto retpoline_auto; - + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; case SPECTRE_V2_CMD_RETPOLINE_AMD: if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_amd; @@ -268,7 +316,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, { if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Vulnerable\n"); + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } ssize_t cpu_show_spectre_v2(struct device *dev, @@ -277,6 +325,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); + return sprintf(buf, "%s%s\n", spectre_v2_strings[spectre_v2_enabled], + spectre_v2_module_string()); } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f7f2ad3687ee..8eabbafff213 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -955,7 +955,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) int i; c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; + c->x86_cache_size = 0; c->x86_vendor = X86_VENDOR_UNKNOWN; c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 4cfba4371a71..101bfae369e1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -152,7 +152,6 @@ static void raise_mce(struct mce *m) if (context == MCJ_CTX_RANDOM) return; -#ifdef CONFIG_X86_LOCAL_APIC if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) { unsigned long start; int cpu; @@ -193,9 +192,7 @@ static void raise_mce(struct mce *m) raise_local(); put_cpu(); put_online_cpus(); - } else -#endif - { + } else { preempt_disable(); raise_local(); preempt_enable(); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 364fbad72e60..6edb9530d7e9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -60,6 +60,9 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); smp_load_acquire(&(p)); \ }) +/* sysfs synchronization */ +static DEFINE_MUTEX(mce_sysfs_mutex); + #define CREATE_TRACE_POINTS #include <trace/events/mce.h> @@ -2220,6 +2223,7 @@ static ssize_t set_ignore_ce(struct device *s, if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; + mutex_lock(&mce_sysfs_mutex); if (mca_cfg.ignore_ce ^ !!new) { if (new) { /* disable ce features */ @@ -2232,6 +2236,8 @@ static ssize_t set_ignore_ce(struct device *s, on_each_cpu(mce_enable_ce, (void *)1, 1); } } + mutex_unlock(&mce_sysfs_mutex); + return size; } @@ -2244,6 +2250,7 @@ static ssize_t set_cmci_disabled(struct device *s, if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; + mutex_lock(&mce_sysfs_mutex); if (mca_cfg.cmci_disabled ^ !!new) { if (new) { /* disable cmci */ @@ -2255,6 +2262,8 @@ static ssize_t set_cmci_disabled(struct device *s, on_each_cpu(mce_enable_ce, NULL, 1); } } + mutex_unlock(&mce_sysfs_mutex); + return size; } @@ -2262,8 +2271,19 @@ static ssize_t store_int_with_restart(struct device *s, struct device_attribute *attr, const char *buf, size_t size) { - ssize_t ret = device_store_int(s, attr, buf, size); + unsigned long old_check_interval = check_interval; + ssize_t ret = device_store_ulong(s, attr, buf, size); + + if (check_interval == old_check_interval) + return ret; + + if (check_interval < 1) + check_interval = 1; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); + return ret; } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 2a0f44d225fe..6da6f9cd6d2d 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -131,6 +131,9 @@ static size_t compute_container_size(u8 *data, u32 total_size) return size; } +static enum ucode_state +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size); + /* * Early load occurs before we can vmalloc(). So we look for the microcode * patch container file in initrd, traverse equivalent cpu table, look for a @@ -438,7 +441,7 @@ int __init save_microcode_in_initrd_amd(void) eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); - ret = load_microcode_amd(smp_processor_id(), eax, container, container_size); + ret = load_microcode_amd(true, eax, container, container_size); if (ret != UCODE_OK) retval = -EINVAL; @@ -854,7 +857,8 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, return UCODE_OK; } -enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) +static enum ucode_state +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { enum ucode_state ret; @@ -868,8 +872,8 @@ enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t s #ifdef CONFIG_X86_32 /* save BSP's matching patch for early load */ - if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { - struct ucode_patch *p = find_patch(cpu); + if (save) { + struct ucode_patch *p = find_patch(0); if (p) { memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), @@ -901,11 +905,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, { char fw_name[36] = "amd-ucode/microcode_amd.bin"; struct cpuinfo_x86 *c = &cpu_data(cpu); + bool bsp = c->cpu_index == boot_cpu_data.cpu_index; enum ucode_state ret = UCODE_NFOUND; const struct firmware *fw; /* reload ucode container only on the boot cpu */ - if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + if (!refresh_fw || !bsp) return UCODE_OK; if (c->x86 >= 0x15) @@ -922,7 +927,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, goto fw_release; } - ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size); + ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size); fw_release: release_firmware(fw); diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b3e94ef461fd..ce5f8a2e7ae6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -44,7 +44,7 @@ static struct microcode_ops *microcode_ops; -static bool dis_ucode_ldr; +static bool dis_ucode_ldr = true; static int __init disable_loader(char *str) { @@ -81,6 +81,7 @@ struct cpu_info_ctx { static bool __init check_loader_disabled_bsp(void) { + u32 a, b, c, d; #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); const char *opt = "dis_ucode_ldr"; @@ -93,8 +94,20 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (cmdline_find_option_bool(cmdline, option)) - *res = true; + a = 1; + c = 0; + native_cpuid(&a, &b, &c, &d); + + /* + * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not + * completely accurate as xen pv guests don't see that CPUID bit set but + * that's good enough as they don't land on the BSP path anyway. + */ + if (c & BIT(31)) + return *res; + + if (cmdline_find_option_bool(cmdline, option) <= 0) + *res = false; return *res; } @@ -122,9 +135,7 @@ void __init load_ucode_bsp(void) { int vendor; unsigned int family; - - if (check_loader_disabled_bsp()) - return; + bool intel = true; if (!have_cpuid_p()) return; @@ -134,16 +145,27 @@ void __init load_ucode_bsp(void) switch (vendor) { case X86_VENDOR_INTEL: - if (family >= 6) - load_ucode_intel_bsp(); + if (family < 6) + return; break; + case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_bsp(family); + if (family < 0x10) + return; + intel = false; break; + default: - break; + return; } + + if (check_loader_disabled_bsp()) + return; + + if (intel) + load_ucode_intel_bsp(); + else + load_ucode_amd_bsp(family); } static bool check_loader_disabled_ap(void) @@ -162,9 +184,6 @@ void load_ucode_ap(void) if (check_loader_disabled_ap()) return; - if (!have_cpuid_p()) - return; - vendor = x86_vendor(); family = x86_family(); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 2c76a1801393..2f38a99cdb98 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -1075,7 +1075,7 @@ static struct microcode_ops microcode_intel_ops = { static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) { - u64 llc_size = c->x86_cache_size * 1024; + u64 llc_size = c->x86_cache_size * 1024ULL; do_div(llc_size, c->x86_max_cores); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5b2f2306fbcc..fbf2edc3eb35 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -25,6 +25,7 @@ #include <linux/cpu.h> #include <linux/bitops.h> #include <linux/device.h> +#include <linux/nospec.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -188,8 +189,8 @@ static void release_pmc_hardware(void) {} static bool check_hw_exists(void) { - u64 val, val_fail, val_new= ~0; - int i, reg, reg_fail, ret = 0; + u64 val, val_fail = -1, val_new= ~0; + int i, reg, reg_fail = -1, ret = 0; int bios_fail = 0; int reg_safe = -1; @@ -297,17 +298,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) config = attr->config; - cache_type = (config >> 0) & 0xff; + cache_type = (config >> 0) & 0xff; if (cache_type >= PERF_COUNT_HW_CACHE_MAX) return -EINVAL; + cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); cache_op = (config >> 8) & 0xff; if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) return -EINVAL; + cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); cache_result = (config >> 16) & 0xff; if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); val = hw_cache_event_ids[cache_type][cache_op][cache_result]; @@ -404,6 +408,8 @@ int x86_setup_perfctr(struct perf_event *event) if (attr->config >= x86_pmu.max_events) return -EINVAL; + attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); + /* * The generic map: */ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 5cc2242d77c6..7b79c80ce029 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2716,7 +2716,7 @@ static unsigned bdw_limit_period(struct perf_event *event, unsigned left) X86_CONFIG(.event=0xc0, .umask=0x01)) { if (left < 128) left = 128; - left &= ~0x3fu; + left &= ~0x3fULL; } return left; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/kernel/cpu/perf_event_intel_cstate.c index 75a38b5a2e26..5b8c90935270 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cstate.c @@ -88,6 +88,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/perf_event.h> +#include <linux/nospec.h> #include <asm/cpu_device_id.h> #include "perf_event.h" @@ -409,6 +410,7 @@ static int cstate_pmu_event_init(struct perf_event *event) } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!pkg_msr[cfg].attr) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c index ec863b9a9f78..067427384a63 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/kernel/cpu/perf_event_msr.c @@ -1,4 +1,5 @@ #include <linux/perf_event.h> +#include <linux/nospec.h> enum perf_msr_id { PERF_MSR_TSC = 0, @@ -115,9 +116,6 @@ static int msr_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (cfg >= PERF_MSR_EVENT_MAX) - return -EINVAL; - /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || @@ -128,6 +126,11 @@ static int msr_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; + if (cfg >= PERF_MSR_EVENT_MAX) + return -EINVAL; + + cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); + if (!msr[cfg].attr) return -EINVAL; diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 18ca99f2798b..935225c0375f 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -87,8 +87,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) } /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); + if (c->x86_cache_size) + seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size); show_cpuinfo_core(m, c, cpu); show_cpuinfo_misc(m, c); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 8f1a3f443f7d..70284d38fdc2 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -669,14 +669,17 @@ __PAGE_ALIGNED_BSS initial_pg_pmd: .fill 1024*KPMDS,4,0 #else -ENTRY(initial_page_table) +.globl initial_page_table +initial_page_table: .fill 1024,4,0 #endif initial_pg_fixmap: .fill 1024,4,0 -ENTRY(empty_zero_page) +.globl empty_zero_page +empty_zero_page: .fill 4096,1,0 -ENTRY(swapper_pg_dir) +.globl swapper_pg_dir +swapper_pg_dir: .fill 1024,4,0 /* diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index be22f5a2192e..4e3b8a587c88 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -418,6 +418,7 @@ struct legacy_pic default_legacy_pic = { }; struct legacy_pic *legacy_pic = &default_legacy_pic; +EXPORT_SYMBOL(legacy_pic); static int __init i8259A_init_ops(void) { diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3eb804335458..30bc63876035 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -50,6 +50,8 @@ #include <linux/kallsyms.h> #include <linux/ftrace.h> #include <linux/kasan.h> +#include <linux/moduleloader.h> + #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/pgtable.h> @@ -196,6 +198,8 @@ retry: return (opcode != 0x62 && opcode != 0x67); case 0x70: return 0; /* can't boost conditional jump */ + case 0x90: + return opcode != 0x9a; /* can't boost call far */ case 0xc0: /* can't boost software-interruptions */ return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; @@ -400,10 +404,20 @@ int __copy_instruction(u8 *dest, u8 *src) return length; } +/* Recover page to RW mode before releasing it */ +void free_insn_page(void *page) +{ + set_memory_nx((unsigned long)page & PAGE_MASK, 1); + set_memory_rw((unsigned long)page & PAGE_MASK, 1); + module_memfree(page); +} + static int arch_copy_kprobe(struct kprobe *p) { int ret; + set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1); + /* Copy an instruction with recovering if other optprobe modifies it.*/ ret = __copy_instruction(p->ainsn.insn, p->addr); if (!ret) @@ -418,6 +432,8 @@ static int arch_copy_kprobe(struct kprobe *p) else p->ainsn.boostable = -1; + set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1); + /* Check whether the instruction modifies Interrupt Flag or not */ p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index ea8e2b846101..7aba9d6475a5 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -370,6 +370,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, } buf = (u8 *)op->optinsn.insn; + set_memory_rw((unsigned long)buf & PAGE_MASK, 1); /* Copy instructions into the out-of-line buffer */ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); @@ -392,6 +393,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, (u8 *)op->kp.addr + op->optinsn.size); + set_memory_ro((unsigned long)buf & PAGE_MASK, 1); + flush_icache_range((unsigned long) buf, (unsigned long) buf + TMPL_END_IDX + op->optinsn.size + RELATIVEJUMP_SIZE); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 469b23d6acc2..fd7e9937ddd6 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -71,12 +71,17 @@ static void load_segments(void) static void machine_kexec_free_page_tables(struct kimage *image) { free_page((unsigned long)image->arch.pgd); + image->arch.pgd = NULL; #ifdef CONFIG_X86_PAE free_page((unsigned long)image->arch.pmd0); + image->arch.pmd0 = NULL; free_page((unsigned long)image->arch.pmd1); + image->arch.pmd1 = NULL; #endif free_page((unsigned long)image->arch.pte0); + image->arch.pte0 = NULL; free_page((unsigned long)image->arch.pte1); + image->arch.pte1 = NULL; } static int machine_kexec_alloc_page_tables(struct kimage *image) @@ -93,7 +98,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image) !image->arch.pmd0 || !image->arch.pmd1 || #endif !image->arch.pte0 || !image->arch.pte1) { - machine_kexec_free_page_tables(image); return -ENOMEM; } return 0; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 819ab3f9c9c7..13d6b8ac0b0b 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -37,8 +37,11 @@ static struct kexec_file_ops *kexec_file_loaders[] = { static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.pud); + image->arch.pud = NULL; free_page((unsigned long)image->arch.pmd); + image->arch.pmd = NULL; free_page((unsigned long)image->arch.pte); + image->arch.pte = NULL; } static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) @@ -79,7 +82,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); return 0; err: - free_transition_pgtable(image); return result; } @@ -519,6 +521,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: value -= (u64)address; *(u32 *)location = value; break; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 005c03e93fc5..94779f66bf49 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -170,19 +170,28 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_X86_64_NONE: break; case R_X86_64_64: + if (*(u64 *)loc != 0) + goto invalid_relocation; *(u64 *)loc = val; break; case R_X86_64_32: + if (*(u32 *)loc != 0) + goto invalid_relocation; *(u32 *)loc = val; if (val != *(u32 *)loc) goto overflow; break; case R_X86_64_32S: + if (*(s32 *)loc != 0) + goto invalid_relocation; *(s32 *)loc = val; if ((s64)val != *(s32 *)loc) goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: + if (*(u32 *)loc != 0) + goto invalid_relocation; val -= (u64)loc; *(u32 *)loc = val; #if 0 @@ -198,6 +207,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } return 0; +invalid_relocation: + pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), loc, val); + return -ENOEXEC; + overflow: pr_err("overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c1b21d61b769..2754dad26839 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -81,10 +81,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) /* * Free current thread data structures etc.. */ -void exit_thread(void) +void exit_thread(struct task_struct *tsk) { - struct task_struct *me = current; - struct thread_struct *t = &me->thread; + struct thread_struct *t = &tsk->thread; unsigned long *bp = t->io_bitmap_ptr; struct fpu *fpu = &t->fpu; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fe89f938e0f0..00c7878043ef 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1442,6 +1442,8 @@ static inline void mwait_play_dead(void) void *mwait_ptr; int i; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return; if (!this_cpu_has(X86_FEATURE_MWAIT)) return; if (!this_cpu_has(X86_FEATURE_CLFLUSH)) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 22b81f35c500..1fbd2631be60 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -480,7 +480,6 @@ do_general_protection(struct pt_regs *regs, long error_code) } NOKPROBE_SYMBOL(do_general_protection); -/* May run on IST stack. */ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_DYNAMIC_FTRACE @@ -495,7 +494,15 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; + /* + * Use ist_enter despite the fact that we don't use an IST stack. + * We can be called from a kprobe in non-CONTEXT_KERNEL kernel + * mode or even during context tracking state changes. + * + * This means that we can't schedule. That's okay. + */ ist_enter(regs); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, @@ -512,15 +519,9 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) SIGTRAP) == NOTIFY_STOP) goto exit; - /* - * Let others (NMI) know that the debug stack is in use - * as we may switch to the interrupt stack. - */ - debug_stack_usage_inc(); preempt_conditional_sti(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); - debug_stack_usage_dec(); exit: ist_exit(regs); } @@ -886,19 +887,16 @@ void __init trap_init(void) cpu_init(); /* - * X86_TRAP_DB and X86_TRAP_BP have been set - * in early_trap_init(). However, ITS works only after - * cpu_init() loads TSS. See comments in early_trap_init(). + * X86_TRAP_DB was installed in early_trap_init(). However, + * IST works only after cpu_init() loads TSS. See comments + * in early_trap_init(). */ set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); - /* int3 can be called from all */ - set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); x86_init.irqs.trap_init(); #ifdef CONFIG_X86_64 memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16); set_nmi_gate(X86_TRAP_DB, &debug); - set_nmi_gate(X86_TRAP_BP, &int3); #endif } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c7c4d9c51e99..c42d4a3d9494 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -365,6 +365,8 @@ static int __init tsc_setup(char *str) tsc_clocksource_reliable = 1; if (!strncmp(str, "noirqtime", 9)) no_sched_irq_time = 1; + if (!strcmp(str, "unstable")) + mark_tsc_unstable("boot parameter"); return 1; } @@ -406,7 +408,7 @@ static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) hpet2 -= hpet1; tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); do_div(tmp, 1000000); - do_div(deltatsc, tmp); + deltatsc = div64_u64(deltatsc, tmp); return (unsigned long) deltatsc; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 510e80da7de4..af57736a0309 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -715,7 +715,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) return; check_vip: - if (VEFLAGS & X86_EFLAGS_VIP) { + if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) == + (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) { save_v86_state(regs, VM86_STI); return; } |
