diff options
294 files changed, 5139 insertions, 1625 deletions
diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 701d39d3171a..56d6d8b796db 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -109,7 +109,13 @@ Header notes: 1 - 4K 2 - 16K 3 - 64K - Bits 3-63: Reserved. + Bit 3: Kernel physical placement + 0 - 2MB aligned base should be as close as possible + to the base of DRAM, since memory below it is not + accessible via the linear mapping + 1 - 2MB aligned base may be anywhere in physical + memory + Bits 4-63: Reserved. - When image_size is zero, a bootloader should attempt to keep as much memory as possible free for use by the kernel immediately after the @@ -117,14 +123,14 @@ Header notes: depending on selected features, and is effectively unbound. The Image must be placed text_offset bytes from a 2MB aligned base -address near the start of usable system RAM and called there. Memory -below that base address is currently unusable by Linux, and therefore it -is strongly recommended that this location is the start of system RAM. -The region between the 2 MB aligned base address and the start of the -image has no special significance to the kernel, and may be used for -other purposes. +address anywhere in usable system RAM and called there. The region +between the 2 MB aligned base address and the start of the image has no +special significance to the kernel, and may be used for other purposes. At least image_size bytes from the start of the image must be free for use by the kernel. +NOTE: versions prior to v4.6 cannot make use of memory below the +physical offset of the Image so it is recommended that the Image be +placed as close as possible to the start of system RAM. Any memory described to the kernel (even that below the start of the image) which is not marked as reserved from the kernel (e.g., with a diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt new file mode 100644 index 000000000000..58b71ddf9b60 --- /dev/null +++ b/Documentation/arm64/silicon-errata.txt @@ -0,0 +1,58 @@ + Silicon Errata and Software Workarounds + ======================================= + +Author: Will Deacon <will.deacon@arm.com> +Date : 27 November 2015 + +It is an unfortunate fact of life that hardware is often produced with +so-called "errata", which can cause it to deviate from the architecture +under specific circumstances. For hardware produced by ARM, these +errata are broadly classified into the following categories: + + Category A: A critical error without a viable workaround. + Category B: A significant or critical error with an acceptable + workaround. + Category C: A minor error that is not expected to occur under normal + operation. + +For more information, consult one of the "Software Developers Errata +Notice" documents available on infocenter.arm.com (registration +required). + +As far as Linux is concerned, Category B errata may require some special +treatment in the operating system. For example, avoiding a particular +sequence of code, or configuring the processor in a particular way. A +less common situation may require similar actions in order to declassify +a Category A erratum into a Category C erratum. These are collectively +known as "software workarounds" and are only required in the minority of +cases (e.g. those cases that both require a non-secure workaround *and* +can be triggered by Linux). + +For software workarounds that may adversely impact systems unaffected by +the erratum in question, a Kconfig entry is added under "Kernel +Features" -> "ARM errata workarounds via the alternatives framework". +These are enabled by default and patched in at runtime when an affected +CPU is detected. For less-intrusive workarounds, a Kconfig option is not +available and the code is structured (preferably with a comment) in such +a way that the erratum will not be hit. + +This approach can make it slightly onerous to determine exactly which +errata are worked around in an arbitrary kernel source tree, so this +file acts as a registry of software workarounds in the Linux Kernel and +will be updated when new workarounds are committed and backported to +stable kernels. + +| Implementor | Component | Erratum ID | Kconfig | ++----------------+-----------------+-----------------+-------------------------+ +| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | +| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 | +| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 | +| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 | +| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 | +| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | +| ARM | Cortex-A57 | #852523 | N/A | +| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +| | | | | +| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | +| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt index e63316239938..4199ffecc0ff 100644 --- a/Documentation/features/time/irq-time-acct/arch-support.txt +++ b/Documentation/features/time/irq-time-acct/arch-support.txt @@ -9,7 +9,7 @@ | alpha: | .. | | arc: | TODO | | arm: | ok | - | arm64: | .. | + | arm64: | ok | | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt index af6816bccb43..df1d1f3c9af2 100644 --- a/Documentation/features/vm/huge-vmap/arch-support.txt +++ b/Documentation/features/vm/huge-vmap/arch-support.txt @@ -9,7 +9,7 @@ | alpha: | TODO | | arc: | TODO | | arm: | TODO | - | arm64: | TODO | + | arm64: | ok | | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 8 +SUBLEVEL = 9 EXTRAVERSION = NAME = Blurry Fish Butt diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 47954ed990f8..00707aac72fc 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -792,3 +792,8 @@ tx-num-evt = <32>; rx-num-evt = <32>; }; + +&synctimer_32kclk { + assigned-clocks = <&mux_synctimer32k_ck>; + assigned-clock-parents = <&clkdiv32k_ick>; +}; diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi index 7ccce7529b0c..cc952cf8ec30 100644 --- a/arch/arm/boot/dts/armada-375.dtsi +++ b/arch/arm/boot/dts/armada-375.dtsi @@ -529,7 +529,7 @@ }; sata@a0000 { - compatible = "marvell,orion-sata"; + compatible = "marvell,armada-370-sata"; reg = <0xa0000 0x5000>; interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gateclk 14>, <&gateclk 20>; diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi index 3710755c6d76..85d2c377c332 100644 --- a/arch/arm/boot/dts/armada-385-linksys.dtsi +++ b/arch/arm/boot/dts/armada-385-linksys.dtsi @@ -117,7 +117,7 @@ }; /* USB part of the eSATA/USB 2.0 port */ - usb@50000 { + usb@58000 { status = "okay"; }; diff --git a/arch/arm/boot/dts/pxa3xx.dtsi b/arch/arm/boot/dts/pxa3xx.dtsi index cf6998a0804d..564341af7e97 100644 --- a/arch/arm/boot/dts/pxa3xx.dtsi +++ b/arch/arm/boot/dts/pxa3xx.dtsi @@ -30,7 +30,7 @@ reg = <0x43100000 90>; interrupts = <45>; clocks = <&clks CLK_NAND>; - dmas = <&pdma 97>; + dmas = <&pdma 97 3>; dma-names = "data"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 194c91b610ff..c35c349da069 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -79,6 +79,8 @@ #define rr_lo_hi(a1, a2) a1, a2 #endif +#define kvm_ksym_ref(kva) (kva) + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e06fd299de08..70e6d557c75f 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -969,7 +969,7 @@ static void cpu_init_hyp_mode(void *dummy) pgd_ptr = kvm_mmu_get_httbr(); stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; - vector_ptr = (unsigned long)__kvm_hyp_vector; + vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); @@ -1061,7 +1061,8 @@ static int init_hyp_mode(void) /* * Map the Hyp-code called directly from the host */ - err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); + err = create_hyp_mappings(kvm_ksym_ref(__kvm_hyp_code_start), + kvm_ksym_ref(__kvm_hyp_code_end)); if (err) { kvm_err("Cannot map world-switch code\n"); goto out_free_mappings; diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 3a10f1a8317a..bfd8bb371477 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -26,6 +26,7 @@ menuconfig ARCH_EXYNOS select S5P_DEV_MFC select SRAM select THERMAL + select THERMAL_OF select MFD_SYSCON help Support for SAMSUNG EXYNOS SoCs (EXYNOS4/5) diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index aa7b379e2661..2a3db0bd9e15 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -34,6 +34,7 @@ #include "pm.h" #include "control.h" #include "common.h" +#include "soc.h" /* Mach specific information to be recorded in the C-state driver_data */ struct omap3_idle_statedata { @@ -315,6 +316,69 @@ static struct cpuidle_driver omap3_idle_driver = { .safe_state_index = 0, }; +/* + * Numbers based on measurements made in October 2009 for PM optimized kernel + * with CPU freq enabled on device Nokia N900. Assumes OPP2 (main idle OPP, + * and worst case latencies). + */ +static struct cpuidle_driver omap3430_idle_driver = { + .name = "omap3430_idle", + .owner = THIS_MODULE, + .states = { + { + .enter = omap3_enter_idle_bm, + .exit_latency = 110 + 162, + .target_residency = 5, + .name = "C1", + .desc = "MPU ON + CORE ON", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 106 + 180, + .target_residency = 309, + .name = "C2", + .desc = "MPU ON + CORE ON", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 107 + 410, + .target_residency = 46057, + .name = "C3", + .desc = "MPU RET + CORE ON", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 121 + 3374, + .target_residency = 46057, + .name = "C4", + .desc = "MPU OFF + CORE ON", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 855 + 1146, + .target_residency = 46057, + .name = "C5", + .desc = "MPU RET + CORE RET", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 7580 + 4134, + .target_residency = 484329, + .name = "C6", + .desc = "MPU OFF + CORE RET", + }, + { + .enter = omap3_enter_idle_bm, + .exit_latency = 7505 + 15274, + .target_residency = 484329, + .name = "C7", + .desc = "MPU OFF + CORE OFF", + }, + }, + .state_count = ARRAY_SIZE(omap3_idle_data), + .safe_state_index = 0, +}; + /* Public functions */ /** @@ -333,5 +397,8 @@ int __init omap3_idle_init(void) if (!mpu_pd || !core_pd || !per_pd || !cam_pd) return -ENODEV; - return cpuidle_register(&omap3_idle_driver, NULL); + if (cpu_is_omap3430()) + return cpuidle_register(&omap3430_idle_driver, NULL); + else + return cpuidle_register(&omap3_idle_driver, NULL); } diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 3eaeaca5da05..3a911d8dea8b 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -368,6 +368,7 @@ void __init omap5_map_io(void) void __init dra7xx_map_io(void) { iotable_init(dra7xx_io_desc, ARRAY_SIZE(dra7xx_io_desc)); + omap_barriers_init(); } #endif /* diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 8e0bd5939e5a..147c90e70b2e 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1416,9 +1416,7 @@ static void _enable_sysc(struct omap_hwmod *oh) (sf & SYSC_HAS_CLOCKACTIVITY)) _set_clockactivity(oh, oh->class->sysc->clockact, &v); - /* If the cached value is the same as the new value, skip the write */ - if (oh->_sysc_cache != v) - _write_sysconfig(v, oh); + _write_sysconfig(v, oh); /* * Set the autoidle bit only after setting the smartidle bit @@ -1481,7 +1479,9 @@ static void _idle_sysc(struct omap_hwmod *oh) _set_master_standbymode(oh, idlemode, &v); } - _write_sysconfig(v, oh); + /* If the cached value is the same as the new value, skip the write */ + if (oh->_sysc_cache != v) + _write_sysconfig(v, oh); } /** diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index 9ab8932403e5..56e55fd37d13 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -1,6 +1,7 @@ menuconfig ARCH_SIRF bool "CSR SiRF" if ARCH_MULTI_V7 select ARCH_HAS_RESET_CONTROLLER + select RESET_CONTROLLER select ARCH_REQUIRE_GPIOLIB select GENERIC_IRQ_CHIP select NO_IOPORT_MAP diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 579ab688312d..b1d8af794678 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -13,6 +13,7 @@ config ARM64 select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS + select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC @@ -48,6 +49,7 @@ config ARM64 select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE + select HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB @@ -72,6 +74,7 @@ config ARM64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS + select HAVE_IRQ_TIME_ACCOUNTING select HAVE_MEMBLOCK select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS @@ -395,6 +398,7 @@ config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" depends on MODULES default y + select ARM64_MODULE_CMODEL_LARGE help This option builds kernel modules using the large memory model in order to avoid the use of the ADRP instruction, which can cause @@ -539,6 +543,9 @@ config HOTPLUG_CPU source kernel/Kconfig.preempt source kernel/Kconfig.hz +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + config ARCH_HAS_HOLES_MEMORYMODEL def_bool y if SPARSEMEM @@ -562,9 +569,6 @@ config HW_PERF_EVENTS config SYS_SUPPORTS_HUGETLBFS def_bool y -config ARCH_WANT_GENERAL_HUGETLB - def_bool y - config ARCH_WANT_HUGE_PMD_SHARE def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) @@ -737,10 +741,93 @@ config ARM64_LSE_ATOMICS endmenu +config ARM64_UAO + bool "Enable support for User Access Override (UAO)" + default y + help + User Access Override (UAO; part of the ARMv8.2 Extensions) + causes the 'unprivileged' variant of the load/store instructions to + be overriden to be privileged. + + This option changes get_user() and friends to use the 'unprivileged' + variant of the load/store instructions. This ensures that user-space + really did have access to the supplied memory. When addr_limit is + set to kernel memory the UAO bit will be set, allowing privileged + access to kernel memory. + + Choosing this option will cause copy_to_user() et al to use user-space + memory permissions. + + The feature is detected at runtime, the kernel will use the + regular load/store instructions if the cpu does not implement the + feature. + +config ARM64_MODULE_CMODEL_LARGE + bool + +config ARM64_MODULE_PLTS + bool + select ARM64_MODULE_CMODEL_LARGE + select HAVE_MOD_ARCH_SPECIFIC + +config RELOCATABLE + bool + help + This builds the kernel as a Position Independent Executable (PIE), + which retains all relocation metadata required to relocate the + kernel binary at runtime to a different virtual address than the + address it was linked at. + Since AArch64 uses the RELA relocation format, this requires a + relocation pass at runtime even if the kernel is loaded at the + same address it was linked at. + +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + select ARM64_MODULE_PLTS + select RELOCATABLE + help + Randomizes the virtual address at which the kernel image is + loaded, as a security feature that deters exploit attempts + relying on knowledge of the location of kernel internals. + + It is the bootloader's job to provide entropy, by passing a + random u64 value in /chosen/kaslr-seed at kernel entry. + + When booting via the UEFI stub, it will invoke the firmware's + EFI_RNG_PROTOCOL implementation (if available) to supply entropy + to the kernel proper. In addition, it will randomise the physical + location of the kernel Image as well. + + If unsure, say N. + +config RANDOMIZE_MODULE_REGION_FULL + bool "Randomize the module region independently from the core kernel" + depends on RANDOMIZE_BASE + default y + help + Randomizes the location of the module region without considering the + location of the core kernel. This way, it is impossible for modules + to leak information about the location of core kernel data structures + but it does imply that function calls between modules and the core + kernel will need to be resolved via veneers in the module PLT. + + When this option is not set, the module region will be randomized over + a limited range that contains the [_stext, _etext] interval of the + core kernel, so branch relocations are always in range. + endmenu menu "Boot options" +config ARM64_ACPI_PARKING_PROTOCOL + bool "Enable support for the ARM64 ACPI parking protocol" + depends on ACPI + help + Enable support for the ARM64 ACPI parking protocol. If disabled + the kernel will not allow booting through the ARM64 ACPI parking + protocol even if the corresponding data is present in the ACPI + MADT table. + config CMDLINE string "Default kernel command string" default "" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 285b32fa41c1..14c74a66c58e 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -15,6 +15,10 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 +ifneq ($(CONFIG_RELOCATABLE),) +LDFLAGS_vmlinux += -pie +endif + KBUILD_DEFCONFIG := defconfig # Check for binutils support for specific extensions @@ -29,6 +33,7 @@ endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) KBUILD_CFLAGS += -fno-pic KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_AFLAGS += $(lseinstr) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) @@ -43,10 +48,14 @@ endif CHECKFLAGS += -D__aarch64__ -ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) KBUILD_CFLAGS_MODULE += -mcmodel=large endif +ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) +KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index caafd63b8092..aee323b13802 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -87,9 +87,26 @@ void __init acpi_init_cpus(void); static inline void acpi_init_cpus(void) { } #endif /* CONFIG_ACPI */ +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +bool acpi_parking_protocol_valid(int cpu); +void __init +acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor); +#else +static inline bool acpi_parking_protocol_valid(int cpu) { return false; } +static inline void +acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor) +{} +#endif + static inline const char *acpi_get_enable_method(int cpu) { - return acpi_psci_present() ? "psci" : NULL; + if (acpi_psci_present()) + return "psci"; + + if (acpi_parking_protocol_valid(cpu)) + return "parking-protocol"; + + return NULL; } #ifdef CONFIG_ACPI_APEI diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index d56ec0715157..beccbdefa106 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -1,6 +1,8 @@ #ifndef __ASM_ALTERNATIVE_H #define __ASM_ALTERNATIVE_H +#include <asm/cpufeature.h> + #ifndef __ASSEMBLY__ #include <linux/init.h> @@ -19,7 +21,6 @@ struct alt_instr { void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -void free_alternatives_memory(void); #define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ @@ -64,6 +65,8 @@ void free_alternatives_memory(void); #else +#include <asm/assembler.h> + .macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len .word \orig_offset - . .word \alt_offset - . @@ -137,6 +140,65 @@ void free_alternatives_memory(void); alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +/* + * Generate the assembly for UAO alternatives with exception table entries. + * This is complicated as there is no post-increment or pair versions of the + * unprivileged instructions, and USER() only works for single instructions. + */ +#ifdef CONFIG_ARM64_UAO + .macro uao_ldp l, reg1, reg2, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: ldp \reg1, \reg2, [\addr], \post_inc; +8889: nop; + nop; + alternative_else + ldtr \reg1, [\addr]; + ldtr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + alternative_endif + + _asm_extable 8888b,\l; + _asm_extable 8889b,\l; + .endm + + .macro uao_stp l, reg1, reg2, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: stp \reg1, \reg2, [\addr], \post_inc; +8889: nop; + nop; + alternative_else + sttr \reg1, [\addr]; + sttr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + alternative_endif + + _asm_extable 8888b,\l; + _asm_extable 8889b,\l; + .endm + + .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc + alternative_if_not ARM64_HAS_UAO +8888: \inst \reg, [\addr], \post_inc; + nop; + alternative_else + \alt_inst \reg, [\addr]; + add \addr, \addr, \post_inc; + alternative_endif + + _asm_extable 8888b,\l; + .endm +#else + .macro uao_ldp l, reg1, reg2, addr, post_inc + USER(\l, ldp \reg1, \reg2, [\addr], \post_inc) + .endm + .macro uao_stp l, reg1, reg2, addr, post_inc + USER(\l, stp \reg1, \reg2, [\addr], \post_inc) + .endm + .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc + USER(\l, \inst \reg, [\addr], \post_inc) + .endm +#endif + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 12eff928ef8b..70f7b9e04598 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -94,12 +94,19 @@ dmb \opt .endm +/* + * Emit an entry into the exception table + */ + .macro _asm_extable, from, to + .pushsection __ex_table, "a" + .align 3 + .long (\from - .), (\to - .) + .popsection + .endm + #define USER(l, x...) \ 9999: x; \ - .section __ex_table,"a"; \ - .align 3; \ - .quad 9999b,l; \ - .previous + _asm_extable 9999b, l /* * Register aliases. @@ -193,6 +200,17 @@ lr .req x30 // link register str \src, [\tmp, :lo12:\sym] .endm + /* + * @sym: The name of the per-cpu variable + * @reg: Result of per_cpu(sym, smp_processor_id()) + * @tmp: scratch register + */ + .macro this_cpu_ptr, sym, reg, tmp + adr_l \reg, \sym + mrs \tmp, tpidr_el1 + add \reg, \reg, \tmp + .endm + /* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. @@ -204,4 +222,15 @@ lr .req x30 // link register .size __pi_##x, . - x; \ ENDPROC(x) + /* + * Emit a 64-bit absolute little endian symbol reference in a way that + * ensures that it will be resolved at build time, even when building a + * PIE binary. This requires cooperation from the linker script, which + * must emit the lo32/hi32 halves individually. + */ + .macro le64sym, sym + .long \sym\()_lo32 + .long \sym\()_hi32 + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 197e06afbf71..39c1d340fec5 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -36,7 +36,7 @@ static inline void atomic_andnot(int i, atomic_t *v) " stclr %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_or(int i, atomic_t *v) @@ -48,7 +48,7 @@ static inline void atomic_or(int i, atomic_t *v) " stset %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_xor(int i, atomic_t *v) @@ -60,7 +60,7 @@ static inline void atomic_xor(int i, atomic_t *v) " steor %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_add(int i, atomic_t *v) @@ -72,7 +72,7 @@ static inline void atomic_add(int i, atomic_t *v) " stadd %w[i], %[v]\n") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ @@ -90,7 +90,7 @@ static inline int atomic_add_return##name(int i, atomic_t *v) \ " add %w[i], %w[i], w30") \ : [i] "+r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return w0; \ } @@ -116,7 +116,7 @@ static inline void atomic_and(int i, atomic_t *v) " stclr %w[i], %[v]") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic_sub(int i, atomic_t *v) @@ -133,7 +133,7 @@ static inline void atomic_sub(int i, atomic_t *v) " stadd %w[i], %[v]") : [i] "+r" (w0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \ @@ -153,7 +153,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ " add %w[i], %w[i], w30") \ : [i] "+r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS , ##cl); \ \ return w0; \ } @@ -177,7 +177,7 @@ static inline void atomic64_andnot(long i, atomic64_t *v) " stclr %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_or(long i, atomic64_t *v) @@ -189,7 +189,7 @@ static inline void atomic64_or(long i, atomic64_t *v) " stset %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_xor(long i, atomic64_t *v) @@ -201,7 +201,7 @@ static inline void atomic64_xor(long i, atomic64_t *v) " steor %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_add(long i, atomic64_t *v) @@ -213,7 +213,7 @@ static inline void atomic64_add(long i, atomic64_t *v) " stadd %[i], %[v]\n") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ @@ -231,7 +231,7 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ " add %[i], %[i], x30") \ : [i] "+r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } @@ -257,7 +257,7 @@ static inline void atomic64_and(long i, atomic64_t *v) " stclr %[i], %[v]") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } static inline void atomic64_sub(long i, atomic64_t *v) @@ -274,7 +274,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) " stadd %[i], %[v]") : [i] "+r" (x0), [v] "+Q" (v->counter) : "r" (x1) - : "x30"); + : __LL_SC_CLOBBERS); } #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ @@ -294,7 +294,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ " add %[i], %[i], x30") \ : [i] "+r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } @@ -330,7 +330,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) "2:") : [ret] "+&r" (x0), [v] "+Q" (v->counter) : - : "x30", "cc", "memory"); + : __LL_SC_CLOBBERS, "cc", "memory"); return x0; } @@ -359,7 +359,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ " mov %" #w "[ret], " #w "30") \ : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \ : [old] "r" (x1), [new] "r" (x2) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } @@ -416,7 +416,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ [v] "+Q" (*(unsigned long *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ - : "x30" , ##cl); \ + : __LL_SC_CLOBBERS, ##cl); \ \ return x0; \ } diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h index 81151b67b26b..ebf2481889c3 100644 --- a/arch/arm64/include/asm/boot.h +++ b/arch/arm64/include/asm/boot.h @@ -11,4 +11,10 @@ #define MIN_FDT_ALIGN 8 #define MAX_FDT_SIZE SZ_2M +/* + * arm64 requires the kernel image to placed + * TEXT_OFFSET bytes beyond a 2 MB aligned base + */ +#define MIN_KIMG_ALIGN SZ_2M + #endif diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h new file mode 100644 index 000000000000..ed693c5bcec0 --- /dev/null +++ b/arch/arm64/include/asm/brk-imm.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_BRK_IMM_H +#define __ASM_BRK_IMM_H + +/* + * #imm16 values used for BRK instruction generation + * Allowed values for kgdb are 0x400 - 0x7ff + * 0x100: for triggering a fault on purpose (reserved) + * 0x400: for dynamic BRK instruction + * 0x401: for compile time BRK instruction + * 0x800: kernel-mode BUG() and WARN() traps + */ +#define FAULT_BRK_IMM 0x100 +#define KGDB_DYN_DBG_BRK_IMM 0x400 +#define KGDB_COMPILED_DBG_BRK_IMM 0x401 +#define BUG_BRK_IMM 0x800 + +#endif diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index 4a748ce9ba1a..561190d15881 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -18,7 +18,7 @@ #ifndef _ARCH_ARM64_ASM_BUG_H #define _ARCH_ARM64_ASM_BUG_H -#include <asm/debug-monitors.h> +#include <asm/brk-imm.h> #ifdef CONFIG_GENERIC_BUG #define HAVE_ARCH_BUG diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index ca3b7841e1c6..22dda613f9c9 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -68,6 +68,7 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); static inline void flush_cache_mm(struct mm_struct *mm) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 9ea611ea69df..510c7b404454 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -19,7 +19,6 @@ #define __ASM_CMPXCHG_H #include <linux/bug.h> -#include <linux/mmdebug.h> #include <asm/atomic.h> #include <asm/barrier.h> diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index b5e9cee4b5f8..13a6103130cd 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -36,6 +36,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64isar1; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; + u64 reg_id_aa64mmfr2; u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8f271b83f910..37a53fc6b384 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -30,8 +30,11 @@ #define ARM64_HAS_LSE_ATOMICS 5 #define ARM64_WORKAROUND_CAVIUM_23154 6 #define ARM64_WORKAROUND_834220 7 +#define ARM64_HAS_NO_HW_PREFETCH 8 +#define ARM64_HAS_UAO 9 +#define ARM64_ALT_PAN_NOT_UAO 10 -#define ARM64_NCAPS 8 +#define ARM64_NCAPS 11 #ifndef __ASSEMBLY__ @@ -176,7 +179,7 @@ u64 read_system_reg(u32 id); static inline bool cpu_supports_mixed_endian_el0(void) { - return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); + return id_aa64mmfr0_mixed_endian_el0(read_cpuid(SYS_ID_AA64MMFR0_EL1)); } static inline bool system_supports_mixed_endian_el0(void) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 1a5949364ed0..b3a83da152a7 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -32,12 +32,6 @@ #define MPIDR_AFFINITY_LEVEL(mpidr, level) \ ((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK) -#define read_cpuid(reg) ({ \ - u64 __val; \ - asm("mrs %0, " #reg : "=r" (__val)); \ - __val; \ -}) - #define MIDR_REVISION_MASK 0xf #define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) #define MIDR_PARTNUM_SHIFT 4 @@ -57,11 +51,22 @@ #define MIDR_IMPLEMENTOR(midr) \ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) -#define MIDR_CPU_PART(imp, partnum) \ +#define MIDR_CPU_MODEL(imp, partnum) \ (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) +#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ + MIDR_ARCHITECTURE_MASK) + +#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \ +({ \ + u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \ + u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \ + \ + _model == (model) && rv >= (rv_min) && rv <= (rv_max); \ + }) + #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_IMP_CAVIUM 0x43 @@ -75,8 +80,20 @@ #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) +#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) + #ifndef __ASSEMBLY__ +#include <asm/sysreg.h> + +#define read_cpuid(reg) ({ \ + u64 __val; \ + asm("mrs_s %0, " __stringify(reg) : "=r" (__val)); \ + __val; \ +}) + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID @@ -84,12 +101,12 @@ */ static inline u32 __attribute_const__ read_cpuid_id(void) { - return read_cpuid(MIDR_EL1); + return read_cpuid(SYS_MIDR_EL1); } static inline u64 __attribute_const__ read_cpuid_mpidr(void) { - return read_cpuid(MPIDR_EL1); + return read_cpuid(SYS_MPIDR_EL1); } static inline unsigned int __attribute_const__ read_cpuid_implementor(void) @@ -104,7 +121,7 @@ static inline unsigned int __attribute_const__ read_cpuid_part_number(void) static inline u32 __attribute_const__ read_cpuid_cachetype(void) { - return read_cpuid(CTR_EL0); + return read_cpuid(SYS_CTR_EL0); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 279c85b5ec09..2fcb9b7c876c 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -20,6 +20,7 @@ #include <linux/errno.h> #include <linux/types.h> +#include <asm/brk-imm.h> #include <asm/esr.h> #include <asm/insn.h> #include <asm/ptrace.h> @@ -47,19 +48,6 @@ #define BREAK_INSTR_SIZE AARCH64_INSN_SIZE /* - * #imm16 values used for BRK instruction generation - * Allowed values for kgbd are 0x400 - 0x7ff - * 0x100: for triggering a fault on purpose (reserved) - * 0x400: for dynamic BRK instruction - * 0x401: for compile time BRK instruction - * 0x800: kernel-mode BUG() and WARN() traps - */ -#define FAULT_BRK_IMM 0x100 -#define KGDB_DYN_DBG_BRK_IMM 0x400 -#define KGDB_COMPILED_DBG_BRK_IMM 0x401 -#define BUG_BRK_IMM 0x800 - -/* * BRK instruction encoding * The #imm16 value should be placed at bits[20:5] within BRK ins */ diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index faad6df49e5b..24ed037f09fd 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -24,15 +24,6 @@ #include <asm/ptrace.h> #include <asm/user.h> -typedef unsigned long elf_greg_t; - -#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t)) -#define ELF_CORE_COPY_REGS(dest, regs) \ - *(struct user_pt_regs *)&(dest) = (regs)->user_regs; - -typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -typedef struct user_fpsimd_state elf_fpregset_t; - /* * AArch64 static relocation types. */ @@ -86,6 +77,8 @@ typedef struct user_fpsimd_state elf_fpregset_t; #define R_AARCH64_MOVW_PREL_G2_NC 292 #define R_AARCH64_MOVW_PREL_G3 293 +#define R_AARCH64_RELATIVE 1027 + /* * These are used to set parameters in the core dumps. */ @@ -127,6 +120,17 @@ typedef struct user_fpsimd_state elf_fpregset_t; */ #define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) +#ifndef __ASSEMBLY__ + +typedef unsigned long elf_greg_t; + +#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t)) +#define ELF_CORE_COPY_REGS(dest, regs) \ + *(struct user_pt_regs *)&(dest) = (regs)->user_regs; + +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; +typedef struct user_fpsimd_state elf_fpregset_t; + /* * When the program starts, a1 contains a pointer to a function to be * registered with atexit, as per the SVR4 ABI. A value of 0 means we have no @@ -186,4 +190,6 @@ extern int aarch32_setup_vectors_page(struct linux_binprm *bprm, #endif /* CONFIG_COMPAT */ +#endif /* !__ASSEMBLY__ */ + #endif diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 309704544d22..1a617d46fce9 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -62,6 +62,16 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + + /* + * Used for kernel page table creation, so unmapped memory may be used + * for tables. + */ + FIX_PTE, + FIX_PMD, + FIX_PUD, + FIX_PGD, + __end_of_fixed_addresses }; diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index c5534facf941..3c60f37e48ab 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -28,6 +28,8 @@ struct dyn_arch_ftrace { extern unsigned long ftrace_graph_call; +extern void return_to_handler(void); + static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 007a69fc4f40..f2585cdd32c2 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -42,10 +42,8 @@ "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection\n" \ -" .pushsection __ex_table,\"a\"\n" \ -" .align 3\n" \ -" .quad 1b, 4b, 2b, 4b\n" \ -" .popsection\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ @@ -121,6 +119,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" +ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" @@ -133,10 +132,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "4: mov %w0, %w6\n" " b 3b\n" " .popsection\n" -" .pushsection __ex_table,\"a\"\n" -" .align 3\n" -" .quad 1b, 4b, 2b, 4b\n" -" .popsection\n" + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) +ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) : "memory"); diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index a57601f9d17c..8740297dac77 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 5 +#define NR_IPI 6 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index bb4052e85dba..bbc1e35aa601 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -26,36 +26,7 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - set_pte_at(mm, addr, ptep, pte); -} - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - ptep_clear_flush(vma, addr, ptep); -} - -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - ptep_set_wrprotect(mm, addr, ptep); -} -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - return ptep_get_and_clear(mm, addr, ptep); -} - -static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) -{ - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); -} static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, @@ -97,4 +68,19 @@ static inline void arch_clear_hugepage_flags(struct page *page) clear_bit(PG_dcache_clean, &page->flags); } +extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable); +#define arch_make_huge_pte arch_make_huge_pte +extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); +extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); +extern void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep); + #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 8e8d30684392..b77197d941fc 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -1,10 +1,45 @@ #ifndef __ASM_IRQ_H #define __ASM_IRQ_H +#define IRQ_STACK_SIZE THREAD_SIZE +#define IRQ_STACK_START_SP THREAD_START_SP + +#ifndef __ASSEMBLER__ + +#include <linux/percpu.h> + #include <asm-generic/irq.h> +#include <asm/thread_info.h> struct pt_regs; +DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack); + +/* + * The highest address on the stack, and the first to be used. Used to + * find the dummy-stack frame put down by el?_irq() in entry.S, which + * is structured as follows: + * + * ------------ + * | | <- irq_stack_ptr + * top ------------ + * | x19 | <- irq_stack_ptr - 0x08 + * ------------ + * | x29 | <- irq_stack_ptr - 0x10 + * ------------ + * + * where x19 holds a copy of the task stack pointer where the struct pt_regs + * from kernel_entry can be found. + * + */ +#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP) + +/* + * The offset from irq_stack_ptr where entry.S will store the original + * stack pointer. Used by unwind_frame() and dump_backtrace(). + */ +#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08))) + extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); static inline int nr_legacy_irqs(void) @@ -12,4 +47,14 @@ static inline int nr_legacy_irqs(void) return 0; } +static inline bool on_irq_stack(unsigned long sp, int cpu) +{ + /* variable names the same as kernel/stacktrace.c */ + unsigned long low = (unsigned long)per_cpu(irq_stack, cpu); + unsigned long high = low + IRQ_STACK_START_SP; + + return (low <= sp && sp <= high); +} + +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 2774fa384c47..71ad0f93eb71 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -7,13 +7,14 @@ #include <linux/linkage.h> #include <asm/memory.h> +#include <asm/pgtable-types.h> /* * KASAN_SHADOW_START: beginning of the kernel virtual addresses. * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. */ #define KASAN_SHADOW_START (VA_START) -#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3))) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) /* * This value is used to map an address to the corresponding shadow @@ -28,10 +29,12 @@ #define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) void kasan_init(void); +void kasan_copy_shadow(pgd_t *pgdir); asmlinkage void kasan_early_init(void); #else static inline void kasan_init(void) { } +static inline void kasan_copy_shadow(pgd_t *pgdir) { } #endif #endif diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a459714ee29e..5c6375d8528b 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -79,5 +79,17 @@ #define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) #endif +/* + * To make optimal use of block mappings when laying out the linear + * mapping, round down the base of physical memory to a size that can + * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE + * (64k granule), or a multiple that can be mapped using contiguous bits + * in the page tables: 32 * PMD_SIZE (16k granule) + */ +#ifdef CONFIG_ARM64_64K_PAGES +#define ARM64_MEMSTART_ALIGN SZ_512M +#else +#define ARM64_MEMSTART_ALIGN SZ_1G +#endif #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 5e377101f919..419bc6661b5c 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -102,6 +102,8 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +#define kvm_ksym_ref(sym) phys_to_virt((u64)&sym - kimage_voffset) + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a35ce7266aac..90c6368ad7c8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -222,7 +222,7 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -u64 kvm_call_hyp(void *hypfn, ...); +u64 __kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); @@ -243,8 +243,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, * Call initialization code, and switch to the full blown * HYP code. */ - kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, - hyp_stack_ptr, vector_ptr); + __kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, + hyp_stack_ptr, vector_ptr); } static inline void kvm_arch_hardware_disable(void) {} @@ -258,4 +258,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 3de42d68611d..23acc00be32d 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -26,6 +26,7 @@ __asm__(".arch_extension lse"); /* Macro for constructing calls to out-of-line ll/sc atomics */ #define __LL_SC_CALL(op) "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n" +#define __LL_SC_CLOBBERS "x16", "x17", "x30" /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 853953cd1f08..12f8a00fb3f1 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -24,6 +24,7 @@ #include <linux/compiler.h> #include <linux/const.h> #include <linux/types.h> +#include <asm/bug.h> #include <asm/sizes.h> /* @@ -45,15 +46,15 @@ * VA_START - the first kernel virtual address. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. - * The module space lives between the addresses given by TASK_SIZE - * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) #define VA_START (UL(0xffffffffffffffff) << VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) -#define MODULES_END (PAGE_OFFSET) -#define MODULES_VADDR (MODULES_END - SZ_64M) -#define PCI_IO_END (MODULES_VADDR - SZ_2M) +#define KIMAGE_VADDR (MODULES_END) +#define MODULES_END (MODULES_VADDR + MODULES_VSIZE) +#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE) +#define MODULES_VSIZE (SZ_128M) +#define PCI_IO_END (PAGE_OFFSET - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) #define TASK_SIZE_64 (UL(1) << VA_BITS) @@ -71,12 +72,27 @@ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) /* + * The size of the KASAN shadow region. This should be 1/8th of the + * size of the entire kernel virtual address space. + */ +#ifdef CONFIG_KASAN +#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - 3)) +#else +#define KASAN_SHADOW_SIZE (0) +#endif + +/* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h * files. Use virt_to_phys/phys_to_virt/__pa/__va instead. */ -#define __virt_to_phys(x) (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET)) -#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) +#define __virt_to_phys(x) ({ \ + phys_addr_t __x = (phys_addr_t)(x); \ + __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \ + (__x - kimage_voffset); }) + +#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) +#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* * Convert a page to/from a physical address @@ -100,19 +116,40 @@ #define MT_S2_NORMAL 0xf #define MT_S2_DEVICE_nGnRE 0x1 +#ifdef CONFIG_ARM64_4K_PAGES +#define IOREMAP_MAX_ORDER (PUD_SHIFT) +#else +#define IOREMAP_MAX_ORDER (PMD_SHIFT) +#endif + +#ifdef CONFIG_BLK_DEV_INITRD +#define __early_init_dt_declare_initrd(__start, __end) \ + do { \ + initrd_start = (__start); \ + initrd_end = (__end); \ + } while (0) +#endif + #ifndef __ASSEMBLY__ -extern phys_addr_t memstart_addr; +#include <linux/bitops.h> +#include <linux/mmdebug.h> + +extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ -#define PHYS_OFFSET ({ memstart_addr; }) +#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) + +/* the virtual base of the kernel image (minus TEXT_OFFSET) */ +extern u64 kimage_vaddr; + +/* the offset between the kernel virtual and physical mappings */ +extern u64 kimage_voffset; /* - * The maximum physical address that the linear direct mapping - * of system RAM can cover. (PAGE_OFFSET can be interpreted as - * a 2's complement signed quantity and negated to derive the - * maximum size of the linear mapping.) + * Allow all memory at the discovery stage. We will clip it later. */ -#define MAX_MEMBLOCK_ADDR ({ memstart_addr - PAGE_OFFSET - 1; }) +#define MIN_MEMBLOCK_ADDR 0 +#define MAX_MEMBLOCK_ADDR U64_MAX /* * PFNs are used to describe any physical page; this means diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 24165784b803..a00f7cf35bbd 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -27,6 +27,7 @@ #include <asm-generic/mm_hooks.h> #include <asm/cputype.h> #include <asm/pgtable.h> +#include <asm/tlbflush.h> #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) @@ -48,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = page_to_phys(empty_zero_page); + unsigned long ttbr = virt_to_phys(empty_zero_page); asm( " msr ttbr0_el1, %0 // set TTBR0\n" @@ -73,7 +74,7 @@ static inline bool __cpu_uses_extended_idmap(void) /* * Set TCR.T0SZ to its default value (based on VA_BITS) */ -static inline void cpu_set_default_tcr_t0sz(void) +static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr; @@ -86,7 +87,62 @@ static inline void cpu_set_default_tcr_t0sz(void) " msr tcr_el1, %0 ;" " isb" : "=&r" (tcr) - : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); + : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); +} + +#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)) +#define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz) + +/* + * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm. + * + * The idmap lives in the same VA range as userspace, but uses global entries + * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from + * speculative TLB fetches, we must temporarily install the reserved page + * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ. + * + * If current is a not a user task, the mm covers the TTBR1_EL1 page tables, + * which should not be installed in TTBR0_EL1. In this case we can leave the + * reserved page tables in place. + */ +static inline void cpu_uninstall_idmap(void) +{ + struct mm_struct *mm = current->active_mm; + + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_default_tcr_t0sz(); + + if (mm != &init_mm) + cpu_switch_mm(mm->pgd, mm); +} + +static inline void cpu_install_idmap(void) +{ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_idmap_tcr_t0sz(); + + cpu_switch_mm(idmap_pg_dir, &init_mm); +} + +/* + * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, + * avoiding the possibility of conflicting TLB entries being allocated. + */ +static inline void cpu_replace_ttbr1(pgd_t *pgd) +{ + typedef void (ttbr_replace_func)(phys_addr_t); + extern ttbr_replace_func idmap_cpu_replace_ttbr1; + ttbr_replace_func *replace_phys; + + phys_addr_t pgd_phys = virt_to_phys(pgd); + + replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1); + + cpu_install_idmap(); + replace_phys(pgd_phys); + cpu_uninstall_idmap(); } /* diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index e80e232b730e..e12af6754634 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -20,4 +20,21 @@ #define MODULE_ARCH_VERMAGIC "aarch64" +#ifdef CONFIG_ARM64_MODULE_PLTS +struct mod_arch_specific { + struct elf64_shdr *plt; + int plt_num_entries; + int plt_max_entries; +}; +#endif + +u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, + Elf64_Sym *sym); + +#ifdef CONFIG_RANDOMIZE_BASE +extern u64 module_alloc_base; +#else +#define module_alloc_base ((u64)_etext - MODULES_VSIZE) +#endif + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index c15053902942..ff98585d085a 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -42,11 +42,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) free_page((unsigned long)pmd); } -static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) { - set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); + set_pud(pud, __pud(pmd | prot)); } +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE); +} +#else +static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot) +{ + BUILD_BUG(); +} #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -62,11 +71,20 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) free_page((unsigned long)pud); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) { - set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); + set_pgd(pgdp, __pgd(pud | prot)); } +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE); +} +#else +static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot) +{ + BUILD_BUG(); +} #endif /* CONFIG_PGTABLE_LEVELS > 3 */ extern pgd_t *pgd_alloc(struct mm_struct *mm); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d6739e836f7b..5c25b831273d 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -90,7 +90,23 @@ /* * Contiguous page definitions. */ -#define CONT_PTES (_AC(1, UL) << CONT_SHIFT) +#ifdef CONFIG_ARM64_64K_PAGES +#define CONT_PTE_SHIFT 5 +#define CONT_PMD_SHIFT 5 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define CONT_PTE_SHIFT 7 +#define CONT_PMD_SHIFT 5 +#else +#define CONT_PTE_SHIFT 4 +#define CONT_PMD_SHIFT 4 +#endif + +#define CONT_PTES (1 << CONT_PTE_SHIFT) +#define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) +#define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) +#define CONT_PMDS (1 << CONT_PMD_SHIFT) +#define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) +#define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) /* the the numerical offset of the PTE within a range of CONT_PTES */ #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index eaa9cabf4066..c3c2518eecfe 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -36,19 +36,13 @@ * * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array * (rounded up to PUD_SIZE). - * VMALLOC_START: beginning of the kernel VA space + * VMALLOC_START: beginning of the kernel vmalloc space * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, * fixed mappings and modules */ #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) -#ifndef CONFIG_KASAN -#define VMALLOC_START (VA_START) -#else -#include <asm/kasan.h> -#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K) -#endif - +#define VMALLOC_START (MODULES_END) #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define VMEMMAP_START (VMALLOC_END + SZ_64K) @@ -59,6 +53,7 @@ #ifndef __ASSEMBLY__ +#include <asm/fixmap.h> #include <linux/mmdebug.h> extern void __pte_error(const char *file, int line, unsigned long val); @@ -69,11 +64,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -83,7 +78,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) @@ -123,8 +118,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) @@ -136,16 +131,6 @@ extern struct page *empty_zero_page; #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -/* Find an entry in the third-level page table. */ -#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) - /* * The following only work if pte_present(). Undefined behaviour otherwise. */ @@ -155,6 +140,7 @@ extern struct page *empty_zero_page; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -165,10 +151,18 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) +#define pte_valid_young(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) + +/* + * Could the pte be present in the TLB? We must check mm_tlb_flush_pending + * so that we don't erroneously return false for pages that have been + * remapped as PROT_NONE but are yet to be flushed from the TLB. + */ +#define pte_accessible(mm, pte) \ + (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) { @@ -219,7 +213,8 @@ static inline pte_t pte_mkspecial(pte_t pte) static inline pte_t pte_mkcont(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_CONT)); + pte = set_pte_bit(pte, __pgprot(PTE_CONT)); + return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE)); } static inline pte_t pte_mknoncont(pte_t pte) @@ -227,6 +222,11 @@ static inline pte_t pte_mknoncont(pte_t pte) return clear_pte_bit(pte, __pgprot(PTE_CONT)); } +static inline pmd_t pmd_mkcont(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | PMD_SECT_CONT); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; @@ -264,13 +264,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - if (pte_valid_user(pte)) { - if (!pte_special(pte) && pte_exec(pte)) - __sync_icache_dcache(pte, addr); + if (pte_present(pte)) { if (pte_sw_dirty(pte) && pte_write(pte)) pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; + if (pte_user(pte) && pte_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte, addr); } /* @@ -300,7 +300,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Hugetlb definitions. */ -#define HUGE_MAX_HSTATE 2 +#define HUGE_MAX_HSTATE 4 #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) @@ -354,6 +354,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, #define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) #define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) @@ -426,13 +427,31 @@ static inline void pmd_clear(pmd_t *pmdp) set_pmd(pmdp, __pmd(0)); } -static inline pte_t *pmd_page_vaddr(pmd_t pmd) +static inline phys_addr_t pmd_page_paddr(pmd_t pmd) { - return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK); + return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK; } +/* Find an entry in the third-level page table. */ +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + +#define pte_offset_phys(dir,addr) (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t)) +#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) + +#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) +#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) +#define pte_clear_fixmap() clear_fixmap(FIX_PTE) + #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) +/* use ONLY for statically allocated translation tables */ +#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -459,21 +478,37 @@ static inline void pud_clear(pud_t *pudp) set_pud(pudp, __pud(0)); } -static inline pmd_t *pud_page_vaddr(pud_t pud) +static inline phys_addr_t pud_page_paddr(pud_t pud) { - return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); + return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK; } /* Find an entry in the second-level page table. */ #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); -} +#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t)) +#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr)))) + +#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr)) +#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) +#define pmd_clear_fixmap() clear_fixmap(FIX_PMD) #define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) +/* use ONLY for statically allocated translation tables */ +#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) + +#else + +#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) + +/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */ +#define pmd_set_fixmap(addr) NULL +#define pmd_set_fixmap_offset(pudp, addr) ((pmd_t *)pudp) +#define pmd_clear_fixmap() + +#define pmd_offset_kimg(dir,addr) ((pmd_t *)dir) + #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -495,21 +530,37 @@ static inline void pgd_clear(pgd_t *pgdp) set_pgd(pgdp, __pgd(0)); } -static inline pud_t *pgd_page_vaddr(pgd_t pgd) +static inline phys_addr_t pgd_page_paddr(pgd_t pgd) { - return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); + return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK; } /* Find an entry in the frst-level page table. */ #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) -{ - return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); -} +#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t)) +#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr)))) + +#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) +#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr)) +#define pud_clear_fixmap() clear_fixmap(FIX_PUD) #define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) +/* use ONLY for statically allocated translation tables */ +#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) + +#else + +#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) + +/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */ +#define pud_set_fixmap(addr) NULL +#define pud_set_fixmap_offset(pgdp, addr) ((pud_t *)pgdp) +#define pud_clear_fixmap() + +#define pud_offset_kimg(dir,addr) ((pud_t *)dir) + #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) @@ -517,11 +568,16 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) -#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr)) +#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr)) + +#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr))) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr)) +#define pgd_clear_fixmap() clear_fixmap(FIX_PGD) + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | @@ -641,6 +697,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; * bits 0-1: present (must be zero) * bits 2-7: swap type * bits 8-57: swap offset + * bit 58: PTE_PROT_NONE (must be zero) */ #define __SWP_TYPE_SHIFT 2 #define __SWP_TYPE_BITS 6 @@ -666,7 +723,8 @@ extern int kern_addr_valid(unsigned long addr); #include <asm-generic/pgtable.h> -#define pgtable_cache_init() do { } while (0) +void pgd_cache_init(void); +#define pgtable_cache_init pgd_cache_init /* * On AArch64, the cache coherency is handled via the set_pte_at() function. diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 4acb7ca94fcd..cef1cf398356 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -29,8 +29,10 @@ #include <linux/string.h> +#include <asm/alternative.h> #include <asm/fpsimd.h> #include <asm/hw_breakpoint.h> +#include <asm/lse.h> #include <asm/pgtable-hwdef.h> #include <asm/ptrace.h> #include <asm/types.h> @@ -177,9 +179,11 @@ static inline void prefetchw(const void *ptr) } #define ARCH_HAS_SPINLOCK_PREFETCH -static inline void spin_lock_prefetch(const void *x) +static inline void spin_lock_prefetch(const void *ptr) { - prefetchw(x); + asm volatile(ARM64_LSE_ATOMIC_INSN( + "prfm pstl1strm, %a0", + "nop") : : "p" (ptr)); } #define HAVE_ARCH_PICK_MMAP_LAYOUT @@ -187,5 +191,6 @@ static inline void spin_lock_prefetch(const void *x) #endif void cpu_enable_pan(void *__unused); +void cpu_enable_uao(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h index 4df608a8459e..e368a55ebd22 100644 --- a/arch/arm64/include/asm/shmparam.h +++ b/arch/arm64/include/asm/shmparam.h @@ -21,7 +21,7 @@ * alignment value. Since we don't have aliasing D-caches, the rest of * the time we can safely use PAGE_SIZE. */ -#define COMPAT_SHMLBA 0x4000 +#define COMPAT_SHMLBA (4 * PAGE_SIZE) #include <asm-generic/shmparam.h> diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index d9c3d6a6100a..2013a4dc5124 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -64,6 +64,15 @@ extern void secondary_entry(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); +#else +static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask) +{ + BUILD_BUG(); +} +#endif + extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index c85e96d174a5..fc9682bfe002 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -26,9 +26,28 @@ * The memory barriers are implicit with the load-acquire and store-release * instructions. */ +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + unsigned int tmp; + arch_spinlock_t lockval; -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + asm volatile( +" sevl\n" +"1: wfe\n" +"2: ldaxr %w0, %2\n" +" eor %w1, %w0, %w0, ror #16\n" +" cbnz %w1, 1b\n" + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ +" stxr %w1, %w0, %2\n" +" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */ + /* LSE atomics */ +" nop\n" +" nop\n") + : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) + : + : "memory"); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 7318f6d54aa9..801a16dbbdf6 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -16,14 +16,19 @@ #ifndef __ASM_STACKTRACE_H #define __ASM_STACKTRACE_H +struct task_struct; + struct stackframe { unsigned long fp; unsigned long sp; unsigned long pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + unsigned int graph; +#endif }; -extern int unwind_frame(struct stackframe *frame); -extern void walk_stackframe(struct stackframe *frame, +extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); +extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d48ab5b41f52..b9fd8ec79033 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -70,15 +70,19 @@ #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) +#define SYS_ID_AA64MMFR2_EL1 sys_reg(3, 0, 0, 7, 2) #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) #define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) #define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) #define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) +#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ (!!x)<<8 | 0x1f) +#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\ + (!!x)<<8 | 0x1f) /* SCTLR_EL1 */ #define SCTLR_EL1_CP15BEN (0x1 << 5) @@ -135,6 +139,9 @@ #define ID_AA64MMFR1_VMIDBITS_SHIFT 4 #define ID_AA64MMFR1_HADBS_SHIFT 0 +/* id_aa64mmfr2 */ +#define ID_AA64MMFR2_UAO_SHIFT 4 + /* id_aa64dfr0 */ #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_WRPS_SHIFT 20 @@ -194,32 +201,32 @@ #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 - .equ __reg_num_x\num, \num + .equ .L__reg_num_x\num, \num .endr - .equ __reg_num_xzr, 31 + .equ .L__reg_num_xzr, 31 .macro mrs_s, rt, sreg - .inst 0xd5200000|(\sreg)|(__reg_num_\rt) + .inst 0xd5200000|(\sreg)|(.L__reg_num_\rt) .endm .macro msr_s, sreg, rt - .inst 0xd5000000|(\sreg)|(__reg_num_\rt) + .inst 0xd5000000|(\sreg)|(.L__reg_num_\rt) .endm #else asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" -" .equ __reg_num_x\\num, \\num\n" +" .equ .L__reg_num_x\\num, \\num\n" " .endr\n" -" .equ __reg_num_xzr, 31\n" +" .equ .L__reg_num_xzr, 31\n" "\n" " .macro mrs_s, rt, sreg\n" -" .inst 0xd5200000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n" " .endm\n" "\n" " .macro msr_s, sreg, rt\n" -" .inst 0xd5000000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n" " .endm\n" ); diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 90c7ff233735..abd64bd1f6d9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -73,10 +73,16 @@ register unsigned long current_stack_pointer asm ("sp"); */ static inline struct thread_info *current_thread_info(void) __attribute_const__; +/* + * struct thread_info can be accessed directly via sp_el0. + */ static inline struct thread_info *current_thread_info(void) { - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); + unsigned long sp_el0; + + asm ("mrs %0, sp_el0" : "=r" (sp_el0)); + + return (struct thread_info *)sp_el0; } #define thread_saved_pc(tsk) \ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index b2ede967fe7d..0685d74572af 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -36,11 +36,11 @@ #define VERIFY_WRITE 1 /* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is - * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * The exception table consists of pairs of relative offsets: the first + * is the relative offset to an instruction that is allowed to fault, + * and the second is the relative offset at which the program should + * continue. No registers are modified, so it is entirely up to the + * continuation code to figure out what to do. * * All the routines below use bits of fixup code that are out of line * with the main instruction path. This means when everything is well, @@ -50,9 +50,11 @@ struct exception_table_entry { - unsigned long insn, fixup; + int insn, fixup; }; +#define ARCH_HAS_RELATIVE_EXTABLE + extern int fixup_exception(struct pt_regs *regs); #define KERNEL_DS (-1UL) @@ -64,6 +66,16 @@ extern int fixup_exception(struct pt_regs *regs); static inline void set_fs(mm_segment_t fs) { current_thread_info()->addr_limit = fs; + + /* + * Enable/disable UAO so that copy_to_user() etc can access + * kernel memory with the unprivileged instructions. + */ + if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS) + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); + else + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO, + CONFIG_ARM64_UAO)); } #define segment_eq(a, b) ((a) == (b)) @@ -105,6 +117,12 @@ static inline void set_fs(mm_segment_t fs) #define access_ok(type, addr, size) __range_ok(addr, size) #define user_addr_max get_fs +#define _ASM_EXTABLE(from, to) \ + " .pushsection __ex_table, \"a\"\n" \ + " .align 3\n" \ + " .long (" #from " - .), (" #to " - .)\n" \ + " .popsection\n" + /* * The "__xxx" versions of the user access functions do not verify the address * space - it must have been done previously with a separate "access_ok()" @@ -113,9 +131,10 @@ static inline void set_fs(mm_segment_t fs) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_user_asm(instr, reg, x, addr, err) \ +#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ - "1: " instr " " reg "1, [%2]\n" \ + "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ + alt_instr " " reg "1, [%2]\n", feature) \ "2:\n" \ " .section .fixup, \"ax\"\n" \ " .align 2\n" \ @@ -123,10 +142,7 @@ static inline void set_fs(mm_segment_t fs) " mov %1, #0\n" \ " b 2b\n" \ " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 1b, 3b\n" \ - " .previous" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) @@ -134,26 +150,30 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 2: \ - __get_user_asm("ldrh", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 4: \ - __get_user_asm("ldr", "%w", __gu_val, (ptr), (err)); \ + __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __get_user_asm("ldr", "%", __gu_val, (ptr), (err)); \ + __get_user_asm("ldr", "ldtr", "%", __gu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ default: \ BUILD_BUG(); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ } while (0) @@ -181,19 +201,17 @@ do { \ ((x) = 0, -EFAULT); \ }) -#define __put_user_asm(instr, reg, x, addr, err) \ +#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ asm volatile( \ - "1: " instr " " reg "1, [%2]\n" \ + "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ + alt_instr " " reg "1, [%2]\n", feature) \ "2:\n" \ " .section .fixup,\"ax\"\n" \ " .align 2\n" \ "3: mov %w0, %3\n" \ " b 2b\n" \ " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 1b, 3b\n" \ - " .previous" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) @@ -201,25 +219,29 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 2: \ - __put_user_asm("strh", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 4: \ - __put_user_asm("str", "%w", __pu_val, (ptr), (err)); \ + __put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ case 8: \ - __put_user_asm("str", "%", __pu_val, (ptr), (err)); \ + __put_user_asm("str", "sttr", "%", __pu_val, (ptr), \ + (err), ARM64_HAS_UAO); \ break; \ default: \ BUILD_BUG(); \ } \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ CONFIG_ARM64_PAN)); \ } while (0) diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index aab5bf09e9d9..2b79b8a89457 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -16,6 +16,8 @@ #ifndef __ASM_WORD_AT_A_TIME_H #define __ASM_WORD_AT_A_TIME_H +#include <asm/uaccess.h> + #ifndef __AARCH64EB__ #include <linux/kernel.h> @@ -81,10 +83,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) #endif " b 2b\n" " .popsection\n" - " .pushsection __ex_table,\"a\"\n" - " .align 3\n" - " .quad 1b, 3b\n" - " .popsection" + _ASM_EXTABLE(1b, 3b) : "=&r" (ret), "=&r" (offset) : "r" (addr), "Q" (*(unsigned long *)addr)); diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 208db3df135a..b5c3933ed441 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -45,6 +45,7 @@ #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 #define PSR_PAN_BIT 0x00400000 +#define PSR_UAO_BIT 0x00800000 #define PSR_Q_BIT 0x08000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 474691f8b13a..49a2430b0786 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o +arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o @@ -41,6 +42,8 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o +arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c new file mode 100644 index 000000000000..4b1e5a7a98da --- /dev/null +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -0,0 +1,153 @@ +/* + * ARM64 ACPI Parking Protocol implementation + * + * Authors: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/acpi.h> +#include <linux/types.h> + +#include <asm/cpu_ops.h> + +struct cpu_mailbox_entry { + phys_addr_t mailbox_addr; + u8 version; + u8 gic_cpu_id; +}; + +static struct cpu_mailbox_entry cpu_mailbox_entries[NR_CPUS]; + +void __init acpi_set_mailbox_entry(int cpu, + struct acpi_madt_generic_interrupt *p) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + + cpu_entry->mailbox_addr = p->parked_address; + cpu_entry->version = p->parking_version; + cpu_entry->gic_cpu_id = p->cpu_interface_number; +} + +bool acpi_parking_protocol_valid(int cpu) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + + return cpu_entry->mailbox_addr && cpu_entry->version; +} + +static int acpi_parking_protocol_cpu_init(unsigned int cpu) +{ + pr_debug("%s: ACPI parked addr=%llx\n", __func__, + cpu_mailbox_entries[cpu].mailbox_addr); + + return 0; +} + +static int acpi_parking_protocol_cpu_prepare(unsigned int cpu) +{ + return 0; +} + +struct parking_protocol_mailbox { + __le32 cpu_id; + __le32 reserved; + __le64 entry_point; +}; + +static int acpi_parking_protocol_cpu_boot(unsigned int cpu) +{ + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + struct parking_protocol_mailbox __iomem *mailbox; + __le32 cpu_id; + + /* + * Map mailbox memory with attribute device nGnRE (ie ioremap - + * this deviates from the parking protocol specifications since + * the mailboxes are required to be mapped nGnRnE; the attribute + * discrepancy is harmless insofar as the protocol specification + * is concerned). + * If the mailbox is mistakenly allocated in the linear mapping + * by FW ioremap will fail since the mapping will be prevented + * by the kernel (it clashes with the linear mapping attributes + * specifications). + */ + mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox)); + if (!mailbox) + return -EIO; + + cpu_id = readl_relaxed(&mailbox->cpu_id); + /* + * Check if firmware has set-up the mailbox entry properly + * before kickstarting the respective cpu. + */ + if (cpu_id != ~0U) { + iounmap(mailbox); + return -ENXIO; + } + + /* + * We write the entry point and cpu id as LE regardless of the + * native endianness of the kernel. Therefore, any boot-loaders + * that read this address need to convert this address to the + * Boot-Loader's endianness before jumping. + */ + writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point); + writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id); + + arch_send_wakeup_ipi_mask(cpumask_of(cpu)); + + iounmap(mailbox); + + return 0; +} + +static void acpi_parking_protocol_cpu_postboot(void) +{ + int cpu = smp_processor_id(); + struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; + struct parking_protocol_mailbox __iomem *mailbox; + __le64 entry_point; + + /* + * Map mailbox memory with attribute device nGnRE (ie ioremap - + * this deviates from the parking protocol specifications since + * the mailboxes are required to be mapped nGnRnE; the attribute + * discrepancy is harmless insofar as the protocol specification + * is concerned). + * If the mailbox is mistakenly allocated in the linear mapping + * by FW ioremap will fail since the mapping will be prevented + * by the kernel (it clashes with the linear mapping attributes + * specifications). + */ + mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox)); + if (!mailbox) + return; + + entry_point = readl_relaxed(&mailbox->entry_point); + /* + * Check if firmware has cleared the entry_point as expected + * by the protocol specification. + */ + WARN_ON(entry_point); + + iounmap(mailbox); +} + +const struct cpu_operations acpi_parking_protocol_ops = { + .name = "parking-protocol", + .cpu_init = acpi_parking_protocol_cpu_init, + .cpu_prepare = acpi_parking_protocol_cpu_prepare, + .cpu_boot = acpi_parking_protocol_cpu_boot, + .cpu_postboot = acpi_parking_protocol_cpu_postboot +}; diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index ab9db0e9818c..d2ee1b21a10d 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -158,9 +158,3 @@ void apply_alternatives(void *start, size_t length) __apply_alternatives(®ion); } - -void free_alternatives_memory(void) -{ - free_reserved_area(__alt_instructions, __alt_instructions_end, - 0, "alternatives"); -} diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 937f5e58a4d3..c37202c0c838 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -62,7 +62,7 @@ struct insn_emulation { }; static LIST_HEAD(insn_emulation); -static int nr_insn_emulated; +static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); static void register_emulation_hooks(struct insn_emulation_ops *ops) @@ -173,7 +173,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, return ret; } -static void register_insn_emulation(struct insn_emulation_ops *ops) +static void __init register_insn_emulation(struct insn_emulation_ops *ops) { unsigned long flags; struct insn_emulation *insn; @@ -237,7 +237,7 @@ static struct ctl_table ctl_abi[] = { { } }; -static void register_insn_emulation_sysctl(struct ctl_table *table) +static void __init register_insn_emulation_sysctl(struct ctl_table *table) { unsigned long flags; int i = 0; @@ -297,11 +297,8 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .quad 0b, 4b\n" \ - " .quad 1b, 4b\n" \ - " .popsection\n" \ + _ASM_EXTABLE(0b, 4b) \ + _ASM_EXTABLE(1b, 4b) \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp) \ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index feb6b4efa641..e6bc988e8dbf 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -21,24 +21,12 @@ #include <asm/cputype.h> #include <asm/cpufeature.h> -#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) -#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) -#define MIDR_THUNDERX MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) - -#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ - MIDR_ARCHITECTURE_MASK) - static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry) { - u32 midr = read_cpuid_id(); - - if ((midr & CPU_MODEL_MASK) != entry->midr_model) - return false; - - midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK; - - return (midr >= entry->midr_range_min && midr <= entry->midr_range_max); + return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, + entry->midr_range_min, + entry->midr_range_max); } #define MIDR_RANGE(model, min, max) \ diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index b6bd7d447768..c7cfb8fe06f9 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -25,19 +25,30 @@ #include <asm/smp_plat.h> extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations acpi_parking_protocol_ops; extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS]; -static const struct cpu_operations *supported_cpu_ops[] __initconst = { +static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; +static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL + &acpi_parking_protocol_ops, +#endif + &cpu_psci_ops, + NULL, +}; + static const struct cpu_operations * __init cpu_get_ops(const char *name) { - const struct cpu_operations **ops = supported_cpu_ops; + const struct cpu_operations **ops; + + ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; while (*ops) { if (!strcmp(name, (*ops)->name)) @@ -75,8 +86,16 @@ static const char *__init cpu_read_enable_method(int cpu) } } else { enable_method = acpi_get_enable_method(cpu); - if (!enable_method) - pr_err("Unsupported ACPI enable-method\n"); + if (!enable_method) { + /* + * In ACPI systems the boot CPU does not require + * checking the enable method since for some + * boot protocol (ie parking protocol) it need not + * be initialized. Don't warn spuriously. + */ + if (cpu != 0) + pr_err("Unsupported ACPI enable-method\n"); + } } return enable_method; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0669c63281ea..7566cad9fa1d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -67,6 +67,10 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); .width = 0, \ } +/* meta feature for alternatives */ +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry); + static struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), @@ -123,6 +127,11 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_END, }; +static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static struct arm64_ftr_bits ftr_ctr[] = { U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), @@ -284,6 +293,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 3, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), @@ -408,6 +418,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); @@ -517,6 +528,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu, info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, + info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); /* * EL3 is not our concern. @@ -621,6 +634,18 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) return has_sre; } +static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry) +{ + u32 midr = read_cpuid_id(); + u32 rv_min, rv_max; + + /* Cavium ThunderX pass 1.x and 2.x */ + rv_min = 0; + rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK; + + return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -651,6 +676,28 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 2, }, #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ + { + .desc = "Software prefetching using PRFM", + .capability = ARM64_HAS_NO_HW_PREFETCH, + .matches = has_no_hw_prefetch, + }, +#ifdef CONFIG_ARM64_UAO + { + .desc = "User Access Override", + .capability = ARM64_HAS_UAO, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .field_pos = ID_AA64MMFR2_UAO_SHIFT, + .min_field_value = 1, + .enable = cpu_enable_uao, + }, +#endif /* CONFIG_ARM64_UAO */ +#ifdef CONFIG_ARM64_PAN + { + .capability = ARM64_ALT_PAN_NOT_UAO, + .matches = cpufeature_pan_not_uao, + }, +#endif /* CONFIG_ARM64_PAN */ {}, }; @@ -684,7 +731,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { {}, }; -static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: @@ -729,12 +776,12 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities * return rc; } -static void setup_cpu_hwcaps(void) +static void __init setup_cpu_hwcaps(void) { int i; const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; - for (i = 0; hwcaps[i].desc; i++) + for (i = 0; hwcaps[i].matches; i++) if (hwcaps[i].matches(&hwcaps[i])) cap_set_hwcap(&hwcaps[i]); } @@ -744,11 +791,11 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, { int i; - for (i = 0; caps[i].desc; i++) { + for (i = 0; caps[i].matches; i++) { if (!caps[i].matches(&caps[i])) continue; - if (!cpus_have_cap(caps[i].capability)) + if (!cpus_have_cap(caps[i].capability) && caps[i].desc) pr_info("%s %s\n", info, caps[i].desc); cpus_set_cap(caps[i].capability); } @@ -758,11 +805,12 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it on all active * CPUs */ -static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void __init +enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; - for (i = 0; caps[i].desc; i++) + for (i = 0; caps[i].matches; i++) if (caps[i].enable && cpus_have_cap(caps[i].capability)) on_each_cpu(caps[i].enable, NULL, true); } @@ -790,35 +838,36 @@ static inline void set_sys_caps_initialised(void) static u64 __raw_read_system_reg(u32 sys_id) { switch (sys_id) { - case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1); - case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1); - case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1); - case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1); - case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1); - case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1); - case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1); - case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1); - case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1); - case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1); - case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1); - case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); - case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1); - case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1); - case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1); - - case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1); - case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1); - case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1); - case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1); - - case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0); - case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0); - case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0); + case SYS_ID_PFR0_EL1: return read_cpuid(SYS_ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return read_cpuid(SYS_ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return read_cpuid(SYS_ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return read_cpuid(SYS_ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return read_cpuid(SYS_ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return read_cpuid(SYS_ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return read_cpuid(SYS_ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return read_cpuid(SYS_ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return read_cpuid(SYS_ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return read_cpuid(SYS_ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return read_cpuid(SYS_ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return read_cpuid(SYS_ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(SYS_ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return read_cpuid(SYS_MVFR0_EL1); + case SYS_MVFR1_EL1: return read_cpuid(SYS_MVFR1_EL1); + case SYS_MVFR2_EL1: return read_cpuid(SYS_MVFR2_EL1); + + case SYS_ID_AA64PFR0_EL1: return read_cpuid(SYS_ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(SYS_ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return read_cpuid(SYS_ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(SYS_ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return read_cpuid(SYS_ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return read_cpuid(SYS_ID_AA64MMFR1_EL1); + case SYS_ID_AA64MMFR2_EL1: return read_cpuid(SYS_ID_AA64MMFR2_EL1); + case SYS_ID_AA64ISAR0_EL1: return read_cpuid(SYS_ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return read_cpuid(SYS_ID_AA64ISAR1_EL1); + + case SYS_CNTFRQ_EL0: return read_cpuid(SYS_CNTFRQ_EL0); + case SYS_CTR_EL0: return read_cpuid(SYS_CTR_EL0); + case SYS_DCZID_EL0: return read_cpuid(SYS_DCZID_EL0); default: BUG(); return 0; @@ -868,7 +917,7 @@ void verify_local_cpu_capabilities(void) return; caps = arm64_features; - for (i = 0; caps[i].desc; i++) { + for (i = 0; caps[i].matches; i++) { if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) continue; /* @@ -881,7 +930,7 @@ void verify_local_cpu_capabilities(void) caps[i].enable(NULL); } - for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) { + for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) { if (!cpus_have_hwcap(&caps[i])) continue; if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) @@ -897,7 +946,7 @@ static inline void set_sys_caps_initialised(void) #endif /* CONFIG_HOTPLUG_CPU */ -static void setup_feature_capabilities(void) +static void __init setup_feature_capabilities(void) { update_cpu_capabilities(arm64_features, "detected feature:"); enable_cpu_capabilities(arm64_features); @@ -927,3 +976,9 @@ void __init setup_cpu_features(void) pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", L1_CACHE_BYTES, cls); } + +static bool __maybe_unused +cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry) +{ + return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO)); +} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 212ae6361d8b..966fbd52550b 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -201,35 +201,36 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); info->reg_ctr = read_cpuid_cachetype(); - info->reg_dczid = read_cpuid(DCZID_EL0); + info->reg_dczid = read_cpuid(SYS_DCZID_EL0); info->reg_midr = read_cpuid_id(); - info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); - info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); - info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); - info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); - info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); - info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); - info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); - info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); - - info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); - info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); - info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); - info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); - info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); - info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); - info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); - info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); - info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); - info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); - info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); - info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); - info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); - - info->reg_mvfr0 = read_cpuid(MVFR0_EL1); - info->reg_mvfr1 = read_cpuid(MVFR1_EL1); - info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + info->reg_id_aa64dfr0 = read_cpuid(SYS_ID_AA64DFR0_EL1); + info->reg_id_aa64dfr1 = read_cpuid(SYS_ID_AA64DFR1_EL1); + info->reg_id_aa64isar0 = read_cpuid(SYS_ID_AA64ISAR0_EL1); + info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1); + info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1); + info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1); + info->reg_id_aa64mmfr2 = read_cpuid(SYS_ID_AA64MMFR2_EL1); + info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1); + info->reg_id_aa64pfr1 = read_cpuid(SYS_ID_AA64PFR1_EL1); + + info->reg_id_dfr0 = read_cpuid(SYS_ID_DFR0_EL1); + info->reg_id_isar0 = read_cpuid(SYS_ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(SYS_ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(SYS_ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(SYS_ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(SYS_ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(SYS_ID_ISAR5_EL1); + info->reg_id_mmfr0 = read_cpuid(SYS_ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(SYS_ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(SYS_ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(SYS_ID_MMFR3_EL1); + info->reg_id_pfr0 = read_cpuid(SYS_ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(SYS_ID_PFR1_EL1); + + info->reg_mvfr0 = read_cpuid(SYS_MVFR0_EL1); + info->reg_mvfr1 = read_cpuid(SYS_MVFR1_EL1); + info->reg_mvfr2 = read_cpuid(SYS_MVFR2_EL1); cpuinfo_detect_icache_policy(info); diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index a773db92908b..f82036e02485 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -61,7 +61,7 @@ ENTRY(entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - ldr x21, =stext_offset + movz x21, #:abs_g0:stext_offset add x21, x0, x21 /* diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7ed3d75f6304..1f7f5a2b61bf 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -27,6 +27,7 @@ #include <asm/cpufeature.h> #include <asm/errno.h> #include <asm/esr.h> +#include <asm/irq.h> #include <asm/thread_info.h> #include <asm/unistd.h> @@ -88,9 +89,12 @@ .if \el == 0 mrs x21, sp_el0 - get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, + mov tsk, sp + and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. + + mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE .endif @@ -108,6 +112,13 @@ .endif /* + * Set sp_el0 to current thread_info. + */ + .if \el == 0 + msr sp_el0, tsk + .endif + + /* * Registers that may be useful after this macro is invoked: * * x21 - aborted SP @@ -164,8 +175,44 @@ alternative_endif .endm .macro get_thread_info, rd - mov \rd, sp - and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack + mrs \rd, sp_el0 + .endm + + .macro irq_stack_entry + mov x19, sp // preserve the original sp + + /* + * Compare sp with the current thread_info, if the top + * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and + * should switch to the irq stack. + */ + and x25, x19, #~(THREAD_SIZE - 1) + cmp x25, tsk + b.ne 9998f + + this_cpu_ptr irq_stack, x25, x26 + mov x26, #IRQ_STACK_START_SP + add x26, x25, x26 + + /* switch to the irq stack */ + mov sp, x26 + + /* + * Add a dummy stack frame, this non-standard format is fixed up + * by unwind_frame() + */ + stp x29, x19, [sp, #-16]! + mov x29, sp + +9998: + .endm + + /* + * x19 should be preserved between irq_stack_entry and + * irq_stack_exit. + */ + .macro irq_stack_exit + mov sp, x19 .endm /* @@ -183,10 +230,11 @@ tsk .req x28 // current thread_info * Interrupt handling. */ .macro irq_handler - adrp x1, handle_arch_irq - ldr x1, [x1, #:lo12:handle_arch_irq] + ldr_l x1, handle_arch_irq mov x0, sp + irq_stack_entry blr x1 + irq_stack_exit .endm .text @@ -358,10 +406,10 @@ el1_irq: bl trace_hardirqs_off #endif + get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT - get_thread_info tsk ldr w24, [tsk, #TI_PREEMPT] // get preempt count cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags @@ -599,6 +647,8 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 + and x9, x9, #~(THREAD_SIZE - 1) + msr sp_el0, x9 ret ENDPROC(cpu_switch_to) @@ -626,14 +676,14 @@ ret_fast_syscall_trace: work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ - ldr x2, [sp, #S_PSTATE] mov x0, sp // 'regs' - tst x2, #PSR_MODE_MASK // user mode regs? - b.ne no_work_pending // returning to kernel enable_irq // enable interrupts for do_notify_resume() bl do_notify_resume b ret_to_user work_resched: +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off // the IRQs are off here, inform the tracing code +#endif bl schedule /* @@ -645,7 +695,6 @@ ret_to_user: and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending enable_step_tsk x1, x2 -no_work_pending: kernel_exit 0 ENDPROC(ret_to_user) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 4c46c54a3ad7..acc1afd5c749 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -289,7 +289,7 @@ static struct notifier_block fpsimd_cpu_pm_notifier_block = { .notifier_call = fpsimd_cpu_pm_notifier, }; -static void fpsimd_pm_init(void) +static void __init fpsimd_pm_init(void) { cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); } diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c851be795080..ebecf9aa33d1 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -29,12 +29,11 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, /* * Note: - * Due to modules and __init, code can disappear and change, - * we need to protect against faulting as well as code changing. - * We do this by aarch64_insn_*() which use the probe_kernel_*(). - * - * No lock is held here because all the modifications are run - * through stop_machine(). + * We are paranoid about modifying text, as if a bug were to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with aarch64_insn_*() which uses + * probe_kernel_*(), and make sure what we read is what we expected it + * to be before modifying it. */ if (validate) { if (aarch64_insn_read((void *)pc, &replaced)) @@ -93,6 +92,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return ftrace_modify_code(pc, old, new, true); } +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + int __init ftrace_dyn_arch_init(void) { return 0; @@ -125,23 +129,20 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, * on other archs. It's unlikely on AArch64. */ old = *parent; - *parent = return_hooker; trace.func = self_addr; trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - *parent = old; + if (!ftrace_graph_entry(&trace)) return; - } err = ftrace_push_return_trace(old, self_addr, &trace.depth, frame_pointer); - if (err == -EBUSY) { - *parent = old; + if (err == -EBUSY) return; - } + else + *parent = return_hooker; } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b685257926f0..a88a15447c3b 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -29,6 +29,7 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/cputype.h> +#include <asm/elf.h> #include <asm/kernel-pgtable.h> #include <asm/memory.h> #include <asm/pgtable-hwdef.h> @@ -67,12 +68,11 @@ * in the entry routines. */ __HEAD - +_head: /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ #ifdef CONFIG_EFI -efi_head: /* * This add instruction has no meaningful effect except that * its opcode forms the magic "MZ" signature required by UEFI. @@ -83,9 +83,9 @@ efi_head: b stext // branch to kernel start, magic .long 0 // reserved #endif - .quad _kernel_offset_le // Image load offset from start of RAM, little-endian - .quad _kernel_size_le // Effective size of kernel image, little-endian - .quad _kernel_flags_le // Informative flags, little-endian + le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian + le64sym _kernel_size_le // Effective size of kernel image, little-endian + le64sym _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved @@ -94,14 +94,14 @@ efi_head: .byte 0x4d .byte 0x64 #ifdef CONFIG_EFI - .long pe_header - efi_head // Offset to the PE header. + .long pe_header - _head // Offset to the PE header. #else .word 0 // reserved #endif #ifdef CONFIG_EFI .globl __efistub_stext_offset - .set __efistub_stext_offset, stext - efi_head + .set __efistub_stext_offset, stext - _head .align 3 pe_header: .ascii "PE" @@ -124,7 +124,7 @@ optional_header: .long _end - stext // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long __efistub_entry - efi_head // AddressOfEntryPoint + .long __efistub_entry - _head // AddressOfEntryPoint .long __efistub_stext_offset // BaseOfCode extra_header_fields: @@ -139,7 +139,7 @@ extra_header_fields: .short 0 // MinorSubsystemVersion .long 0 // Win32VersionValue - .long _end - efi_head // SizeOfImage + .long _end - _head // SizeOfImage // Everything before the kernel image is considered part of the header .long __efistub_stext_offset // SizeOfHeaders @@ -210,6 +210,7 @@ section_table: ENTRY(stext) bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode + mov x23, xzr // KASLR offset, defaults to 0 adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag bl __create_page_tables // x25=TTBR0, x26=TTBR1 @@ -219,11 +220,13 @@ ENTRY(stext) * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, =__mmap_switched // address to jump to after + ldr x27, 0f // address to jump to after // MMU has been enabled adr_l lr, __enable_mmu // return (PIC) address b __cpu_setup // initialise processor ENDPROC(stext) + .align 3 +0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR /* * Preserve the arguments passed by the bootloader in x0 .. x3 @@ -311,7 +314,7 @@ ENDPROC(preserve_boot_args) __create_page_tables: adrp x25, idmap_pg_dir adrp x26, swapper_pg_dir - mov x27, lr + mov x28, lr /* * Invalidate the idmap and swapper page tables to avoid potential @@ -389,9 +392,11 @@ __create_page_tables: * Map the kernel image (starting with PHYS_OFFSET). */ mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET + ldr x5, =KIMAGE_VADDR + add x5, x5, x23 // add KASLR displacement create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END // __va(KERNEL_END) + ldr w6, kernel_img_size + add x6, x6, x5 mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -405,9 +410,11 @@ __create_page_tables: dmb sy bl __inval_cache_range - mov lr, x27 - ret + ret x28 ENDPROC(__create_page_tables) + +kernel_img_size: + .long _end - (_head - TEXT_OFFSET) .ltorg /* @@ -415,21 +422,81 @@ ENDPROC(__create_page_tables) */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: - adr_l x6, __bss_start - adr_l x7, __bss_stop + mov x28, lr // preserve LR + adr_l x8, vectors // load VBAR_EL1 with virtual + msr vbar_el1, x8 // vector table address + isb -1: cmp x6, x7 + // Clear BSS + adr_l x0, __bss_start + mov x1, xzr + adr_l x2, __bss_stop + sub x2, x2, x0 + bl __pi_memset + dsb ishst // Make zero page visible to PTW + +#ifdef CONFIG_RELOCATABLE + + /* + * Iterate over each entry in the relocation table, and apply the + * relocations in place. + */ + adr_l x8, __dynsym_start // start of symbol table + adr_l x9, __reloc_start // start of reloc table + adr_l x10, __reloc_end // end of reloc table + +0: cmp x9, x10 b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: + ldp x11, x12, [x9], #24 + ldr x13, [x9, #-8] + cmp w12, #R_AARCH64_RELATIVE + b.ne 1f + add x13, x13, x23 // relocate + str x13, [x11, x23] + b 0b + +1: cmp w12, #R_AARCH64_ABS64 + b.ne 0b + add x12, x12, x12, lsl #1 // symtab offset: 24x top word + add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word + ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx + ldr x15, [x12, #8] // Elf64_Sym::st_value + cmp w14, #-0xf // SHN_ABS (0xfff1) ? + add x14, x15, x23 // relocate + csel x15, x14, x15, ne + add x15, x13, x15 + str x15, [x11, x23] + b 0b + +2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr + dc cvac, x8 // value visible to secondaries + dsb sy // with MMU off +#endif + adr_l sp, initial_sp, x4 + mov x4, sp + and x4, x4, #~(THREAD_SIZE - 1) + msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer - str_l x24, memstart_addr, x6 // Save PHYS_OFFSET + + ldr_l x4, kimage_vaddr // Save the offset between + sub x4, x4, x24 // the kernel virtual and + str_l x4, kimage_voffset, x5 // physical mappings + mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init #endif +#ifdef CONFIG_RANDOMIZE_BASE + cbnz x23, 0f // already running randomized? + mov x0, x21 // pass FDT address in x0 + bl kaslr_early_init // parse FDT for KASLR options + cbz x0, 0f // KASLR disabled? just proceed + mov x23, x0 // record KASLR offset + ret x28 // we must enable KASLR, return + // to __enable_mmu() +0: +#endif b start_kernel ENDPROC(__mmap_switched) @@ -438,6 +505,10 @@ ENDPROC(__mmap_switched) * hotplug and needs to have the same protections as the text region */ .section ".text","ax" + +ENTRY(kimage_vaddr) + .quad _text - TEXT_OFFSET + /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. @@ -603,14 +674,22 @@ ENTRY(secondary_startup) adrp x26, swapper_pg_dir bl __cpu_setup // initialise processor - ldr x21, =secondary_data - ldr x27, =__secondary_switched // address to jump to after enabling the MMU + ldr x8, kimage_vaddr + ldr w9, 0f + sub x27, x8, w9, sxtw // address to jump to after enabling the MMU b __enable_mmu ENDPROC(secondary_startup) +0: .long (_text - TEXT_OFFSET) - __secondary_switched ENTRY(__secondary_switched) - ldr x0, [x21] // get secondary_data.stack + adr_l x5, vectors + msr vbar_el1, x5 + isb + + ldr_l x0, secondary_data // get secondary_data.stack mov sp, x0 + and x0, x0, #~(THREAD_SIZE - 1) + msr sp_el0, x0 // save thread_info mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) @@ -628,12 +707,11 @@ ENDPROC(__secondary_switched) */ .section ".idmap.text", "ax" __enable_mmu: + mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value mrs x1, ID_AA64MMFR0_EL1 ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - ldr x5, =vectors - msr vbar_el1, x5 msr ttbr0_el1, x25 // load TTBR0 msr ttbr1_el1, x26 // load TTBR1 isb @@ -647,6 +725,26 @@ __enable_mmu: ic iallu dsb nsh isb +#ifdef CONFIG_RANDOMIZE_BASE + mov x19, x0 // preserve new SCTLR_EL1 value + blr x27 + + /* + * If we return here, we have a KASLR displacement in x23 which we need + * to take into account by discarding the current kernel mapping and + * creating a new one. + */ + msr sctlr_el1, x18 // disable the MMU + isb + bl __create_page_tables // recreate kernel mapping + + msr sctlr_el1, x19 // re-enable the MMU + isb + ic iallu // flush instructions fetched + dsb nsh // via old mapping + isb + add x27, x27, x23 // relocated __mmap_switched +#endif br x27 ENDPROC(__enable_mmu) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index bc2abb8b1599..db1bf57948f1 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -26,31 +26,40 @@ * There aren't any ELF relocations we can use to endian-swap values known only * at link time (e.g. the subtraction of two symbol addresses), so we must get * the linker to endian-swap certain values before emitting them. + * + * Note that, in order for this to work when building the ELF64 PIE executable + * (for KASLR), these values should not be referenced via R_AARCH64_ABS64 + * relocations, since these are fixed up at runtime rather than at build time + * when PIE is in effect. So we need to split them up in 32-bit high and low + * words. */ #ifdef CONFIG_CPU_BIG_ENDIAN -#define DATA_LE64(data) \ - ((((data) & 0x00000000000000ff) << 56) | \ - (((data) & 0x000000000000ff00) << 40) | \ - (((data) & 0x0000000000ff0000) << 24) | \ - (((data) & 0x00000000ff000000) << 8) | \ - (((data) & 0x000000ff00000000) >> 8) | \ - (((data) & 0x0000ff0000000000) >> 24) | \ - (((data) & 0x00ff000000000000) >> 40) | \ - (((data) & 0xff00000000000000) >> 56)) +#define DATA_LE32(data) \ + ((((data) & 0x000000ff) << 24) | \ + (((data) & 0x0000ff00) << 8) | \ + (((data) & 0x00ff0000) >> 8) | \ + (((data) & 0xff000000) >> 24)) #else -#define DATA_LE64(data) ((data) & 0xffffffffffffffff) +#define DATA_LE32(data) ((data) & 0xffffffff) #endif +#define DEFINE_IMAGE_LE64(sym, data) \ + sym##_lo32 = DATA_LE32((data) & 0xffffffff); \ + sym##_hi32 = DATA_LE32((data) >> 32) + #ifdef CONFIG_CPU_BIG_ENDIAN -#define __HEAD_FLAG_BE 1 +#define __HEAD_FLAG_BE 1 #else -#define __HEAD_FLAG_BE 0 +#define __HEAD_FLAG_BE 0 #endif -#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) +#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) + +#define __HEAD_FLAG_PHYS_BASE 1 -#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ - (__HEAD_FLAG_PAGE_SIZE << 1)) +#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ + (__HEAD_FLAG_PAGE_SIZE << 1) | \ + (__HEAD_FLAG_PHYS_BASE << 3)) /* * These will output as part of the Image header, which should be little-endian @@ -58,13 +67,23 @@ * endian swapped in head.S, all are done here for consistency. */ #define HEAD_SYMBOLS \ - _kernel_size_le = DATA_LE64(_end - _text); \ - _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ - _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); + DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text); \ + DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ + DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); #ifdef CONFIG_EFI /* + * Prevent the symbol aliases below from being emitted into the kallsyms + * table, by forcing them to be absolute symbols (which are conveniently + * ignored by scripts/kallsyms) rather than section relative symbols. + * The distinction is only relevant for partial linking, and only for symbols + * that are defined within a section declaration (which is not the case for + * the definitions below) so the resulting values will be identical. + */ +#define KALLSYMS_HIDE(sym) ABSOLUTE(sym) + +/* * The EFI stub has its own symbol namespace prefixed by __efistub_, to * isolate it from the kernel proper. The following symbols are legally * accessed by the stub, so provide some aliases to make them accessible. @@ -73,25 +92,25 @@ * linked at. The routines below are all implemented in assembler in a * position independent manner */ -__efistub_memcmp = __pi_memcmp; -__efistub_memchr = __pi_memchr; -__efistub_memcpy = __pi_memcpy; -__efistub_memmove = __pi_memmove; -__efistub_memset = __pi_memset; -__efistub_strlen = __pi_strlen; -__efistub_strcmp = __pi_strcmp; -__efistub_strncmp = __pi_strncmp; -__efistub___flush_dcache_area = __pi___flush_dcache_area; +__efistub_memcmp = KALLSYMS_HIDE(__pi_memcmp); +__efistub_memchr = KALLSYMS_HIDE(__pi_memchr); +__efistub_memcpy = KALLSYMS_HIDE(__pi_memcpy); +__efistub_memmove = KALLSYMS_HIDE(__pi_memmove); +__efistub_memset = KALLSYMS_HIDE(__pi_memset); +__efistub_strlen = KALLSYMS_HIDE(__pi_strlen); +__efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp); +__efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp); +__efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area); #ifdef CONFIG_KASAN -__efistub___memcpy = __pi_memcpy; -__efistub___memmove = __pi_memmove; -__efistub___memset = __pi_memset; +__efistub___memcpy = KALLSYMS_HIDE(__pi_memcpy); +__efistub___memmove = KALLSYMS_HIDE(__pi_memmove); +__efistub___memset = KALLSYMS_HIDE(__pi_memset); #endif -__efistub__text = _text; -__efistub__end = _end; -__efistub__edata = _edata; +__efistub__text = KALLSYMS_HIDE(_text); +__efistub__end = KALLSYMS_HIDE(_end); +__efistub__edata = KALLSYMS_HIDE(_edata); #endif diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 9f17ec071ee0..2386b26c0712 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -30,6 +30,9 @@ unsigned long irq_err_count; +/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */ +DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16); + int arch_show_interrupts(struct seq_file *p, int prec) { show_ipi_list(p, prec); diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c new file mode 100644 index 000000000000..582983920054 --- /dev/null +++ b/arch/arm64/kernel/kaslr.c @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/crc32.h> +#include <linux/init.h> +#include <linux/libfdt.h> +#include <linux/mm_types.h> +#include <linux/sched.h> +#include <linux/types.h> + +#include <asm/fixmap.h> +#include <asm/kernel-pgtable.h> +#include <asm/memory.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/sections.h> + +u64 __read_mostly module_alloc_base; +u16 __initdata memstart_offset_seed; + +static __init u64 get_kaslr_seed(void *fdt) +{ + int node, len; + u64 *prop; + u64 ret; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + return 0; + + prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len); + if (!prop || len != sizeof(u64)) + return 0; + + ret = fdt64_to_cpu(*prop); + *prop = 0; + return ret; +} + +static __init const u8 *get_cmdline(void *fdt) +{ + static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE; + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + int node; + const u8 *prop; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + goto out; + + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + goto out; + return prop; + } +out: + return default_cmdline; +} + +extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, + pgprot_t prot); + +/* + * This routine will be executed with the kernel mapped at its default virtual + * address, and if it returns successfully, the kernel will be remapped, and + * start_kernel() will be executed from a randomized virtual offset. The + * relocation will result in all absolute references (e.g., static variables + * containing function pointers) to be reinitialized, and zero-initialized + * .bss variables will be reset to 0. + */ +u64 __init kaslr_early_init(u64 dt_phys) +{ + void *fdt; + u64 seed, offset, mask, module_range; + const u8 *cmdline, *str; + int size; + + /* + * Set a reasonable default for module_alloc_base in case + * we end up running with module randomization disabled. + */ + module_alloc_base = (u64)_etext - MODULES_VSIZE; + + /* + * Try to map the FDT early. If this fails, we simply bail, + * and proceed with KASLR disabled. We will make another + * attempt at mapping the FDT in setup_machine() + */ + early_fixmap_init(); + fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); + if (!fdt) + return 0; + + /* + * Retrieve (and wipe) the seed from the FDT + */ + seed = get_kaslr_seed(fdt); + if (!seed) + return 0; + + /* + * Check if 'nokaslr' appears on the command line, and + * return 0 if that is the case. + */ + cmdline = get_cmdline(fdt); + str = strstr(cmdline, "nokaslr"); + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + return 0; + + /* + * OK, so we are proceeding with KASLR enabled. Calculate a suitable + * kernel image offset from the seed. Let's place the kernel in the + * lower half of the VMALLOC area (VA_BITS - 2). + * Even if we could randomize at page granularity for 16k and 64k pages, + * let's always round to 2 MB so we don't interfere with the ability to + * map using contiguous PTEs + */ + mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); + offset = seed & mask; + + /* use the top 16 bits to randomize the linear region */ + memstart_offset_seed = seed >> 48; + + /* + * The kernel Image should not extend across a 1GB/32MB/512MB alignment + * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this + * happens, increase the KASLR offset by the size of the kernel image. + */ + if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != + (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) + offset = (offset + (u64)(_end - _text)) & mask; + + if (IS_ENABLED(CONFIG_KASAN)) + /* + * KASAN does not expect the module region to intersect the + * vmalloc region, since shadow memory is allocated for each + * module at load time, whereas the vmalloc region is shadowed + * by KASAN zero pages. So keep modules out of the vmalloc + * region if KASAN is enabled. + */ + return offset; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + /* + * Randomize the module region independently from the core + * kernel. This prevents modules from leaking any information + * about the address of the kernel itself, but results in + * branches between modules and the core kernel that are + * resolved via PLTs. (Branches between modules will be + * resolved normally.) + */ + module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE; + module_alloc_base = VMALLOC_START; + } else { + /* + * Randomize the module region by setting module_alloc_base to + * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, + * _stext) . This guarantees that the resulting region still + * covers [_stext, _etext], and that all relative branches can + * be resolved without veneers. + */ + module_range = MODULES_VSIZE - (u64)(_etext - _stext); + module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; + } + + /* use the lower 21 bits to randomize the base of the module region */ + module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; + module_alloc_base &= PAGE_MASK; + + return offset; +} diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c new file mode 100644 index 000000000000..1ce90d8450ae --- /dev/null +++ b/arch/arm64/kernel/module-plts.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sort.h> + +struct plt_entry { + /* + * A program that conforms to the AArch64 Procedure Call Standard + * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or + * IP1 (x17) may be inserted at any branch instruction that is + * exposed to a relocation that supports long branches. Since that + * is exactly what we are dealing with here, we are free to use x16 + * as a scratch register in the PLT veneers. + */ + __le32 mov0; /* movn x16, #0x.... */ + __le32 mov1; /* movk x16, #0x...., lsl #16 */ + __le32 mov2; /* movk x16, #0x...., lsl #32 */ + __le32 br; /* br x16 */ +}; + +u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, + Elf64_Sym *sym) +{ + struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr; + int i = mod->arch.plt_num_entries; + u64 val = sym->st_value + rela->r_addend; + + /* + * We only emit PLT entries against undefined (SHN_UNDEF) symbols, + * which are listed in the ELF symtab section, but without a type + * or a size. + * So, similar to how the module loader uses the Elf64_Sym::st_value + * field to store the resolved addresses of undefined symbols, let's + * borrow the Elf64_Sym::st_size field (whose value is never used by + * the module loader, even for symbols that are defined) to record + * the address of a symbol's associated PLT entry as we emit it for a + * zero addend relocation (which is the only kind we have to deal with + * in practice). This allows us to find duplicates without having to + * go through the table every time. + */ + if (rela->r_addend == 0 && sym->st_size != 0) { + BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]); + return sym->st_size; + } + + mod->arch.plt_num_entries++; + BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries); + + /* + * MOVK/MOVN/MOVZ opcode: + * +--------+------------+--------+-----------+-------------+---------+ + * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | + * +--------+------------+--------+-----------+-------------+---------+ + * + * Rd := 0x10 (x16) + * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) + * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) + * sf := 1 (64-bit variant) + */ + plt[i] = (struct plt_entry){ + cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), + cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), + cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), + cpu_to_le32(0xd61f0200) + }; + + if (rela->r_addend == 0) + sym->st_size = (u64)&plt[i]; + + return (u64)&plt[i]; +} + +#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) + +static int cmp_rela(const void *a, const void *b) +{ + const Elf64_Rela *x = a, *y = b; + int i; + + /* sort by type, symbol index and addend */ + i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info)); + if (i == 0) + i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info)); + if (i == 0) + i = cmp_3way(x->r_addend, y->r_addend); + return i; +} + +static bool duplicate_rel(const Elf64_Rela *rela, int num) +{ + /* + * Entries are sorted by type, symbol index and addend. That means + * that, if a duplicate entry exists, it must be in the preceding + * slot. + */ + return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0; +} + +static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num) +{ + unsigned int ret = 0; + Elf64_Sym *s; + int i; + + for (i = 0; i < num; i++) { + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_AARCH64_JUMP26: + case R_AARCH64_CALL26: + /* + * We only have to consider branch targets that resolve + * to undefined symbols. This is not simply a heuristic, + * it is a fundamental limitation, since the PLT itself + * is part of the module, and needs to be within 128 MB + * as well, so modules can never grow beyond that limit. + */ + s = syms + ELF64_R_SYM(rela[i].r_info); + if (s->st_shndx != SHN_UNDEF) + break; + + /* + * Jump relocations with non-zero addends against + * undefined symbols are supported by the ELF spec, but + * do not occur in practice (e.g., 'jump n bytes past + * the entry point of undefined function symbol f'). + * So we need to support them, but there is no need to + * take them into consideration when trying to optimize + * this code. So let's only check for duplicates when + * the addend is zero: this allows us to record the PLT + * entry address in the symbol table itself, rather than + * having to search the list for duplicates each time we + * emit one. + */ + if (rela[i].r_addend != 0 || !duplicate_rel(rela, i)) + ret++; + break; + } + } + return ret; +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned long plt_max_entries = 0; + Elf64_Sym *syms = NULL; + int i; + + /* + * Find the empty .plt section so we can expand it to store the PLT + * entries. Record the symtab address as well. + */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0) + mod->arch.plt = sechdrs + i; + else if (sechdrs[i].sh_type == SHT_SYMTAB) + syms = (Elf64_Sym *)sechdrs[i].sh_addr; + } + + if (!mod->arch.plt) { + pr_err("%s: module PLT section missing\n", mod->name); + return -ENOEXEC; + } + if (!syms) { + pr_err("%s: module symtab section missing\n", mod->name); + return -ENOEXEC; + } + + for (i = 0; i < ehdr->e_shnum; i++) { + Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset; + int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela); + Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info; + + if (sechdrs[i].sh_type != SHT_RELA) + continue; + + /* ignore relocations that operate on non-exec sections */ + if (!(dstsec->sh_flags & SHF_EXECINSTR)) + continue; + + /* sort by type, symbol index and addend */ + sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); + + plt_max_entries += count_plts(syms, rels, numrels); + } + + mod->arch.plt->sh_type = SHT_NOBITS; + mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.plt->sh_addralign = L1_CACHE_BYTES; + mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry); + mod->arch.plt_num_entries = 0; + mod->arch.plt_max_entries = plt_max_entries; + return 0; +} diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f4bc779e62e8..7f316982ce00 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -30,17 +30,30 @@ #include <asm/insn.h> #include <asm/sections.h> -#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX -#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 - void *module_alloc(unsigned long size) { void *p; - p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, + module_alloc_base + MODULES_VSIZE, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); + if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + !IS_ENABLED(CONFIG_KASAN)) + /* + * KASAN can only deal with module allocations being served + * from the reserved module region, since the remainder of + * the vmalloc region is already backed by zero shadow pages, + * and punching holes into it is non-trivial. Since the module + * region is not randomized when KASAN is enabled, it is even + * less likely that the module region gets exhausted, so we + * can simply omit this fallback in that case. + */ + p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, + VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_module_alloc(p, size) < 0)) { vfree(p); return NULL; @@ -75,15 +88,18 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val) static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) { - u64 imm_mask = (1 << len) - 1; s64 sval = do_reloc(op, place, val); switch (len) { case 16: *(s16 *)place = sval; + if (sval < S16_MIN || sval > U16_MAX) + return -ERANGE; break; case 32: *(s32 *)place = sval; + if (sval < S32_MIN || sval > U32_MAX) + return -ERANGE; break; case 64: *(s64 *)place = sval; @@ -92,34 +108,23 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) pr_err("Invalid length (%d) for data relocation\n", len); return 0; } - - /* - * Extract the upper value bits (including the sign bit) and - * shift them to bit 0. - */ - sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); - - /* - * Overflow has occurred if the value is not representable in - * len bits (i.e the bottom len bits are not sign-extended and - * the top bits are not all zero). - */ - if ((u64)(sval + 1) > 2) - return -ERANGE; - return 0; } +enum aarch64_insn_movw_imm_type { + AARCH64_INSN_IMM_MOVNZ, + AARCH64_INSN_IMM_MOVKZ, +}; + static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, enum aarch64_insn_imm_type imm_type) + int lsb, enum aarch64_insn_movw_imm_type imm_type) { - u64 imm, limit = 0; + u64 imm; s64 sval; u32 insn = le32_to_cpu(*(u32 *)place); sval = do_reloc(op, place, val); - sval >>= lsb; - imm = sval & 0xffff; + imm = sval >> lsb; if (imm_type == AARCH64_INSN_IMM_MOVNZ) { /* @@ -128,7 +133,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, * immediate is less than zero. */ insn &= ~(3 << 29); - if ((s64)imm >= 0) { + if (sval >= 0) { /* >=0: Set the instruction to MOVZ (opcode 10b). */ insn |= 2 << 29; } else { @@ -140,29 +145,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, */ imm = ~imm; } - imm_type = AARCH64_INSN_IMM_MOVK; } /* Update the instruction with the new encoding. */ - insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm); *(u32 *)place = cpu_to_le32(insn); - /* Shift out the immediate field. */ - sval >>= 16; - - /* - * For unsigned immediates, the overflow check is straightforward. - * For signed immediates, the sign bit is actually the bit past the - * most significant bit of the field. - * The AARCH64_INSN_IMM_16 immediate type is unsigned. - */ - if (imm_type != AARCH64_INSN_IMM_16) { - sval++; - limit++; - } - - /* Check the upper bits depending on the sign of the immediate. */ - if ((u64)sval > limit) + if (imm > U16_MAX) return -ERANGE; return 0; @@ -267,25 +256,25 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, overflow_check = false; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - AARCH64_INSN_IMM_16); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, @@ -302,7 +291,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, @@ -311,7 +300,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, @@ -320,7 +309,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - AARCH64_INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVKZ); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, @@ -388,6 +377,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); + + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + ovf == -ERANGE) { + val = module_emit_plt_entry(me, &rel[i], sym); + ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, + 26, AARCH64_INSN_IMM_26); + } break; default: diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds new file mode 100644 index 000000000000..8949f6c6f729 --- /dev/null +++ b/arch/arm64/kernel/module.lds @@ -0,0 +1,3 @@ +SECTIONS { + .plt (NOLOAD) : { BYTE(0) } +} diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 3aa74830cc69..ff4665462a02 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -164,8 +164,11 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif - walk_stackframe(&frame, callchain_trace, entry); + walk_stackframe(current, &frame, callchain_trace, entry); } unsigned long perf_instruction_pointer(struct pt_regs *regs) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 62068ded1e90..6f3fb46170bf 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -46,6 +46,7 @@ #include <linux/notifier.h> #include <trace/events/power.h> +#include <asm/alternative.h> #include <asm/compat.h> #include <asm/cacheflush.h> #include <asm/fpsimd.h> @@ -346,6 +347,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } else { memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h; + if (IS_ENABLED(CONFIG_ARM64_UAO) && + cpus_have_cap(ARM64_HAS_UAO)) + childregs->pstate |= PSR_UAO_BIT; p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } @@ -374,6 +378,17 @@ static void tls_thread_switch(struct task_struct *next) : : "r" (tpidr), "r" (tpidrro)); } +/* Restore the UAO state depending on next's addr_limit */ +static void uao_thread_switch(struct task_struct *next) +{ + if (IS_ENABLED(CONFIG_ARM64_UAO)) { + if (task_thread_info(next)->addr_limit == KERNEL_DS) + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); + else + asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO)); + } +} + /* * Thread switching. */ @@ -386,6 +401,7 @@ struct task_struct *__switch_to(struct task_struct *prev, tls_thread_switch(next); hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); + uao_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -410,11 +426,14 @@ unsigned long get_wchan(struct task_struct *p) frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = p->curr_ret_stack; +#endif stack_page = (unsigned long)task_stack_page(p); do { if (frame.sp < stack_page || frame.sp >= stack_page + THREAD_SIZE || - unwind_frame(&frame)) + unwind_frame(p, &frame)) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 6c4fd2810ecb..1718706fde83 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -43,8 +43,11 @@ void *return_address(unsigned int level) frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.pc = (unsigned long)return_address; /* dummy */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = current->curr_ret_stack; +#endif - walk_stackframe(&frame, save_return_addr, &data); + walk_stackframe(current, &frame, save_return_addr, &data); if (!data.level) return data.addr; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 8119479147db..42371f69def3 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -62,6 +62,7 @@ #include <asm/memblock.h> #include <asm/efi.h> #include <asm/xen/hypervisor.h> +#include <asm/mmu_context.h> phys_addr_t __fdt_pointer __initdata; @@ -313,6 +314,12 @@ void __init setup_arch(char **cmdline_p) */ local_async_enable(); + /* + * TTBR0 is only used for the identity mapping at this stage. Make it + * point to zero page to avoid speculatively fetching new entries. + */ + cpu_uninstall_idmap(); + efi_init(); arm64_memblock_init(); @@ -381,3 +388,32 @@ static int __init topology_init(void) return 0; } subsys_initcall(topology_init); + +/* + * Dump out kernel offset information on panic. + */ +static int dump_kernel_offset(struct notifier_block *self, unsigned long v, + void *p) +{ + u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR; + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) { + pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n", + kaslr_offset, KIMAGE_VADDR); + } else { + pr_emerg("Kernel Offset: disabled\n"); + } + return 0; +} + +static struct notifier_block kernel_offset_notifier = { + .notifier_call = dump_kernel_offset +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_offset_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index f586f7c875e2..e33fe33876ab 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -173,6 +173,9 @@ ENTRY(cpu_resume) /* load physical address of identity map page table in x1 */ adrp x1, idmap_pg_dir mov sp, x2 + /* save thread_info */ + and x2, x2, #~(THREAD_SIZE - 1) + msr sp_el0, x2 /* * cpu_do_resume expects x0 to contain context physical address * pointer and x1 to contain physical address of 1:1 page tables diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b1adc51b2c2e..24cb4f800033 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -70,6 +70,7 @@ enum ipi_msg_type { IPI_CPU_STOP, IPI_TIMER, IPI_IRQ_WORK, + IPI_WAKEUP }; /* @@ -149,9 +150,7 @@ asmlinkage void secondary_start_kernel(void) * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + cpu_uninstall_idmap(); preempt_disable(); trace_hardirqs_off(); @@ -445,6 +444,17 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) /* map the logical cpu id to cpu MPIDR */ cpu_logical_map(cpu_count) = hwid; + /* + * Set-up the ACPI parking protocol cpu entries + * while initializing the cpu_logical_map to + * avoid parsing MADT entries multiple times for + * nothing (ie a valid cpu_logical_map entry should + * contain a valid parking protocol data set to + * initialize the cpu if the parking protocol is + * the only available enable method). + */ + acpi_set_mailbox_entry(cpu_count, processor); + cpu_count++; } @@ -627,6 +637,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), + S(IPI_WAKEUP, "CPU wake-up interrupts"), }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) @@ -670,6 +681,13 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL +void arch_send_wakeup_ipi_mask(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_WAKEUP); +} +#endif + #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { @@ -747,6 +765,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; #endif +#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL + case IPI_WAKEUP: + WARN_ONCE(!acpi_parking_protocol_valid(cpu), + "CPU%u: Wake-up IPI outside the ACPI parking protocol\n", + cpu); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ccb6078ed9f2..cfd46c227c8c 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -17,9 +17,11 @@ */ #include <linux/kernel.h> #include <linux/export.h> +#include <linux/ftrace.h> #include <linux/sched.h> #include <linux/stacktrace.h> +#include <asm/irq.h> #include <asm/stacktrace.h> /* @@ -35,25 +37,82 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ -int notrace unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; + unsigned long irq_stack_ptr; + + /* + * Switching between stacks is valid when tracing current and in + * non-preemptible context. + */ + if (tsk == current && !preemptible()) + irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + else + irq_stack_ptr = 0; low = frame->sp; - high = ALIGN(low, THREAD_SIZE); + /* irq stacks are not THREAD_SIZE aligned */ + if (on_irq_stack(frame->sp, raw_smp_processor_id())) + high = irq_stack_ptr; + else + high = ALIGN(low, THREAD_SIZE) - 0x20; - if (fp < low || fp > high - 0x18 || fp & 0xf) + if (fp < low || fp > high || fp & 0xf) return -EINVAL; frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); frame->pc = *(unsigned long *)(fp + 8); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (tsk && tsk->ret_stack && + (frame->pc == (unsigned long)return_to_handler)) { + /* + * This is a case where function graph tracer has + * modified a return address (LR) in a stack frame + * to hook a function return. + * So replace it to an original value. + */ + frame->pc = tsk->ret_stack[frame->graph--].ret; + } +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + /* + * Check whether we are going to walk through from interrupt stack + * to task stack. + * If we reach the end of the stack - and its an interrupt stack, + * unpack the dummy frame to find the original elr. + * + * Check the frame->fp we read from the bottom of the irq_stack, + * and the original task stack pointer are both in current->stack. + */ + if (frame->sp == irq_stack_ptr) { + struct pt_regs *irq_args; + unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + + if (object_is_on_stack((void *)orig_sp) && + object_is_on_stack((void *)frame->fp)) { + frame->sp = orig_sp; + + /* orig_sp is the saved pt_regs, find the elr */ + irq_args = (struct pt_regs *)orig_sp; + frame->pc = irq_args->pc; + } else { + /* + * This frame has a non-standard format, and we + * didn't fix it, because the data looked wrong. + * Refuse to output this frame. + */ + return -EINVAL; + } + } + return 0; } -void notrace walk_stackframe(struct stackframe *frame, +void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data) { while (1) { @@ -61,7 +120,7 @@ void notrace walk_stackframe(struct stackframe *frame, if (fn(frame, data)) break; - ret = unwind_frame(frame); + ret = unwind_frame(tsk, frame); if (ret < 0) break; } @@ -112,8 +171,11 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) frame.sp = current_stack_pointer; frame.pc = (unsigned long)save_stack_trace_tsk; } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = tsk->curr_ret_stack; +#endif - walk_stackframe(&frame, save_trace, &data); + walk_stackframe(tsk, &frame, save_trace, &data); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 1095aa483a1c..66055392f445 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - struct mm_struct *mm = current->active_mm; int ret; unsigned long flags; @@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) ret = __cpu_suspend_enter(arg, fn); if (ret == 0) { /* - * We are resuming from reset with TTBR0_EL1 set to the - * idmap to enable the MMU; set the TTBR0 to the reserved - * page tables to prevent speculative TLB allocations, flush - * the local tlb and set the default tcr_el1.t0sz so that - * the TTBR0 address space set-up is properly restored. - * If the current active_mm != &init_mm we entered cpu_suspend - * with mappings in TTBR0 that must be restored, so we switch - * them back to complete the address space configuration - * restoration before returning. + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); - - if (mm != &init_mm) - cpu_switch_mm(mm->pgd, mm); + cpu_uninstall_idmap(); /* * Restore per-cpu offset before any kernel diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 13339b6ffc1a..59779699a1a4 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -52,8 +52,11 @@ unsigned long profile_pc(struct pt_regs *regs) frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = -1; /* no task info */ +#endif do { - int ret = unwind_frame(&frame); + int ret = unwind_frame(NULL, &frame); if (ret < 0) return 0; } while (in_lock_functions(frame.pc)); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index e9b9b5364393..c5392081b49b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -146,17 +146,24 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; + unsigned long irq_stack_ptr; + int skip; + + /* + * Switching between stacks is valid when tracing current and in + * non-preemptible context. + */ + if (tsk == current && !preemptible()) + irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + else + irq_stack_ptr = 0; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); if (!tsk) tsk = current; - if (regs) { - frame.fp = regs->regs[29]; - frame.sp = regs->sp; - frame.pc = regs->pc; - } else if (tsk == current) { + if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; frame.pc = (unsigned long)dump_backtrace; @@ -168,21 +175,49 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + frame.graph = tsk->curr_ret_stack; +#endif - pr_emerg("Call trace:\n"); + skip = !!regs; + printk("Call trace:\n"); while (1) { unsigned long where = frame.pc; unsigned long stack; int ret; - dump_backtrace_entry(where); - ret = unwind_frame(&frame); + /* skip until specified stack frame */ + if (!skip) { + dump_backtrace_entry(where); + } else if (frame.fp == regs->regs[29]) { + skip = 0; + /* + * Mostly, this is the case where this function is + * called in panic/abort. As exception handler's + * stack frame does not contain the corresponding pc + * at which an exception has taken place, use regs->pc + * instead. + */ + dump_backtrace_entry(regs->pc); + } + ret = unwind_frame(tsk, &frame); if (ret < 0) break; stack = frame.sp; - if (in_exception_text(where)) + if (in_exception_text(where)) { + /* + * If we switched to the irq_stack before calling this + * exception handler, then the pt_regs will be on the + * task stack. The easiest way to tell is if the large + * pt_regs would overlap with the end of the irq_stack. + */ + if (stack < irq_stack_ptr && + (stack + sizeof(struct pt_regs)) > irq_stack_ptr) + stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + dump_mem("", "Exception stack", stack, stack + sizeof(struct pt_regs), false); + } } } @@ -456,22 +491,22 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __pte_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pte %016lx.\n", file, line, val); + pr_err("%s:%d: bad pte %016lx.\n", file, line, val); } void __pmd_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); + pr_err("%s:%d: bad pmd %016lx.\n", file, line, val); } void __pud_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); + pr_err("%s:%d: bad pud %016lx.\n", file, line, val); } void __pgd_error(const char *file, int line, unsigned long val) { - pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); + pr_err("%s:%d: bad pgd %016lx.\n", file, line, val); } /* GENERIC_BUG traps */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 71426a78db12..e3f6cd740ea3 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -87,15 +87,16 @@ SECTIONS EXIT_CALL *(.discard) *(.discard.*) + *(.interp .dynamic) } - . = PAGE_OFFSET + TEXT_OFFSET; + . = KIMAGE_VADDR + TEXT_OFFSET; .head.text : { _text = .; HEAD_TEXT } - ALIGN_DEBUG_RO + ALIGN_DEBUG_RO_MIN(PAGE_SIZE) .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -113,14 +114,12 @@ SECTIONS *(.got) /* Global offset table */ } - ALIGN_DEBUG_RO RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) NOTES - ALIGN_DEBUG_RO - _etext = .; /* End of text and rodata section */ ALIGN_DEBUG_RO_MIN(PAGE_SIZE) + _etext = .; /* End of text and rodata section */ __init_begin = .; INIT_TEXT_SECTION(8) @@ -128,7 +127,6 @@ SECTIONS ARM_EXIT_KEEP(EXIT_TEXT) } - ALIGN_DEBUG_RO_MIN(16) .init.data : { INIT_DATA INIT_SETUP(16) @@ -143,9 +141,6 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(PAGE_SIZE); - __init_end = .; - . = ALIGN(4); .altinstructions : { __alt_instructions = .; @@ -155,8 +150,25 @@ SECTIONS .altinstr_replacement : { *(.altinstr_replacement) } + .rela : ALIGN(8) { + __reloc_start = .; + *(.rela .rela*) + __reloc_end = .; + } + .dynsym : ALIGN(8) { + __dynsym_start = .; + *(.dynsym) + } + .dynstr : { + *(.dynstr) + } + .hash : { + *(.hash) + } . = ALIGN(PAGE_SIZE); + __init_end = .; + _data = .; _sdata = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) @@ -190,4 +202,4 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ -ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned") +ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 86c289832272..309e3479dc2c 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -923,7 +923,7 @@ __hyp_panic_str: .align 2 /* - * u64 kvm_call_hyp(void *hypfn, ...); + * u64 __kvm_call_hyp(void *hypfn, ...); * * This is not really a variadic function in the classic C-way and care must * be taken when calling this to ensure parameters are passed in registers @@ -940,10 +940,10 @@ __hyp_panic_str: * used to implement __hyp_get_vectors in the same way as in * arch/arm64/kernel/hyp_stub.S. */ -ENTRY(kvm_call_hyp) +ENTRY(__kvm_call_hyp) hvc #0 ret -ENDPROC(kvm_call_hyp) +ENDPROC(__kvm_call_hyp) .macro invalid_vector label, target .align 2 diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 1a811ecf71da..c86b7909ef31 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -4,15 +4,16 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ strchr.o strrchr.o -# Tell the compiler to treat all general purpose registers as -# callee-saved, which allows for efficient runtime patching of the bl -# instruction in the caller with an atomic instruction when supported by -# the CPU. Result and argument registers are handled correctly, based on -# the function prototype. +# Tell the compiler to treat all general purpose registers (with the +# exception of the IP registers, which are already handled by the caller +# in case of a PLT) as callee-saved, which allows for efficient runtime +# patching of the bl instruction in the caller with an atomic instruction +# when supported by the CPU. Result and argument registers are handled +# correctly, based on the function prototype. lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ - -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18 + -fcall-saved-x18 diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index a9723c71c52b..5d1cad3ce6d6 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -33,28 +33,28 @@ * Alignment fixed up by hardware. */ ENTRY(__clear_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f 1: -USER(9f, str xzr, [x0], #8 ) +uao_user_alternative 9f, str, sttr, xzr, x0, 8 subs x1, x1, #8 b.pl 1b 2: adds x1, x1, #4 b.mi 3f -USER(9f, str wzr, [x0], #4 ) +uao_user_alternative 9f, str, sttr, wzr, x0, 4 sub x1, x1, #4 3: adds x1, x1, #2 b.mi 4f -USER(9f, strh wzr, [x0], #2 ) +uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 sub x1, x1, #2 4: adds x1, x1, #1 b.mi 5f -USER(9f, strb wzr, [x0] ) +uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 4699cd74f87e..17e8306dca29 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -34,7 +34,7 @@ */ .macro ldrb1 ptr, regB, val - USER(9998f, ldrb \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val .endm .macro strb1 ptr, regB, val @@ -42,7 +42,7 @@ .endm .macro ldrh1 ptr, regB, val - USER(9998f, ldrh \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val .endm .macro strh1 ptr, regB, val @@ -50,7 +50,7 @@ .endm .macro ldr1 ptr, regB, val - USER(9998f, ldr \ptr, [\regB], \val) + uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val .endm .macro str1 ptr, regB, val @@ -58,7 +58,7 @@ .endm .macro ldp1 ptr, regB, regC, val - USER(9998f, ldp \ptr, \regB, [\regC], \val) + uao_ldp 9998f, \ptr, \regB, \regC, \val .endm .macro stp1 ptr, regB, regC, val @@ -67,11 +67,11 @@ end .req x5 ENTRY(__copy_from_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 // Nothing to copy ret diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 81c8fc93c100..f7292dd08c84 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -35,44 +35,44 @@ * x0 - bytes not copied */ .macro ldrb1 ptr, regB, val - USER(9998f, ldrb \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val .endm .macro strb1 ptr, regB, val - USER(9998f, strb \ptr, [\regB], \val) + uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val .endm .macro ldrh1 ptr, regB, val - USER(9998f, ldrh \ptr, [\regB], \val) + uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val .endm .macro strh1 ptr, regB, val - USER(9998f, strh \ptr, [\regB], \val) + uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val .endm .macro ldr1 ptr, regB, val - USER(9998f, ldr \ptr, [\regB], \val) + uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val .endm .macro str1 ptr, regB, val - USER(9998f, str \ptr, [\regB], \val) + uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val .endm .macro ldp1 ptr, regB, regC, val - USER(9998f, ldp \ptr, \regB, [\regC], \val) + uao_ldp 9998f, \ptr, \regB, \regC, \val .endm .macro stp1 ptr, regB, regC, val - USER(9998f, stp \ptr, \regB, [\regC], \val) + uao_stp 9998f, \ptr, \regB, \regC, \val .endm end .req x5 ENTRY(__copy_in_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 512b9a7b980e..4c1e700840b6 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -18,6 +18,8 @@ #include <linux/const.h> #include <asm/assembler.h> #include <asm/page.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> /* * Copy a page from src to dest (both are page aligned) @@ -27,20 +29,65 @@ * x1 - src */ ENTRY(copy_page) - /* Assume cache line size is 64 bytes. */ - prfm pldl1strm, [x1, #64] -1: ldp x2, x3, [x1] +alternative_if_not ARM64_HAS_NO_HW_PREFETCH + nop + nop +alternative_else + # Prefetch two cache lines ahead. + prfm pldl1strm, [x1, #128] + prfm pldl1strm, [x1, #256] +alternative_endif + + ldp x2, x3, [x1] ldp x4, x5, [x1, #16] ldp x6, x7, [x1, #32] ldp x8, x9, [x1, #48] - add x1, x1, #64 - prfm pldl1strm, [x1, #64] + ldp x10, x11, [x1, #64] + ldp x12, x13, [x1, #80] + ldp x14, x15, [x1, #96] + ldp x16, x17, [x1, #112] + + mov x18, #(PAGE_SIZE - 128) + add x1, x1, #128 +1: + subs x18, x18, #128 + +alternative_if_not ARM64_HAS_NO_HW_PREFETCH + nop +alternative_else + prfm pldl1strm, [x1, #384] +alternative_endif + stnp x2, x3, [x0] + ldp x2, x3, [x1] stnp x4, x5, [x0, #16] + ldp x4, x5, [x1, #16] stnp x6, x7, [x0, #32] + ldp x6, x7, [x1, #32] stnp x8, x9, [x0, #48] - add x0, x0, #64 - tst x1, #(PAGE_SIZE - 1) - b.ne 1b + ldp x8, x9, [x1, #48] + stnp x10, x11, [x0, #64] + ldp x10, x11, [x1, #64] + stnp x12, x13, [x0, #80] + ldp x12, x13, [x1, #80] + stnp x14, x15, [x0, #96] + ldp x14, x15, [x1, #96] + stnp x16, x17, [x0, #112] + ldp x16, x17, [x1, #112] + + add x0, x0, #128 + add x1, x1, #128 + + b.gt 1b + + stnp x2, x3, [x0] + stnp x4, x5, [x0, #16] + stnp x6, x7, [x0, #32] + stnp x8, x9, [x0, #48] + stnp x10, x11, [x0, #64] + stnp x12, x13, [x0, #80] + stnp x14, x15, [x0, #96] + stnp x16, x17, [x0, #112] + ret ENDPROC(copy_page) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 7512bbbc07ac..21faae60f988 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -37,7 +37,7 @@ .endm .macro strb1 ptr, regB, val - USER(9998f, strb \ptr, [\regB], \val) + uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val .endm .macro ldrh1 ptr, regB, val @@ -45,7 +45,7 @@ .endm .macro strh1 ptr, regB, val - USER(9998f, strh \ptr, [\regB], \val) + uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val .endm .macro ldr1 ptr, regB, val @@ -53,7 +53,7 @@ .endm .macro str1 ptr, regB, val - USER(9998f, str \ptr, [\regB], \val) + uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val .endm .macro ldp1 ptr, regB, regC, val @@ -61,16 +61,16 @@ .endm .macro stp1 ptr, regB, regC, val - USER(9998f, stp \ptr, \regB, [\regC], \val) + uao_stp 9998f, \ptr, \regB, \regC, \val .endm end .req x5 ENTRY(__copy_to_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index cfa44a6adc0a..6df07069a025 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -81,26 +81,32 @@ ENDPROC(__flush_cache_user_range) /* * __flush_dcache_area(kaddr, size) * - * Ensure that the data held in the page kaddr is written back to the - * page in question. + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned and invalidated to the PoC. * * - kaddr - kernel address * - size - size in question */ ENTRY(__flush_dcache_area) - dcache_line_size x2, x3 - add x1, x0, x1 - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 // clean & invalidate D line / unified line - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy + dcache_by_line_op civac, sy, x0, x1, x2, x3 ret ENDPIPROC(__flush_dcache_area) /* + * __clean_dcache_area_pou(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoU. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_pou) + dcache_by_line_op cvau, ish, x0, x1, x2, x3 + ret +ENDPROC(__clean_dcache_area_pou) + +/* * __inval_cache_range(start, end) * - start - start address of region * - end - end address of region diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index e87f53ff5f58..7275628ba59f 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -187,7 +187,7 @@ switch_mm_fastpath: static int asids_init(void) { - int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4); + int fld = cpuid_feature_extract_field(read_cpuid(SYS_ID_AA64MMFR0_EL1), 4); switch (fld) { default: diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 13bbc3be6f5a..22e4cb4d6f53 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -24,8 +24,9 @@ void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr) { + struct page *page = virt_to_page(kto); copy_page(kto, kfrom); - __flush_dcache_area(kto, PAGE_SIZE); + flush_dcache_page(page); } EXPORT_SYMBOL_GPL(__cpu_copy_user_page); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index ddfb97a299f2..925b2b3a06f8 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -40,7 +40,7 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, static struct gen_pool *atomic_pool; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K -static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE; +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; static int __init early_coherent_pool(char *p) { @@ -896,7 +896,7 @@ static int __iommu_attach_notifier(struct notifier_block *nb, return 0; } -static int register_iommu_dma_ops_notifier(struct bus_type *bus) +static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) { struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); int ret; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 5a22a119a74c..6be918478f85 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -35,7 +35,9 @@ struct addr_marker { }; enum address_markers_idx { - VMALLOC_START_NR = 0, + MODULES_START_NR = 0, + MODULES_END_NR, + VMALLOC_START_NR, VMALLOC_END_NR, #ifdef CONFIG_SPARSEMEM_VMEMMAP VMEMMAP_START_NR, @@ -45,12 +47,12 @@ enum address_markers_idx { FIXADDR_END_NR, PCI_START_NR, PCI_END_NR, - MODULES_START_NR, - MODUELS_END_NR, KERNEL_SPACE_NR, }; static struct addr_marker address_markers[] = { + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, { VMALLOC_START, "vmalloc() Area" }, { VMALLOC_END, "vmalloc() End" }, #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -61,9 +63,7 @@ static struct addr_marker address_markers[] = { { FIXADDR_TOP, "Fixmap end" }, { PCI_IO_START, "PCI I/O start" }, { PCI_IO_END, "PCI I/O end" }, - { MODULES_VADDR, "Modules start" }, - { MODULES_END, "Modules end" }, - { PAGE_OFFSET, "Kernel Mapping" }, + { PAGE_OFFSET, "Linear Mapping" }, { -1, NULL }, }; @@ -90,6 +90,11 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { + .mask = PTE_VALID, + .val = PTE_VALID, + .set = " ", + .clear = "F", + }, { .mask = PTE_USER, .val = PTE_USER, .set = "USR", diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 79444279ba8c..81acd4706878 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs) fixup = search_exception_tables(instruction_pointer(regs)); if (fixup) - regs->pc = fixup->fixup; + regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; return fixup != NULL; } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 92ddac1e8ca2..44e56de23f79 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -192,6 +192,14 @@ out: return fault; } +static inline int permission_fault(unsigned int esr) +{ + unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT; + unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; + + return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -225,12 +233,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - /* - * PAN bit set implies the fault happened in kernel space, but not - * in the arch's user access functions. - */ - if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT)) - goto no_context; + if (permission_fault(esr) && (addr < USER_DS)) { + if (get_fs() == KERNEL_DS) + die("Accessing user space memory with fs=KERNEL_DS", regs, esr); + + if (!search_exception_tables(regs->pc)) + die("Accessing user space memory outside uaccess.h routines", regs, esr); + } /* * As per x86, we may deadlock here. However, since the kernel only @@ -561,3 +570,16 @@ void cpu_enable_pan(void *__unused) config_sctlr_el1(SCTLR_EL1_SPAN, 0); } #endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_UAO +/* + * Kernel threads have fs=KERNEL_DS by default, and don't need to call + * set_fs(), devtmpfs in particular relies on this behaviour. + * We need to enable the feature at runtime (instead of adding it to + * PSR_MODE_EL1h) as the feature may not be implemented by the cpu. + */ +void cpu_enable_uao(void *__unused) +{ + asm(SET_PSTATE_UAO(1)); +} +#endif /* CONFIG_ARM64_UAO */ diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index c26b804015e8..46649d6e6c5a 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -34,19 +34,24 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, __flush_icache_all(); } +static void sync_icache_aliases(void *kaddr, unsigned long len) +{ + unsigned long addr = (unsigned long)kaddr; + + if (icache_is_aliasing()) { + __clean_dcache_area_pou(kaddr, len); + __flush_icache_all(); + } else { + flush_icache_range(addr, addr + len); + } +} + static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *kaddr, unsigned long len) { - if (vma->vm_flags & VM_EXEC) { - unsigned long addr = (unsigned long)kaddr; - if (icache_is_aliasing()) { - __flush_dcache_area(kaddr, len); - __flush_icache_all(); - } else { - flush_icache_range(addr, addr + len); - } - } + if (vma->vm_flags & VM_EXEC) + sync_icache_aliases(kaddr, len); } /* @@ -74,13 +79,11 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) if (!page_mapping(page)) return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), - PAGE_SIZE << compound_order(page)); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + sync_icache_aliases(page_address(page), + PAGE_SIZE << compound_order(page)); + else if (icache_is_aivivt()) __flush_icache_all(); - } else if (icache_is_aivivt()) { - __flush_icache_all(); - } } /* diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 383b03ff38f8..da30529bb1f6 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -41,15 +41,273 @@ int pud_huge(pud_t pud) #endif } +static int find_num_contig(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, size_t *pgsize) +{ + pgd_t *pgd = pgd_offset(mm, addr); + pud_t *pud; + pmd_t *pmd; + + *pgsize = PAGE_SIZE; + if (!pte_cont(pte)) + return 1; + if (!pgd_present(*pgd)) { + VM_BUG_ON(!pgd_present(*pgd)); + return 1; + } + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) { + VM_BUG_ON(!pud_present(*pud)); + return 1; + } + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) { + VM_BUG_ON(!pmd_present(*pmd)); + return 1; + } + if ((pte_t *)pmd == ptep) { + *pgsize = PMD_SIZE; + return CONT_PMDS; + } + return CONT_PTES; +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + size_t pgsize; + int i; + int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize); + unsigned long pfn; + pgprot_t hugeprot; + + if (ncontig == 1) { + set_pte_at(mm, addr, ptep, pte); + return; + } + + pfn = pte_pfn(pte); + hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte)); + for (i = 0; i < ncontig; i++) { + pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep, + pte_val(pfn_pte(pfn, hugeprot))); + set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); + ptep++; + pfn += pgsize >> PAGE_SHIFT; + addr += pgsize; + } +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return NULL; + + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + } else if (sz == (PAGE_SIZE * CONT_PTES)) { + pmd_t *pmd = pmd_alloc(mm, pud, addr); + + WARN_ON(addr & (sz - 1)); + /* + * Note that if this code were ever ported to the + * 32-bit arm platform then it will cause trouble in + * the case where CONFIG_HIGHPTE is set, since there + * will be no pte_unmap() to correspond with this + * pte_alloc_map(). + */ + pte = pte_alloc_map(mm, NULL, pmd, addr); + } else if (sz == PMD_SIZE) { + if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && + pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } else if (sz == (PMD_SIZE * CONT_PMDS)) { + pmd_t *pmd; + + pmd = pmd_alloc(mm, pud, addr); + WARN_ON(addr & (sz - 1)); + return (pte_t *)pmd; + } + + pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr, + sz, pte, pte_val(*pte)); + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); + if (!pgd_present(*pgd)) + return NULL; + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + return NULL; + + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + + if (pte_cont(pmd_pte(*pmd))) { + pmd = pmd_offset( + pud, (addr & CONT_PMD_MASK)); + return (pte_t *)pmd; + } + if (pmd_huge(*pmd)) + return (pte_t *)pmd; + pte = pte_offset_kernel(pmd, addr); + if (pte_present(*pte) && pte_cont(*pte)) { + pte = pte_offset_kernel( + pmd, (addr & CONT_PTE_MASK)); + return pte; + } + return NULL; +} + +pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + size_t pagesize = huge_page_size(hstate_vma(vma)); + + if (pagesize == CONT_PTE_SIZE) { + entry = pte_mkcont(entry); + } else if (pagesize == CONT_PMD_SIZE) { + entry = pmd_pte(pmd_mkcont(pte_pmd(entry))); + } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + } + return entry; +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte; + + if (pte_cont(*ptep)) { + int ncontig, i; + size_t pgsize; + pte_t *cpte; + bool is_dirty = false; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + /* save the 1st pte to return */ + pte = ptep_get_and_clear(mm, addr, cpte); + for (i = 1; i < ncontig; ++i) { + /* + * If HW_AFDBM is enabled, then the HW could + * turn on the dirty bit for any of the page + * in the set, so check them all. + */ + ++cpte; + if (pte_dirty(ptep_get_and_clear(mm, addr, cpte))) + is_dirty = true; + } + if (is_dirty) + return pte_mkdirty(pte); + else + return pte; + } else { + return ptep_get_and_clear(mm, addr, ptep); + } +} + +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + pte_t *cpte; + + if (pte_cont(pte)) { + int ncontig, i, changed = 0; + size_t pgsize = 0; + unsigned long pfn = pte_pfn(pte); + /* Select all bits except the pfn */ + pgprot_t hugeprot = + __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ + pte_val(pte)); + + cpte = huge_pte_offset(vma->vm_mm, addr); + pfn = pte_pfn(*cpte); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) { + changed = ptep_set_access_flags(vma, addr, cpte, + pfn_pte(pfn, + hugeprot), + dirty); + pfn += pgsize >> PAGE_SHIFT; + } + return changed; + } else { + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + } +} + +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(mm, addr); + ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_set_wrprotect(mm, addr, cpte); + } else { + ptep_set_wrprotect(mm, addr, ptep); + } +} + +void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + if (pte_cont(*ptep)) { + int ncontig, i; + pte_t *cpte; + size_t pgsize = 0; + + cpte = huge_pte_offset(vma->vm_mm, addr); + ncontig = find_num_contig(vma->vm_mm, addr, cpte, + *cpte, &pgsize); + for (i = 0; i < ncontig; ++i, ++cpte) + ptep_clear_flush(vma, addr, cpte); + } else { + ptep_clear_flush(vma, addr, ptep); + } +} + static __init int setup_hugepagesz(char *opt) { unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); } else { - pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); return 0; } return 1; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 224238943aff..2c38be3df4c8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,7 +35,10 @@ #include <linux/efi.h> #include <linux/swiotlb.h> +#include <asm/boot.h> #include <asm/fixmap.h> +#include <asm/kasan.h> +#include <asm/kernel-pgtable.h> #include <asm/memory.h> #include <asm/sections.h> #include <asm/setup.h> @@ -45,7 +48,13 @@ #include "mm.h" -phys_addr_t memstart_addr __read_mostly = 0; +/* + * We need to be able to catch inadvertent references to memstart_addr + * that occur (potentially in generic code) before arm64_memblock_init() + * executes, which assigns it its actual value. So use a default value + * that cannot be mistaken for a real physical address. + */ +s64 memstart_addr __read_mostly = -1; phys_addr_t arm64_dma_phys_limit __read_mostly; #ifdef CONFIG_BLK_DEV_INITRD @@ -58,8 +67,8 @@ static int __init early_initrd(char *p) if (*endp == ',') { size = memparse(endp + 1, NULL); - initrd_start = (unsigned long)__va(start); - initrd_end = (unsigned long)__va(start + size); + initrd_start = start; + initrd_end = start + size; } return 0; } @@ -71,7 +80,7 @@ early_param("initrd", early_initrd); * currently assumes that for memory starting above 4G, 32-bit devices will * use a DMA offset. */ -static phys_addr_t max_zone_dma_phys(void) +static phys_addr_t __init max_zone_dma_phys(void) { phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); return min(offset + (1ULL << 32), memblock_end_of_DRAM()); @@ -128,11 +137,11 @@ EXPORT_SYMBOL(pfn_valid); #endif #ifndef CONFIG_SPARSEMEM -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { } #else -static void arm64_memory_present(void) +static void __init arm64_memory_present(void) { struct memblock_region *reg; @@ -161,7 +170,57 @@ early_param("mem", early_mem); void __init arm64_memblock_init(void) { - memblock_enforce_memory_limit(memory_limit); + const s64 linear_region_size = -(s64)PAGE_OFFSET; + + /* + * Ensure that the linear region takes up exactly half of the kernel + * virtual address space. This way, we can distinguish a linear address + * from a kernel/module/vmalloc address by testing a single bit. + */ + BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1)); + + /* + * Select a suitable value for the base of physical memory. + */ + memstart_addr = round_down(memblock_start_of_DRAM(), + ARM64_MEMSTART_ALIGN); + + /* + * Remove the memory that we will not be able to cover with the + * linear mapping. Take care not to clip the kernel which may be + * high in memory. + */ + memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)), + ULLONG_MAX); + if (memblock_end_of_DRAM() > linear_region_size) + memblock_remove(0, memblock_end_of_DRAM() - linear_region_size); + + /* + * Apply the memory limit if it was set. Since the kernel may be loaded + * high up in memory, add back the kernel region that must be accessible + * via the linear mapping. + */ + if (memory_limit != (phys_addr_t)ULLONG_MAX) { + memblock_enforce_memory_limit(memory_limit); + memblock_add(__pa(_text), (u64)(_end - _text)); + } + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + extern u16 memstart_offset_seed; + u64 range = linear_region_size - + (memblock_end_of_DRAM() - memblock_start_of_DRAM()); + + /* + * If the size of the linear region exceeds, by a sufficient + * margin, the size of the region that the available physical + * memory spans, randomize the linear region as well. + */ + if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) { + range = range / ARM64_MEMSTART_ALIGN + 1; + memstart_addr -= ARM64_MEMSTART_ALIGN * + ((range * memstart_offset_seed) >> 16); + } + } /* * Register the kernel text, kernel data, initrd, and initial @@ -169,8 +228,13 @@ void __init arm64_memblock_init(void) */ memblock_reserve(__pa(_text), _end - _text); #ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) - memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start); + if (initrd_start) { + memblock_reserve(initrd_start, initrd_end - initrd_start); + + /* the generic initrd code expects virtual addresses */ + initrd_start = __phys_to_virt(initrd_start); + initrd_end = __phys_to_virt(initrd_end); + } #endif early_init_fdt_scan_reserved_mem(); @@ -304,35 +368,36 @@ void __init mem_init(void) #ifdef CONFIG_KASAN " kasan : 0x%16lx - 0x%16lx (%6ld GB)\n" #endif + " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" " vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n" + " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" + " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" + " .data : 0x%p" " - 0x%p" " (%6ld KB)\n" #ifdef CONFIG_SPARSEMEM_VMEMMAP " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" " 0x%16lx - 0x%16lx (%6ld MB actual)\n" #endif " fixed : 0x%16lx - 0x%16lx (%6ld KB)\n" " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" - " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" - " memory : 0x%16lx - 0x%16lx (%6ld MB)\n" - " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" - " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" - " .data : 0x%p" " - 0x%p" " (%6ld KB)\n", + " memory : 0x%16lx - 0x%16lx (%6ld MB)\n", #ifdef CONFIG_KASAN MLG(KASAN_SHADOW_START, KASAN_SHADOW_END), #endif + MLM(MODULES_VADDR, MODULES_END), MLG(VMALLOC_START, VMALLOC_END), + MLK_ROUNDUP(__init_begin, __init_end), + MLK_ROUNDUP(_text, _etext), + MLK_ROUNDUP(_sdata, _edata), #ifdef CONFIG_SPARSEMEM_VMEMMAP MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE), - MLM((unsigned long)virt_to_page(PAGE_OFFSET), + MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), (unsigned long)virt_to_page(high_memory)), #endif MLK(FIXADDR_START, FIXADDR_TOP), MLM(PCI_IO_START, PCI_IO_END), - MLM(MODULES_VADDR, MODULES_END), - MLM(PAGE_OFFSET, (unsigned long)high_memory), - MLK_ROUNDUP(__init_begin, __init_end), - MLK_ROUNDUP(_text, _etext), - MLK_ROUNDUP(_sdata, _edata)); + MLM(__phys_to_virt(memblock_start_of_DRAM()), + (unsigned long)high_memory)); #undef MLK #undef MLM @@ -360,9 +425,8 @@ void __init mem_init(void) void free_initmem(void) { - fixup_init(); free_initmem_default(0); - free_alternatives_memory(); + fixup_init(); } #ifdef CONFIG_BLK_DEV_INITRD @@ -383,3 +447,28 @@ static int __init keepinitrd_setup(char *__unused) __setup("keepinitrd", keepinitrd_setup); #endif + +/* + * Dump out memory limit information on panic. + */ +static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p) +{ + if (memory_limit != (phys_addr_t)ULLONG_MAX) { + pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20); + } else { + pr_emerg("Memory Limit: none\n"); + } + return 0; +} + +static struct notifier_block mem_limit_notifier = { + .notifier_call = dump_mem_limit, +}; + +static int __init register_mem_limit_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &mem_limit_notifier); + return 0; +} +__initcall(register_mem_limit_dumper); diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index cf038c7d9fa9..757009daa9ed 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -16,9 +16,12 @@ #include <linux/memblock.h> #include <linux/start_kernel.h> +#include <asm/mmu_context.h> +#include <asm/kernel-pgtable.h> #include <asm/page.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> +#include <asm/sections.h> #include <asm/tlbflush.h> static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); @@ -32,7 +35,7 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, if (pmd_none(*pmd)) pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); - pte = pte_offset_kernel(pmd, addr); + pte = pte_offset_kimg(pmd, addr); do { next = addr + PAGE_SIZE; set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page), @@ -50,7 +53,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud, if (pud_none(*pud)) pud_populate(&init_mm, pud, kasan_zero_pmd); - pmd = pmd_offset(pud, addr); + pmd = pmd_offset_kimg(pud, addr); do { next = pmd_addr_end(addr, end); kasan_early_pte_populate(pmd, addr, next); @@ -67,7 +70,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd, if (pgd_none(*pgd)) pgd_populate(&init_mm, pgd, kasan_zero_pud); - pud = pud_offset(pgd, addr); + pud = pud_offset_kimg(pgd, addr); do { next = pud_addr_end(addr, end); kasan_early_pmd_populate(pud, addr, next); @@ -96,6 +99,21 @@ asmlinkage void __init kasan_early_init(void) kasan_map_early_shadow(); } +/* + * Copy the current shadow region into a new pgdir. + */ +void __init kasan_copy_shadow(pgd_t *pgdir) +{ + pgd_t *pgd, *pgd_new, *pgd_end; + + pgd = pgd_offset_k(KASAN_SHADOW_START); + pgd_end = pgd_offset_k(KASAN_SHADOW_END); + pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START); + do { + set_pgd(pgd_new, *pgd); + } while (pgd++, pgd_new++, pgd != pgd_end); +} + static void __init clear_pgds(unsigned long start, unsigned long end) { @@ -108,18 +126,18 @@ static void __init clear_pgds(unsigned long start, set_pgd(pgd_offset_k(start), __pgd(0)); } -static void __init cpu_set_ttbr1(unsigned long ttbr1) -{ - asm( - " msr ttbr1_el1, %0\n" - " isb" - : - : "r" (ttbr1)); -} - void __init kasan_init(void) { + u64 kimg_shadow_start, kimg_shadow_end; + u64 mod_shadow_start, mod_shadow_end; struct memblock_region *reg; + int i; + + kimg_shadow_start = (u64)kasan_mem_to_shadow(_text); + kimg_shadow_end = (u64)kasan_mem_to_shadow(_end); + + mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); + mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); /* * We are going to perform proper setup of shadow memory. @@ -129,13 +147,33 @@ void __init kasan_init(void) * setup will be finished. */ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); - cpu_set_ttbr1(__pa(tmp_pg_dir)); - flush_tlb_all(); + dsb(ishst); + cpu_replace_ttbr1(tmp_pg_dir); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + vmemmap_populate(kimg_shadow_start, kimg_shadow_end, + pfn_to_nid(virt_to_pfn(_text))); + + /* + * vmemmap_populate() has populated the shadow region that covers the + * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round + * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent + * kasan_populate_zero_shadow() from replacing the page table entries + * (PMD or PTE) at the edges of the shadow region for the kernel + * image. + */ + kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE); + kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE); + kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, - kasan_mem_to_shadow((void *)MODULES_VADDR)); + (void *)mod_shadow_start); + kasan_populate_zero_shadow((void *)kimg_shadow_end, + kasan_mem_to_shadow((void *)PAGE_OFFSET)); + + if (kimg_shadow_start > mod_shadow_end) + kasan_populate_zero_shadow((void *)mod_shadow_end, + (void *)kimg_shadow_start); for_each_memblock(memory, reg) { void *start = (void *)__phys_to_virt(reg->base); @@ -155,9 +193,16 @@ void __init kasan_init(void) pfn_to_nid(virt_to_pfn(start))); } + /* + * KAsan may reuse the contents of kasan_zero_pte directly, so we + * should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(&kasan_zero_pte[i], + pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO)); + memset(kasan_zero_page, 0, PAGE_SIZE); - cpu_set_ttbr1(__pa(swapper_pg_dir)); - flush_tlb_all(); + cpu_replace_ttbr1(swapper_pg_dir); /* At this point kasan is fully initialized. Enable error messages */ init_task.kasan_depth = 0; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 116ad654dd59..41421c724fb9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -30,8 +30,10 @@ #include <linux/slab.h> #include <linux/stop_machine.h> +#include <asm/barrier.h> #include <asm/cputype.h> #include <asm/fixmap.h> +#include <asm/kasan.h> #include <asm/kernel-pgtable.h> #include <asm/sections.h> #include <asm/setup.h> @@ -44,13 +46,20 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 kimage_voffset __read_mostly; +EXPORT_SYMBOL(kimage_voffset); + /* * Empty_zero_page is a special page that is used for zero-initialized data * and COW. */ -struct page *empty_zero_page; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -62,16 +71,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static void __init *early_alloc(unsigned long sz) +static phys_addr_t __init early_pgtable_alloc(void) { phys_addr_t phys; void *ptr; - phys = memblock_alloc(sz, sz); + phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); BUG_ON(!phys); - ptr = __va(phys); - memset(ptr, 0, sz); - return ptr; + + /* + * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE + * slot will be free, so we can (ab)use the FIX_PTE slot to initialise + * any level of table. + */ + ptr = pte_set_fixmap(phys); + + memset(ptr, 0, PAGE_SIZE); + + /* + * Implicit barriers also ensure the zeroed page is visible to the page + * table walker + */ + pte_clear_fixmap(); + + return phys; } /* @@ -95,24 +118,30 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, - void *(*alloc)(unsigned long size)) + phys_addr_t (*pgtable_alloc)(void)) { pte_t *pte; if (pmd_none(*pmd) || pmd_sect(*pmd)) { - pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + phys_addr_t pte_phys; + BUG_ON(!pgtable_alloc); + pte_phys = pgtable_alloc(); + pte = pte_set_fixmap(pte_phys); if (pmd_sect(*pmd)) split_pmd(pmd, pte); - __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); flush_tlb_all(); + pte_clear_fixmap(); } BUG_ON(pmd_bad(*pmd)); - pte = pte_offset_kernel(pmd, addr); + pte = pte_set_fixmap_offset(pmd, addr); do { set_pte(pte, pfn_pte(pfn, prot)); pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); + + pte_clear_fixmap(); } static void split_pud(pud_t *old_pud, pmd_t *pmd) @@ -127,10 +156,29 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd) } while (pmd++, i++, i < PTRS_PER_PMD); } -static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, +#ifdef CONFIG_DEBUG_PAGEALLOC +static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) +{ + + /* + * If debug_page_alloc is enabled we must map the linear map + * using pages. However, other mappings created by + * create_mapping_noalloc must use sections in some cases. Allow + * sections to be used in those cases, where no pgtable_alloc + * function is provided. + */ + return !pgtable_alloc || !debug_pagealloc_enabled(); +} +#else +static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) +{ + return true; +} +#endif + +static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + phys_addr_t (*pgtable_alloc)(void)) { pmd_t *pmd; unsigned long next; @@ -139,7 +187,10 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_sect(*pud)) { - pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + phys_addr_t pmd_phys; + BUG_ON(!pgtable_alloc); + pmd_phys = pgtable_alloc(); + pmd = pmd_set_fixmap(pmd_phys); if (pud_sect(*pud)) { /* * need to have the 1G of mappings continue to be @@ -147,19 +198,20 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, */ split_pud(pud, pmd); } - pud_populate(mm, pud, pmd); + __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); flush_tlb_all(); + pmd_clear_fixmap(); } BUG_ON(pud_bad(*pud)); - pmd = pmd_offset(pud, addr); + pmd = pmd_set_fixmap_offset(pud, addr); do { next = pmd_addr_end(addr, end); /* try section mapping first */ - if (((addr | next | phys) & ~SECTION_MASK) == 0) { + if (((addr | next | phys) & ~SECTION_MASK) == 0 && + block_mappings_allowed(pgtable_alloc)) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | - pgprot_val(mk_sect_prot(prot)))); + pmd_set_huge(pmd, phys, prot); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. @@ -167,17 +219,19 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, if (!pmd_none(old_pmd)) { flush_tlb_all(); if (pmd_table(old_pmd)) { - phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0)); + phys_addr_t table = pmd_page_paddr(old_pmd); if (!WARN_ON_ONCE(slab_is_available())) memblock_free(table, PAGE_SIZE); } } } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, alloc); + prot, pgtable_alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); + + pmd_clear_fixmap(); } static inline bool use_1G_block(unsigned long addr, unsigned long next, @@ -192,31 +246,32 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, return true; } -static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, +static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + phys_addr_t (*pgtable_alloc)(void)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); - pgd_populate(mm, pgd, pud); + phys_addr_t pud_phys; + BUG_ON(!pgtable_alloc); + pud_phys = pgtable_alloc(); + __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE); } BUG_ON(pgd_bad(*pgd)); - pud = pud_offset(pgd, addr); + pud = pud_set_fixmap_offset(pgd, addr); do { next = pud_addr_end(addr, end); /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys)) { + if (use_1G_block(addr, next, phys) && + block_mappings_allowed(pgtable_alloc)) { pud_t old_pud = *pud; - set_pud(pud, __pud(phys | - pgprot_val(mk_sect_prot(prot)))); + pud_set_huge(pud, phys, prot); /* * If we have an old value for a pud, it will @@ -228,51 +283,74 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, if (!pud_none(old_pud)) { flush_tlb_all(); if (pud_table(old_pud)) { - phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); + phys_addr_t table = pud_page_paddr(old_pud); if (!WARN_ON_ONCE(slab_is_available())) memblock_free(table, PAGE_SIZE); } } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); + alloc_init_pmd(pud, addr, next, phys, prot, + pgtable_alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); + + pud_clear_fixmap(); } /* * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. */ -static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, - phys_addr_t phys, unsigned long virt, +static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - void *(*alloc)(unsigned long size)) + phys_addr_t (*pgtable_alloc)(void)) { unsigned long addr, length, end, next; + /* + * If the virtual and physical address don't have the same offset + * within a page, we cannot map the region as the caller expects. + */ + if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) + return; + + phys &= PAGE_MASK; addr = virt & PAGE_MASK; length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); + alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void *late_alloc(unsigned long size) +static phys_addr_t late_pgtable_alloc(void) { - void *ptr; - - BUG_ON(size > PAGE_SIZE); - ptr = (void *)__get_free_page(PGALLOC_GFP); + void *ptr = (void *)__get_free_page(PGALLOC_GFP); BUG_ON(!ptr); - return ptr; + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); + return __pa(ptr); +} + +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*alloc)(void)) +{ + init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc); } -static void __init create_mapping(phys_addr_t phys, unsigned long virt, +/* + * This function can only be used to modify existing table entries, + * without allocating new levels of table. Note that this permits the + * creation of new section or page entries. + */ +static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { @@ -280,16 +358,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, - size, prot, early_alloc); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, + NULL); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, - late_alloc); + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, + late_pgtable_alloc); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -301,69 +379,57 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, return; } - return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), - phys, virt, size, prot, late_alloc); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, + late_pgtable_alloc); } -#ifdef CONFIG_DEBUG_RODATA -static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { + unsigned long kernel_start = __pa(_stext); + unsigned long kernel_end = __pa(_etext); + /* - * Set up the executable regions using the existing section mappings - * for now. This will get more fine grained later once all memory - * is mapped + * Take care not to create a writable alias for the + * read-only text and rodata sections of the kernel image. */ - unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE); - unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE); - - if (end < kernel_x_start) { - create_mapping(start, __phys_to_virt(start), - end - start, PAGE_KERNEL); - } else if (start >= kernel_x_end) { - create_mapping(start, __phys_to_virt(start), - end - start, PAGE_KERNEL); - } else { - if (start < kernel_x_start) - create_mapping(start, __phys_to_virt(start), - kernel_x_start - start, - PAGE_KERNEL); - create_mapping(kernel_x_start, - __phys_to_virt(kernel_x_start), - kernel_x_end - kernel_x_start, - PAGE_KERNEL_EXEC); - if (kernel_x_end < end) - create_mapping(kernel_x_end, - __phys_to_virt(kernel_x_end), - end - kernel_x_end, - PAGE_KERNEL); + + /* No overlap with the kernel text */ + if (end < kernel_start || start >= kernel_end) { + __create_pgd_mapping(pgd, start, __phys_to_virt(start), + end - start, PAGE_KERNEL, + early_pgtable_alloc); + return; } + /* + * This block overlaps the kernel text mapping. + * Map the portion(s) which don't overlap. + */ + if (start < kernel_start) + __create_pgd_mapping(pgd, start, + __phys_to_virt(start), + kernel_start - start, PAGE_KERNEL, + early_pgtable_alloc); + if (kernel_end < end) + __create_pgd_mapping(pgd, kernel_end, + __phys_to_virt(kernel_end), + end - kernel_end, PAGE_KERNEL, + early_pgtable_alloc); + + /* + * Map the linear alias of the [_stext, _etext) interval as + * read-only/non-executable. This makes the contents of the + * region accessible to subsystems such as hibernate, but + * protects it from inadvertent modification or execution. + */ + __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), + kernel_end - kernel_start, PAGE_KERNEL_RO, + early_pgtable_alloc); } -#else -static void __init __map_memblock(phys_addr_t start, phys_addr_t end) -{ - create_mapping(start, __phys_to_virt(start), end - start, - PAGE_KERNEL_EXEC); -} -#endif -static void __init map_mem(void) +static void __init map_mem(pgd_t *pgd) { struct memblock_region *reg; - phys_addr_t limit; - - /* - * Temporarily limit the memblock range. We need to do this as - * create_mapping requires puds, pmds and ptes to be allocated from - * memory addressable from the initial direct kernel mapping. - * - * The initial direct kernel mapping, located at swapper_pg_dir, gives - * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps, - * memory starting from PHYS_OFFSET (which must be aligned to 2MB as - * per Documentation/arm64/booting.txt). - */ - limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE; - memblock_set_current_limit(limit); /* map all the memory banks */ for_each_memblock(memory, reg) { @@ -373,69 +439,87 @@ static void __init map_mem(void) if (start >= end) break; - if (ARM64_SWAPPER_USES_SECTION_MAPS) { - /* - * For the first memory bank align the start address and - * current memblock limit to prevent create_mapping() from - * allocating pte page tables from unmapped memory. With - * the section maps, if the first block doesn't end on section - * size boundary, create_mapping() will try to allocate a pte - * page, which may be returned from an unmapped area. - * When section maps are not used, the pte page table for the - * current limit is already present in swapper_pg_dir. - */ - if (start < limit) - start = ALIGN(start, SECTION_SIZE); - if (end < limit) { - limit = end & SECTION_MASK; - memblock_set_current_limit(limit); - } - } - __map_memblock(start, end); + __map_memblock(pgd, start, end); } - - /* Limit no longer required. */ - memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } -static void __init fixup_executable(void) +void mark_rodata_ro(void) { -#ifdef CONFIG_DEBUG_RODATA - /* now that we are actually fully mapped, make the start/end more fine grained */ - if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) { - unsigned long aligned_start = round_down(__pa(_stext), - SWAPPER_BLOCK_SIZE); + if (!IS_ENABLED(CONFIG_DEBUG_RODATA)) + return; - create_mapping(aligned_start, __phys_to_virt(aligned_start), - __pa(_stext) - aligned_start, - PAGE_KERNEL); - } + create_mapping_late(__pa(_stext), (unsigned long)_stext, + (unsigned long)_etext - (unsigned long)_stext, + PAGE_KERNEL_ROX); +} - if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) { - unsigned long aligned_end = round_up(__pa(__init_end), - SWAPPER_BLOCK_SIZE); - create_mapping(__pa(__init_end), (unsigned long)__init_end, - aligned_end - __pa(__init_end), - PAGE_KERNEL); - } -#endif +void fixup_init(void) +{ + /* + * Unmap the __init region but leave the VM area in place. This + * prevents the region from being reused for kernel modules, which + * is not supported by kallsyms. + */ + unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void) +static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end, + pgprot_t prot, struct vm_struct *vma) { - create_mapping_late(__pa(_stext), (unsigned long)_stext, - (unsigned long)_etext - (unsigned long)_stext, - PAGE_KERNEL_ROX); + phys_addr_t pa_start = __pa(va_start); + unsigned long size = va_end - va_start; + + BUG_ON(!PAGE_ALIGNED(pa_start)); + BUG_ON(!PAGE_ALIGNED(size)); + + __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, + early_pgtable_alloc); + vma->addr = va_start; + vma->phys_addr = pa_start; + vma->size = size; + vma->flags = VM_MAP; + vma->caller = __builtin_return_address(0); + + vm_area_add_early(vma); } -#endif -void fixup_init(void) +/* + * Create fine-grained mappings for the kernel. + */ +static void __init map_kernel(pgd_t *pgd) { - create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, - (unsigned long)__init_end - (unsigned long)__init_begin, - PAGE_KERNEL); + static struct vm_struct vmlinux_text, vmlinux_init, vmlinux_data; + + map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); + map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, + &vmlinux_init); + map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); + + if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { + /* + * The fixmap falls in a separate pgd to the kernel, and doesn't + * live in the carveout for the swapper_pg_dir. We can simply + * re-use the existing dir for the fixmap. + */ + set_pgd(pgd_offset_raw(pgd, FIXADDR_START), + *pgd_offset_k(FIXADDR_START)); + } else if (CONFIG_PGTABLE_LEVELS > 3) { + /* + * The fixmap shares its top level pgd entry with the kernel + * mapping. This can really only occur when we are running + * with 16k/4 levels, so we can simply reuse the pud level + * entry instead. + */ + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), + __pud(__pa(bm_pmd) | PUD_TYPE_TABLE)); + pud_clear_fixmap(); + } else { + BUG(); + } + + kasan_copy_shadow(pgd); } /* @@ -444,28 +528,35 @@ void fixup_init(void) */ void __init paging_init(void) { - void *zero_page; + phys_addr_t pgd_phys = early_pgtable_alloc(); + pgd_t *pgd = pgd_set_fixmap(pgd_phys); - map_mem(); - fixup_executable(); + map_kernel(pgd); + map_mem(pgd); - /* allocate the zero page. */ - zero_page = early_alloc(PAGE_SIZE); - - bootmem_init(); - - empty_zero_page = virt_to_page(zero_page); + /* + * We want to reuse the original swapper_pg_dir so we don't have to + * communicate the new address to non-coherent secondaries in + * secondary_entry, and so cpu_switch_mm can generate the address with + * adrp+add rather than a load from some global variable. + * + * To do this we need to go via a temporary pgd. + */ + cpu_replace_ttbr1(__va(pgd_phys)); + memcpy(swapper_pg_dir, pgd, PAGE_SIZE); + cpu_replace_ttbr1(swapper_pg_dir); - /* Ensure the zero page is visible to the page table walker */ - dsb(ishst); + pgd_clear_fixmap(); + memblock_free(pgd_phys, PAGE_SIZE); /* - * TTBR0 is only used for the identity mapping at this stage. Make it - * point to zero page to avoid speculatively fetching new entries. + * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd + * allocated with it. */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE, + SWAPPER_DIR_SIZE - PAGE_SIZE); + + bootmem_init(); } /* @@ -552,21 +643,13 @@ void vmemmap_free(unsigned long start, unsigned long end) } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#if CONFIG_PGTABLE_LEVELS > 2 -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif -#if CONFIG_PGTABLE_LEVELS > 3 -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; -#endif - static inline pud_t * fixmap_pud(unsigned long addr) { pgd_t *pgd = pgd_offset_k(addr); BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - return pud_offset(pgd, addr); + return pud_offset_kimg(pgd, addr); } static inline pmd_t * fixmap_pmd(unsigned long addr) @@ -575,16 +658,12 @@ static inline pmd_t * fixmap_pmd(unsigned long addr) BUG_ON(pud_none(*pud) || pud_bad(*pud)); - return pmd_offset(pud, addr); + return pmd_offset_kimg(pud, addr); } static inline pte_t * fixmap_pte(unsigned long addr) { - pmd_t *pmd = fixmap_pmd(addr); - - BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); - - return pte_offset_kernel(pmd, addr); + return &bm_pte[pte_index(addr)]; } void __init early_fixmap_init(void) @@ -595,15 +674,26 @@ void __init early_fixmap_init(void) unsigned long addr = FIXADDR_START; pgd = pgd_offset_k(addr); - pgd_populate(&init_mm, pgd, bm_pud); - pud = pud_offset(pgd, addr); + if (CONFIG_PGTABLE_LEVELS > 3 && + !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) { + /* + * We only end up here if the kernel mapping and the fixmap + * share the top level pgd entry, which should only happen on + * 16k/4 levels configurations. + */ + BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); + pud = pud_offset_kimg(pgd, addr); + } else { + pgd_populate(&init_mm, pgd, bm_pud); + pud = fixmap_pud(addr); + } pud_populate(&init_mm, pud, bm_pmd); - pmd = pmd_offset(pud, addr); + pmd = fixmap_pmd(addr); pmd_populate_kernel(&init_mm, pmd, bm_pte); /* * The boot-ioremap range spans multiple pmds, for which - * we are not preparted: + * we are not prepared: */ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); @@ -642,11 +732,10 @@ void __set_fixmap(enum fixed_addresses idx, } } -void *__init fixmap_remap_fdt(phys_addr_t dt_phys) +void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); - pgprot_t prot = PAGE_KERNEL_RO; - int size, offset; + int offset; void *dt_virt; /* @@ -663,7 +752,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) /* * Make sure that the FDT region can be mapped without the need to * allocate additional translation table pages, so that it is safe - * to call create_mapping() this early. + * to call create_mapping_noalloc() this early. * * On 64k pages, the FDT will be mapped using PTEs, so we need to * be in the same PMD as the rest of the fixmap. @@ -679,21 +768,73 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) dt_virt = (void *)dt_virt_base + offset; /* map the first chunk so we can read the size from the header */ - create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, - SWAPPER_BLOCK_SIZE, prot); + create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), + dt_virt_base, SWAPPER_BLOCK_SIZE, prot); if (fdt_check_header(dt_virt) != 0) return NULL; - size = fdt_totalsize(dt_virt); - if (size > MAX_FDT_SIZE) + *size = fdt_totalsize(dt_virt); + if (*size > MAX_FDT_SIZE) return NULL; - if (offset + size > SWAPPER_BLOCK_SIZE) - create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, - round_up(offset + size, SWAPPER_BLOCK_SIZE), prot); + if (offset + *size > SWAPPER_BLOCK_SIZE) + create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot); - memblock_reserve(dt_phys, size); + return dt_virt; +} +void *__init fixmap_remap_fdt(phys_addr_t dt_phys) +{ + void *dt_virt; + int size; + + dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); + if (!dt_virt) + return NULL; + + memblock_reserve(dt_phys, size); return dt_virt; } + +int __init arch_ioremap_pud_supported(void) +{ + /* only 4k granule supports level 1 block mappings */ + return IS_ENABLED(CONFIG_ARM64_4K_PAGES); +} + +int __init arch_ioremap_pmd_supported(void) +{ + return 1; +} + +int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +{ + BUG_ON(phys & ~PUD_MASK); + set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +{ + BUG_ON(phys & ~PMD_MASK); + set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (!pud_sect(*pud)) + return 0; + pud_clear(pud); + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (!pmd_sect(*pmd)) + return 0; + pmd_clear(pmd); + return 1; +} diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index cf6240741134..ca6d268e3313 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -14,6 +14,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/vmalloc.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -36,14 +37,32 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr, return 0; } +/* + * This function assumes that the range is mapped with PAGE_SIZE pages. + */ +static int __change_memory_common(unsigned long start, unsigned long size, + pgprot_t set_mask, pgprot_t clear_mask) +{ + struct page_change_data data; + int ret; + + data.set_mask = set_mask; + data.clear_mask = clear_mask; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range, + &data); + + flush_tlb_kernel_range(start, start + size); + return ret; +} + static int change_memory_common(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { unsigned long start = addr; unsigned long size = PAGE_SIZE*numpages; unsigned long end = start + size; - int ret; - struct page_change_data data; + struct vm_struct *area; if (!PAGE_ALIGNED(addr)) { start &= PAGE_MASK; @@ -51,23 +70,29 @@ static int change_memory_common(unsigned long addr, int numpages, WARN_ON_ONCE(1); } - if (start < MODULES_VADDR || start >= MODULES_END) - return -EINVAL; - - if (end < MODULES_VADDR || end >= MODULES_END) + /* + * Kernel VA mappings are always live, and splitting live section + * mappings into page mappings may cause TLB conflicts. This means + * we have to ensure that changing the permission bits of the range + * we are operating on does not result in such splitting. + * + * Let's restrict ourselves to mappings created by vmalloc (or vmap). + * Those are guaranteed to consist entirely of page mappings, and + * splitting is never needed. + * + * So check whether the [addr, addr + size) interval is entirely + * covered by precisely one VM area that has the VM_ALLOC flag set. + */ + area = find_vm_area((void *)addr); + if (!area || + end > (unsigned long)area->addr + area->size || + !(area->flags & VM_ALLOC)) return -EINVAL; if (!numpages) return 0; - data.set_mask = set_mask; - data.clear_mask = clear_mask; - - ret = apply_to_page_range(&init_mm, start, size, change_page_range, - &data); - - flush_tlb_kernel_range(start, end); - return ret; + return __change_memory_common(start, size, set_mask, clear_mask); } int set_memory_ro(unsigned long addr, int numpages) @@ -99,3 +124,19 @@ int set_memory_x(unsigned long addr, int numpages) __pgprot(PTE_PXN)); } EXPORT_SYMBOL_GPL(set_memory_x); + +#ifdef CONFIG_DEBUG_PAGEALLOC +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long addr = (unsigned long) page_address(page); + + if (enable) + __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(PTE_VALID), + __pgprot(0)); + else + __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(0), + __pgprot(PTE_VALID)); +} +#endif diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index cb3ba1b812e7..ae11d4e03d0e 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -46,14 +46,14 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } -static int __init pgd_cache_init(void) +void __init pgd_cache_init(void) { + if (PGD_SIZE == PAGE_SIZE) + return; + /* * Naturally aligned pgds required by the architecture. */ - if (PGD_SIZE != PAGE_SIZE) - pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, - SLAB_PANIC, NULL); - return 0; + pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE, + SLAB_PANIC, NULL); } -core_initcall(pgd_cache_init); diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S index d69dffffaa89..984edcda1850 100644 --- a/arch/arm64/mm/proc-macros.S +++ b/arch/arm64/mm/proc-macros.S @@ -74,3 +74,25 @@ msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: dc \op, \kaddr + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b8f04b3f2786..0c19534a901e 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -140,7 +140,33 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) - .section ".text.init", #alloc, #execinstr + .pushsection ".idmap.text", "ax" +/* + * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * + * This is the low-level counterpart to cpu_replace_ttbr1, and should not be + * called by anything else. It can only be executed from a TTBR0 mapping. + */ +ENTRY(idmap_cpu_replace_ttbr1) + mrs x2, daif + msr daifset, #0xf + + adrp x1, empty_zero_page + msr ttbr1_el1, x1 + isb + + tlbi vmalle1 + dsb nsh + isb + + msr ttbr1_el1, x0 + isb + + msr daif, x2 + + ret +ENDPROC(idmap_cpu_replace_ttbr1) + .popsection /* * __cpu_setup diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 729f89163bc3..d2256fa97ea0 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -11,6 +11,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG + select BUILDTIME_EXTABLE_SORT select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index b3069fd83468..60e6f07b7e32 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -523,7 +523,7 @@ */ #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr) \ .section __ex_table,"aw" ! \ - ASM_ULONG_INSN fault_addr, except_addr ! \ + .word (fault_addr - .), (except_addr - .) ! \ .previous diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 1960b87c1c8b..6f893d29f1b2 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -60,14 +60,15 @@ static inline long access_ok(int type, const void __user * addr, * use a 32bit (unsigned int) address here. */ +#define ARCH_HAS_RELATIVE_EXTABLE struct exception_table_entry { - unsigned long insn; /* address of insn that is allowed to fault. */ - unsigned long fixup; /* fixup routine */ + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ }; #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ ".section __ex_table,\"aw\"\n" \ - ASM_WORD_INSN #fault_addr ", " #except_addr "\n\t" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ ".previous\n" /* diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f9064449908a..16dbe81c97c9 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -140,12 +140,6 @@ int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fix; - /* If we only stored 32bit addresses in the exception table we can drop - * out if we faulted on a 64bit address. */ - if ((sizeof(regs->iaoq[0]) > sizeof(fix->insn)) - && (regs->iaoq[0] >> 32)) - return 0; - fix = search_exception_tables(regs->iaoq[0]); if (fix) { struct exception_data *d; @@ -155,7 +149,8 @@ int fixup_exception(struct pt_regs *regs) d->fault_space = regs->isr; d->fault_addr = regs->ior; - regs->iaoq[0] = ((fix->fixup) & ~3); + regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup; + regs->iaoq[0] &= ~3; /* * NOTE: In some cases the faulting instruction * may be in the delay slot of a branch. We diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 43686043e297..2734c005da21 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -31,6 +31,7 @@ #define PPC_FEATURE_PSERIES_PERFMON_COMPAT \ 0x00000040 +/* Reserved - do not use 0x00000004 */ #define PPC_FEATURE_TRUE_LE 0x00000002 #define PPC_FEATURE_PPC_LE 0x00000001 diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7030b035905d..a15fe1d4e84a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -148,23 +148,25 @@ static struct ibm_pa_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ + unsigned int cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */ unsigned char pabyte; /* byte number in ibm,pa-features */ unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, - {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, - {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, + {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0, 0}, + {0, 0, PPC_FEATURE_HAS_FPU, 0, 0, 1, 0}, + {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0}, + {CPU_FTR_NOEXECUTE, 0, 0, 0, 0, 6, 0}, + {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1}, + {0, MMU_FTR_CI_LARGE_PAGE, 0, 0, 1, 2, 0}, + {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0}, /* - * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), - * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP - * which is 0 if the kernel doesn't support TM. + * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n), + * we don't want to turn on TM here, so we use the *_COMP versions + * which are 0 if the kernel doesn't support TM. */ - {CPU_FTR_TM_COMP, 0, 0, 22, 0, 0}, + {CPU_FTR_TM_COMP, 0, 0, + PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -195,10 +197,12 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs, if (bit ^ fp->invert) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features |= fp->mmu_features; } else { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features &= ~fp->mmu_features; } } diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 2b2ced9dc00a..6dafabb6ae1a 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -45,7 +45,8 @@ struct zpci_fmb { u64 rpcit_ops; u64 dma_rbytes; u64 dma_wbytes; -} __packed __aligned(64); + u64 pad[2]; +} __packed __aligned(128); enum zpci_state { ZPCI_FN_STATE_RESERVED, diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index a841e9765bd6..8381c09d2870 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -453,10 +453,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 0010c78c4998..08b1f2f6ea50 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -25,6 +25,8 @@ #define EFI32_LOADER_SIGNATURE "EL32" #define EFI64_LOADER_SIGNATURE "EL64" +#define MAX_CMDLINE_ADDRESS UINT_MAX + #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index f8a29d2c97b0..e6a8613fbfb0 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -4,6 +4,7 @@ #include <asm/page.h> #include <asm-generic/hugetlb.h> +#define hugepages_supported() cpu_has_pse static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 7af2505f20c2..df6b4eeac0bd 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -254,7 +254,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) struct irq_desc *desc; int cpu, vector; - BUG_ON(!data->cfg.vector); + if (!data->cfg.vector) + return; vector = data->cfg.vector; for_each_cpu_and(cpu, data->domain, cpu_online_mask) diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 0a850100c594..2658e2af74ec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ]; void mce_gen_pool_process(void) { struct llist_node *head; - struct mce_evt_llist *node; + struct mce_evt_llist *node, *tmp; struct mce *mce; head = llist_del_all(&mce_event_llist); @@ -37,7 +37,7 @@ void mce_gen_pool_process(void) return; head = llist_reverse_order(head); - llist_for_each_entry(node, head, llnode) { + llist_for_each_entry_safe(node, tmp, head, llnode) { mce = &node->mce; atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7eb4ebd3ebea..605cea75eb0d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -697,7 +697,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) return 1; } - kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) @@ -6495,8 +6494,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->prepare_guest_switch(vcpu); if (vcpu->fpu_active) kvm_load_guest_fpu(vcpu); - kvm_load_guest_xcr0(vcpu); - vcpu->mode = IN_GUEST_MODE; srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); @@ -6519,6 +6516,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto cancel_injection; } + kvm_load_guest_xcr0(vcpu); + if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); @@ -6568,6 +6567,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); + kvm_put_guest_xcr0(vcpu); + /* Interrupt is enabled by handle_external_intr() */ kvm_x86_ops->handle_external_intr(vcpu); @@ -7215,7 +7216,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) * and assume host would use all available bits. * Guest xcr0 would be loaded later. */ - kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); @@ -7224,8 +7224,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - kvm_put_guest_xcr0(vcpu); - if (!vcpu->guest_fpu_loaded) { vcpu->fpu_counter = 0; return; diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 637ab34ed632..ddb2244b06a1 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -33,7 +33,7 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; - unsigned long page; /* location of the fault page */ + unsigned long addr; /* the requested address */ pteval_t old_presence; /* page presence prior to arming */ bool armed; @@ -70,9 +70,16 @@ unsigned int kmmio_count; static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; static LIST_HEAD(kmmio_probes); -static struct list_head *kmmio_page_list(unsigned long page) +static struct list_head *kmmio_page_list(unsigned long addr) { - return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + + if (!pte) + return NULL; + addr &= page_level_mask(l); + + return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)]; } /* Accessed per-cpu */ @@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) } /* You must be holding RCU read lock. */ -static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) +static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr) { struct list_head *head; struct kmmio_fault_page *f; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); - page &= PAGE_MASK; - head = kmmio_page_list(page); + if (!pte) + return NULL; + addr &= page_level_mask(l); + head = kmmio_page_list(addr); list_for_each_entry_rcu(f, head, list) { - if (f->page == page) + if (f->addr == addr) return f; } return NULL; @@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) static int clear_page_presence(struct kmmio_fault_page *f, bool clear) { unsigned int level; - pte_t *pte = lookup_address(f->page, &level); + pte_t *pte = lookup_address(f->addr, &level); if (!pte) { - pr_err("no pte for page 0x%08lx\n", f->page); + pr_err("no pte for addr 0x%08lx\n", f->addr); return -1; } @@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) return -1; } - __flush_tlb_one(f->page); + __flush_tlb_one(f->addr); return 0; } @@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) int ret; WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, !!f->old_presence); + pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n", + f->addr, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); - WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), - f->page); + WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"), + f->addr); f->armed = true; return ret; } @@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { int ret = clear_page_presence(f, false); WARN_ONCE(ret < 0, - KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); + KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr); f->armed = false; } @@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) struct kmmio_context *ctx; struct kmmio_fault_page *faultpage; int ret = 0; /* default to fault not handled */ + unsigned long page_base = addr; + unsigned int l; + pte_t *pte = lookup_address(addr, &l); + if (!pte) + return -EINVAL; + page_base &= page_level_mask(l); /* * Preemption is now disabled to prevent process switch during @@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) preempt_disable(); rcu_read_lock(); - faultpage = get_kmmio_fault_page(addr); + faultpage = get_kmmio_fault_page(page_base); if (!faultpage) { /* * Either this page fault is not caused by kmmio, or @@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - if (addr == ctx->addr) { + if (page_base == ctx->addr) { /* * A second fault on the same page means some other * condition needs handling by do_page_fault(), the @@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx->active++; ctx->fpage = faultpage; - ctx->probe = get_kmmio_probe(addr); + ctx->probe = get_kmmio_probe(page_base); ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - ctx->addr = addr; + ctx->addr = page_base; if (ctx->probe && ctx->probe->pre_handler) ctx->probe->pre_handler(ctx->probe, regs, addr); @@ -354,12 +371,11 @@ out: } /* You must be holding kmmio_lock. */ -static int add_kmmio_fault_page(unsigned long page) +static int add_kmmio_fault_page(unsigned long addr) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (f) { if (!f->count) arm_kmmio_fault_page(f); @@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page) return -1; f->count = 1; - f->page = page; + f->addr = addr; if (arm_kmmio_fault_page(f)) { kfree(f); return -1; } - list_add_rcu(&f->list, kmmio_page_list(f->page)); + list_add_rcu(&f->list, kmmio_page_list(f->addr)); return 0; } /* You must be holding kmmio_lock. */ -static void release_kmmio_fault_page(unsigned long page, +static void release_kmmio_fault_page(unsigned long addr, struct kmmio_fault_page **release_list) { struct kmmio_fault_page *f; - page &= PAGE_MASK; - f = get_kmmio_fault_page(page); + f = get_kmmio_fault_page(addr); if (!f) return; @@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p) int ret = 0; unsigned long size = 0; const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); + unsigned int l; + pte_t *pte; spin_lock_irqsave(&kmmio_lock, flags); if (get_kmmio_probe(p->addr)) { ret = -EEXIST; goto out; } + + pte = lookup_address(p->addr, &l); + if (!pte) { + ret = -EINVAL; + goto out; + } + kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) pr_err("Unable to set page fault.\n"); - size += PAGE_SIZE; + size += page_level_size(l); } out: spin_unlock_irqrestore(&kmmio_lock, flags); @@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p) const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; + unsigned int l; + pte_t *pte; + + pte = lookup_address(p->addr, &l); + if (!pte) + return; spin_lock_irqsave(&kmmio_lock, flags); while (size < size_lim) { release_kmmio_fault_page(p->addr + size, &release_list); - size += PAGE_SIZE; + size += page_level_size(l); } list_del_rcu(&p->list); kmmio_count--; diff --git a/block/partition-generic.c b/block/partition-generic.c index ae95e963c5bb..91327dbfbb1d 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -360,15 +360,20 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_del; } + err = hd_ref_init(p); + if (err) { + if (flags & ADDPART_FLAG_WHOLEDISK) + goto out_remove_file; + goto out_del; + } + /* everything is up and running, commence */ rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk suppresses it */ if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); - - if (!hd_ref_init(p)) - return p; + return p; out_free_info: free_part_info(p); @@ -377,6 +382,8 @@ out_free_stats: out_free: kfree(p); return ERR_PTR(err); +out_remove_file: + device_remove_file(pdev, &dev_attr_whole_disk); out_del: kobject_put(p->holder_dir); device_del(pdev); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 65f50eccd49b..a48824deabc5 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1381,7 +1381,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, mutex_lock(&genpd->lock); - if (!list_empty(&subdomain->slave_links) || subdomain->device_count) { + if (!list_empty(&subdomain->master_links) || subdomain->device_count) { pr_warn("%s: unable to remove subdomain %s\n", genpd->name, subdomain->name); ret = -EBUSY; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 423f4ca7d712..80cf8add46ff 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -488,6 +488,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, bio_segments(bio), blk_rq_bytes(cmd->rq)); + /* + * This bio may be started from the middle of the 'bvec' + * because of bio splitting, so offset from the bvec must + * be passed to iov iterator + */ + iter.iov_offset = bio->bi_iter.bi_bvec_done; cmd->iocb.ki_pos = pos; cmd->iocb.ki_filp = file; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 562b5a4ca7b7..78a39f736c64 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -126,7 +126,7 @@ */ #include <linux/types.h> -static bool verbose = 0; +static int verbose = 0; static int major = PD_MAJOR; static char *name = PD_NAME; static int cluster = 64; @@ -161,7 +161,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV}; static DEFINE_MUTEX(pd_mutex); static DEFINE_SPINLOCK(pd_lock); -module_param(verbose, bool, 0); +module_param(verbose, int, 0); module_param(major, int, 0); module_param(name, charp, 0); module_param(cluster, int, 0); diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 1740d75e8a32..216a94fed5b4 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -117,7 +117,7 @@ */ -static bool verbose = 0; +static int verbose = 0; static int major = PT_MAJOR; static char *name = PT_NAME; static int disable = 0; @@ -152,7 +152,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3}; #include <asm/uaccess.h> -module_param(verbose, bool, 0); +module_param(verbose, int, 0); module_param(major, int, 0); module_param(name, charp, 0); module_param_array(drive0, int, NULL, 0); diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index e98d15eaa799..1827fc4d15c1 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -150,7 +150,7 @@ static int __init weim_parse_dt(struct platform_device *pdev, return ret; } - for_each_child_of_node(pdev->dev.of_node, child) { + for_each_available_child_of_node(pdev->dev.of_node, child) { if (!child->name) continue; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 98fb8821382d..f53b02a6bc05 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -667,6 +667,11 @@ static int core_get_max_pstate(void) if (err) goto skip_tar; + /* For level 1 and 2, bits[23:16] contain the ratio */ + if (tdp_ctrl) + tdp_ratio >>= 16; + + tdp_ratio &= 0xff; /* ratios are only 8 bits long */ if (tdp_ratio - 1 == tar) { max_pstate = tar; pr_debug("max_pstate=TAC %x\n", max_pstate); diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 3d9acc53d247..60fc0fa26fd3 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -225,6 +225,9 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out) struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); struct ccp_aes_cmac_exp_ctx state; + /* Don't let anything leak to 'out' */ + memset(&state, 0, sizeof(state)); + state.null_msg = rctx->null_msg; memcpy(state.iv, rctx->iv, sizeof(state.iv)); state.buf_count = rctx->buf_count; diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 8ef06fad8b14..ab9945f2cb7a 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -212,6 +212,9 @@ static int ccp_sha_export(struct ahash_request *req, void *out) struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); struct ccp_sha_exp_ctx state; + /* Don't let anything leak to 'out' */ + memset(&state, 0, sizeof(state)); + state.type = rctx->type; state.msg_bits = rctx->msg_bits; state.first = rctx->first; diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b6f9f42e2985..a04fea4d0063 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -63,6 +63,14 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr, ptr->eptr = upper_32_bits(dma_addr); } +static void copy_talitos_ptr(struct talitos_ptr *dst_ptr, + struct talitos_ptr *src_ptr, bool is_sec1) +{ + dst_ptr->ptr = src_ptr->ptr; + if (!is_sec1) + dst_ptr->eptr = src_ptr->eptr; +} + static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned int len, bool is_sec1) { @@ -1083,21 +1091,20 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1, (areq->src == areq->dst) ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); - /* hmac data */ desc->ptr[1].len = cpu_to_be16(areq->assoclen); if (sg_count > 1 && (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, areq->assoclen, &edesc->link_tbl[tbl_off])) > 1) { - tbl_off += ret; - to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * sizeof(struct talitos_ptr), 0); desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + + tbl_off += ret; } else { to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); desc->ptr[1].j_extent = 0; @@ -1126,11 +1133,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) sg_link_tbl_len += authsize; - if (sg_count > 1 && - (ret = sg_to_link_tbl_offset(areq->src, sg_count, areq->assoclen, - sg_link_tbl_len, - &edesc->link_tbl[tbl_off])) > 1) { - tbl_off += ret; + if (sg_count == 1) { + to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) + + areq->assoclen, 0); + } else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count, + areq->assoclen, sg_link_tbl_len, + &edesc->link_tbl[tbl_off])) > + 1) { desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + tbl_off * @@ -1138,8 +1147,10 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); - } else - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src), 0); + tbl_off += ret; + } else { + copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0); + } /* cipher out */ desc->ptr[5].len = cpu_to_be16(cryptlen); @@ -1151,11 +1162,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, edesc->icv_ool = false; - if (sg_count > 1 && - (sg_count = sg_to_link_tbl_offset(areq->dst, sg_count, + if (sg_count == 1) { + to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) + + areq->assoclen, 0); + } else if ((sg_count = + sg_to_link_tbl_offset(areq->dst, sg_count, areq->assoclen, cryptlen, - &edesc->link_tbl[tbl_off])) > - 1) { + &edesc->link_tbl[tbl_off])) > 1) { struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + @@ -1178,8 +1191,9 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, edesc->dma_len, DMA_BIDIRECTIONAL); edesc->icv_ool = true; - } else - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst), 0); + } else { + copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0); + } /* iv out */ map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, @@ -2519,21 +2533,11 @@ struct talitos_crypto_alg { struct talitos_alg_template algt; }; -static int talitos_cra_init(struct crypto_tfm *tfm) +static int talitos_init_common(struct talitos_ctx *ctx, + struct talitos_crypto_alg *talitos_alg) { - struct crypto_alg *alg = tfm->__crt_alg; - struct talitos_crypto_alg *talitos_alg; - struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); struct talitos_private *priv; - if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH) - talitos_alg = container_of(__crypto_ahash_alg(alg), - struct talitos_crypto_alg, - algt.alg.hash); - else - talitos_alg = container_of(alg, struct talitos_crypto_alg, - algt.alg.crypto); - /* update context with ptr to dev */ ctx->dev = talitos_alg->dev; @@ -2551,10 +2555,33 @@ static int talitos_cra_init(struct crypto_tfm *tfm) return 0; } +static int talitos_cra_init(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct talitos_crypto_alg *talitos_alg; + struct talitos_ctx *ctx = crypto_tfm_ctx(tfm); + + if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH) + talitos_alg = container_of(__crypto_ahash_alg(alg), + struct talitos_crypto_alg, + algt.alg.hash); + else + talitos_alg = container_of(alg, struct talitos_crypto_alg, + algt.alg.crypto); + + return talitos_init_common(ctx, talitos_alg); +} + static int talitos_cra_init_aead(struct crypto_aead *tfm) { - talitos_cra_init(crypto_aead_tfm(tfm)); - return 0; + struct aead_alg *alg = crypto_aead_alg(tfm); + struct talitos_crypto_alg *talitos_alg; + struct talitos_ctx *ctx = crypto_aead_ctx(tfm); + + talitos_alg = container_of(alg, struct talitos_crypto_alg, + algt.alg.aead); + + return talitos_init_common(ctx, talitos_alg); } static int talitos_cra_init_ahash(struct crypto_tfm *tfm) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 4f099ea29f83..c66133b5e852 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -130,26 +130,14 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) static void dwc_initialize(struct dw_dma_chan *dwc) { struct dw_dma *dw = to_dw_dma(dwc->chan.device); - struct dw_dma_slave *dws = dwc->chan.private; u32 cfghi = DWC_CFGH_FIFO_MODE; u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); if (dwc->initialized == true) return; - if (dws) { - /* - * We need controller-specific data to set up slave - * transfers. - */ - BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); - - cfghi |= DWC_CFGH_DST_PER(dws->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dws->src_id); - } else { - cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); - cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); - } + cfghi |= DWC_CFGH_DST_PER(dwc->dst_id); + cfghi |= DWC_CFGH_SRC_PER(dwc->src_id); channel_writel(dwc, CFG_LO, cfglo); channel_writel(dwc, CFG_HI, cfghi); @@ -936,7 +924,7 @@ bool dw_dma_filter(struct dma_chan *chan, void *param) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma_slave *dws = param; - if (!dws || dws->dma_dev != chan->device->dev) + if (dws->dma_dev != chan->device->dev) return false; /* We have to copy data since dws can be temporary storage */ @@ -1160,6 +1148,14 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) * doesn't mean what you think it means), and status writeback. */ + /* + * We need controller-specific data to set up slave transfers. + */ + if (chan->private && !dw_dma_filter(chan, chan->private)) { + dev_warn(chan2dev(chan), "Wrong controller-specific data\n"); + return -EINVAL; + } + /* Enable controller here if needed */ if (!dw->in_use) dw_dma_on(dw); @@ -1221,6 +1217,14 @@ static void dwc_free_chan_resources(struct dma_chan *chan) spin_lock_irqsave(&dwc->lock, flags); list_splice_init(&dwc->free_list, &list); dwc->descs_allocated = 0; + + /* Clear custom channel configuration */ + dwc->src_id = 0; + dwc->dst_id = 0; + + dwc->src_master = 0; + dwc->dst_master = 0; + dwc->initialized = false; /* Disable interrupts */ diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index 823ad728aecf..efc02b98e6ba 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -135,7 +135,7 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc) sr = hsu_chan_readl(hsuc, HSU_CH_SR); spin_unlock_irqrestore(&hsuc->vchan.lock, flags); - return sr; + return sr & ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY); } irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h index f06579c6d548..26da2865b025 100644 --- a/drivers/dma/hsu/hsu.h +++ b/drivers/dma/hsu/hsu.h @@ -41,6 +41,9 @@ #define HSU_CH_SR_DESCTO(x) BIT(8 + (x)) #define HSU_CH_SR_DESCTO_ANY (BIT(11) | BIT(10) | BIT(9) | BIT(8)) #define HSU_CH_SR_CHE BIT(15) +#define HSU_CH_SR_DESCE(x) BIT(16 + (x)) +#define HSU_CH_SR_DESCE_ANY (BIT(19) | BIT(18) | BIT(17) | BIT(16)) +#define HSU_CH_SR_CDESC_ANY (BIT(31) | BIT(30)) /* Bits in HSU_CH_CR */ #define HSU_CH_CR_CHA BIT(0) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index a59061e4221a..55f5d33f6dc7 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -122,6 +122,7 @@ struct pxad_chan { struct pxad_device { struct dma_device slave; int nr_chans; + int nr_requestors; void __iomem *base; struct pxad_phy *phys; spinlock_t phy_lock; /* Phy association */ @@ -473,7 +474,7 @@ static void pxad_free_phy(struct pxad_chan *chan) return; /* clear the channel mapping in DRCMR */ - if (chan->drcmr <= DRCMR_CHLNUM) { + if (chan->drcmr <= pdev->nr_requestors) { reg = pxad_drcmr(chan->drcmr); writel_relaxed(0, chan->phy->base + reg); } @@ -509,6 +510,7 @@ static bool is_running_chan_misaligned(struct pxad_chan *chan) static void phy_enable(struct pxad_phy *phy, bool misaligned) { + struct pxad_device *pdev; u32 reg, dalgn; if (!phy->vchan) @@ -518,7 +520,8 @@ static void phy_enable(struct pxad_phy *phy, bool misaligned) "%s(); phy=%p(%d) misaligned=%d\n", __func__, phy, phy->idx, misaligned); - if (phy->vchan->drcmr <= DRCMR_CHLNUM) { + pdev = to_pxad_dev(phy->vchan->vc.chan.device); + if (phy->vchan->drcmr <= pdev->nr_requestors) { reg = pxad_drcmr(phy->vchan->drcmr); writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); } @@ -914,6 +917,7 @@ static void pxad_get_config(struct pxad_chan *chan, { u32 maxburst = 0, dev_addr = 0; enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); *dcmd = 0; if (dir == DMA_DEV_TO_MEM) { @@ -922,7 +926,7 @@ static void pxad_get_config(struct pxad_chan *chan, dev_addr = chan->cfg.src_addr; *dev_src = dev_addr; *dcmd |= PXA_DCMD_INCTRGADDR; - if (chan->drcmr <= DRCMR_CHLNUM) + if (chan->drcmr <= pdev->nr_requestors) *dcmd |= PXA_DCMD_FLOWSRC; } if (dir == DMA_MEM_TO_DEV) { @@ -931,7 +935,7 @@ static void pxad_get_config(struct pxad_chan *chan, dev_addr = chan->cfg.dst_addr; *dev_dst = dev_addr; *dcmd |= PXA_DCMD_INCSRCADDR; - if (chan->drcmr <= DRCMR_CHLNUM) + if (chan->drcmr <= pdev->nr_requestors) *dcmd |= PXA_DCMD_FLOWTRG; } if (dir == DMA_MEM_TO_MEM) @@ -1341,13 +1345,15 @@ static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec, static int pxad_init_dmadev(struct platform_device *op, struct pxad_device *pdev, - unsigned int nr_phy_chans) + unsigned int nr_phy_chans, + unsigned int nr_requestors) { int ret; unsigned int i; struct pxad_chan *c; pdev->nr_chans = nr_phy_chans; + pdev->nr_requestors = nr_requestors; INIT_LIST_HEAD(&pdev->slave.channels); pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources; pdev->slave.device_free_chan_resources = pxad_free_chan_resources; @@ -1382,7 +1388,7 @@ static int pxad_probe(struct platform_device *op) const struct of_device_id *of_id; struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); struct resource *iores; - int ret, dma_channels = 0; + int ret, dma_channels = 0, nb_requestors = 0; const enum dma_slave_buswidth widths = DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | DMA_SLAVE_BUSWIDTH_4_BYTES; @@ -1399,13 +1405,23 @@ static int pxad_probe(struct platform_device *op) return PTR_ERR(pdev->base); of_id = of_match_device(pxad_dt_ids, &op->dev); - if (of_id) + if (of_id) { of_property_read_u32(op->dev.of_node, "#dma-channels", &dma_channels); - else if (pdata && pdata->dma_channels) + ret = of_property_read_u32(op->dev.of_node, "#dma-requests", + &nb_requestors); + if (ret) { + dev_warn(pdev->slave.dev, + "#dma-requests set to default 32 as missing in OF: %d", + ret); + nb_requestors = 32; + }; + } else if (pdata && pdata->dma_channels) { dma_channels = pdata->dma_channels; - else + nb_requestors = pdata->nb_requestors; + } else { dma_channels = 32; /* default 32 channel */ + } dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask); dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask); @@ -1422,7 +1438,7 @@ static int pxad_probe(struct platform_device *op) pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; pdev->slave.dev = &op->dev; - ret = pxad_init_dmadev(op, pdev, dma_channels); + ret = pxad_init_dmadev(op, pdev, dma_channels, nb_requestors); if (ret) { dev_err(pdev->slave.dev, "unable to register\n"); return ret; @@ -1441,7 +1457,8 @@ static int pxad_probe(struct platform_device *op) platform_set_drvdata(op, pdev); pxad_init_debugfs(pdev); - dev_info(pdev->slave.dev, "initialized %d channels\n", dma_channels); + dev_info(pdev->slave.dev, "initialized %d channels on %d requestors\n", + dma_channels, nb_requestors); return 0; } diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 01087a38da22..792bdae2b91d 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1866,7 +1866,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, i7_dev = get_i7core_dev(mce->socketid); if (!i7_dev) - return NOTIFY_BAD; + return NOTIFY_DONE; mci = i7_dev->mci; pvt = mci->pvt_info; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index cbee3179ec08..37649221f81c 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1396,7 +1396,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } ch_way = TAD_CH(reg) + 1; - sck_way = 1 << TAD_SOCK(reg); + sck_way = TAD_SOCK(reg); if (ch_way == 3) idx = addr >> 6; @@ -1435,7 +1435,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, switch(ch_way) { case 2: case 4: - sck_xch = 1 << sck_way * (ch_way >> 1); + sck_xch = (1 << sck_way) * (ch_way >> 1); break; default: sprintf(msg, "Invalid mirror set. Can't decode addr"); @@ -1471,7 +1471,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, ch_addr = addr - offset; ch_addr >>= (6 + shiftup); - ch_addr /= ch_way * sck_way; + ch_addr /= sck_xch; ch_addr <<= (6 + shiftup); ch_addr |= addr & ((1 << (6 + shiftup)) - 1); @@ -2254,7 +2254,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, mci = get_mci_for_node_id(mce->socketid); if (!mci) - return NOTIFY_BAD; + return NOTIFY_DONE; pvt = mci->pvt_info; /* diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c index 9f9ea334399c..b6cb30d207be 100644 --- a/drivers/extcon/extcon-max77843.c +++ b/drivers/extcon/extcon-max77843.c @@ -803,7 +803,7 @@ static int max77843_muic_probe(struct platform_device *pdev) /* Clear IRQ bits before request IRQs */ ret = regmap_bulk_read(max77843->regmap_muic, MAX77843_MUIC_REG_INT1, info->status, - MAX77843_MUIC_IRQ_NUM); + MAX77843_MUIC_STATUS_NUM); if (ret) { dev_err(&pdev->dev, "Failed to Clear IRQ bits\n"); goto err_muic_irq; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 027ca212179f..3b52677f459a 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -180,6 +180,7 @@ static int generic_ops_register(void) { generic_ops.get_variable = efi.get_variable; generic_ops.set_variable = efi.set_variable; + generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 3c0467d3688c..c4098748e1fe 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -8,7 +8,7 @@ cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ -fPIC -fno-strict-aliasing -mno-red-zone \ - -mno-mmx -mno-sse -DDISABLE_BRANCH_PROFILING + -mno-mmx -mno-sse cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ @@ -16,7 +16,7 @@ cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt -KBUILD_CFLAGS := $(cflags-y) \ +KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) @@ -34,7 +34,8 @@ $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o \ $(patsubst %.c,lib-%.o,$(arm-deps)) -lib-$(CONFIG_ARM64) += arm64-stub.o +lib-$(CONFIG_ARM) += arm32-stub.o +lib-$(CONFIG_ARM64) += arm64-stub.o random.o CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) # diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 950c87f5d279..d5aa1d16154f 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -18,6 +18,8 @@ #include "efistub.h" +bool __nokaslr; + static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg) { static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID; @@ -207,14 +209,6 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, pr_efi_err(sys_table, "Failed to find DRAM base\n"); goto fail; } - status = handle_kernel_image(sys_table, image_addr, &image_size, - &reserve_addr, - &reserve_size, - dram_base, image); - if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Failed to relocate kernel\n"); - goto fail; - } /* * Get the command line from EFI, using the LOADED_IMAGE @@ -224,7 +218,28 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size); if (!cmdline_ptr) { pr_efi_err(sys_table, "getting command line via LOADED_IMAGE_PROTOCOL\n"); - goto fail_free_image; + goto fail; + } + + /* check whether 'nokaslr' was passed on the command line */ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + static const u8 default_cmdline[] = CONFIG_CMDLINE; + const u8 *str, *cmdline = cmdline_ptr; + + if (IS_ENABLED(CONFIG_CMDLINE_FORCE)) + cmdline = default_cmdline; + str = strstr(cmdline, "nokaslr"); + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + __nokaslr = true; + } + + status = handle_kernel_image(sys_table, image_addr, &image_size, + &reserve_addr, + &reserve_size, + dram_base, image); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to relocate kernel\n"); + goto fail_free_cmdline; } status = efi_parse_options(cmdline_ptr); @@ -244,7 +259,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, if (status != EFI_SUCCESS) { pr_efi_err(sys_table, "Failed to load device tree!\n"); - goto fail_free_cmdline; + goto fail_free_image; } } @@ -286,12 +301,11 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, efi_free(sys_table, initrd_size, initrd_addr); efi_free(sys_table, fdt_size, fdt_addr); -fail_free_cmdline: - efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr); - fail_free_image: efi_free(sys_table, image_size, *image_addr); efi_free(sys_table, reserve_size, reserve_addr); +fail_free_cmdline: + efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr); fail: return EFI_ERROR; } diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 78dfbd34b6bf..e0e6b74fef8f 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -13,6 +13,10 @@ #include <asm/efi.h> #include <asm/sections.h> +#include "efistub.h" + +extern bool __nokaslr; + efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, unsigned long *image_addr, unsigned long *image_size, @@ -23,26 +27,52 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, { efi_status_t status; unsigned long kernel_size, kernel_memsize = 0; - unsigned long nr_pages; void *old_image_addr = (void *)*image_addr; unsigned long preferred_offset; + u64 phys_seed = 0; + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + if (!__nokaslr) { + status = efi_get_random_bytes(sys_table_arg, + sizeof(phys_seed), + (u8 *)&phys_seed); + if (status == EFI_NOT_FOUND) { + pr_efi(sys_table_arg, "EFI_RNG_PROTOCOL unavailable, no randomness supplied\n"); + } else if (status != EFI_SUCCESS) { + pr_efi_err(sys_table_arg, "efi_get_random_bytes() failed\n"); + return status; + } + } else { + pr_efi(sys_table_arg, "KASLR disabled on kernel command line\n"); + } + } /* * The preferred offset of the kernel Image is TEXT_OFFSET bytes beyond * a 2 MB aligned base, which itself may be lower than dram_base, as * long as the resulting offset equals or exceeds it. */ - preferred_offset = round_down(dram_base, SZ_2M) + TEXT_OFFSET; + preferred_offset = round_down(dram_base, MIN_KIMG_ALIGN) + TEXT_OFFSET; if (preferred_offset < dram_base) - preferred_offset += SZ_2M; + preferred_offset += MIN_KIMG_ALIGN; - /* Relocate the image, if required. */ kernel_size = _edata - _text; - if (*image_addr != preferred_offset) { - kernel_memsize = kernel_size + (_end - _edata); + kernel_memsize = kernel_size + (_end - _edata); + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) { + /* + * If KASLR is enabled, and we have some randomness available, + * locate the kernel at a randomized offset in physical memory. + */ + *reserve_size = kernel_memsize + TEXT_OFFSET; + status = efi_random_alloc(sys_table_arg, *reserve_size, + MIN_KIMG_ALIGN, reserve_addr, + phys_seed); + *image_addr = *reserve_addr + TEXT_OFFSET; + } else { /* - * First, try a straight allocation at the preferred offset. + * Else, try a straight allocation at the preferred offset. * This will work around the issue where, if dram_base == 0x0, * efi_low_alloc() refuses to allocate at 0x0 (to prevent the * address of the allocation to be mistaken for a FAIL return @@ -52,27 +82,31 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, * Mustang), we can still place the kernel at the address * 'dram_base + TEXT_OFFSET'. */ + if (*image_addr == preferred_offset) + return EFI_SUCCESS; + *image_addr = *reserve_addr = preferred_offset; - nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) / - EFI_PAGE_SIZE; + *reserve_size = round_up(kernel_memsize, EFI_ALLOC_ALIGN); + status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, nr_pages, + EFI_LOADER_DATA, + *reserve_size / EFI_PAGE_SIZE, (efi_physical_addr_t *)reserve_addr); - if (status != EFI_SUCCESS) { - kernel_memsize += TEXT_OFFSET; - status = efi_low_alloc(sys_table_arg, kernel_memsize, - SZ_2M, reserve_addr); + } - if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); - return status; - } - *image_addr = *reserve_addr + TEXT_OFFSET; + if (status != EFI_SUCCESS) { + *reserve_size = kernel_memsize + TEXT_OFFSET; + status = efi_low_alloc(sys_table_arg, *reserve_size, + MIN_KIMG_ALIGN, reserve_addr); + + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); + *reserve_size = 0; + return status; } - memcpy((void *)*image_addr, old_image_addr, kernel_size); - *reserve_size = kernel_memsize; + *image_addr = *reserve_addr + TEXT_OFFSET; } - + memcpy((void *)*image_addr, old_image_addr, kernel_size); return EFI_SUCCESS; } diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index f07d4a67fa76..29ed2f9b218c 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -649,6 +649,10 @@ static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n) return dst; } +#ifndef MAX_CMDLINE_ADDRESS +#define MAX_CMDLINE_ADDRESS ULONG_MAX +#endif + /* * Convert the unicode UEFI command line to ASCII to pass to kernel. * Size of memory allocated return in *cmd_line_len. @@ -684,7 +688,8 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, options_bytes++; /* NUL termination */ - status = efi_low_alloc(sys_table_arg, options_bytes, 0, &cmdline_addr); + status = efi_high_alloc(sys_table_arg, options_bytes, 0, + &cmdline_addr, MAX_CMDLINE_ADDRESS); if (status != EFI_SUCCESS) return NULL; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 6b6548fda089..5ed3d3f38166 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -43,4 +43,11 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, unsigned long desc_size, efi_memory_desc_t *runtime_map, int *count); +efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table, + unsigned long size, u8 *out); + +efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, + unsigned long size, unsigned long align, + unsigned long *addr, unsigned long random_seed); + #endif diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index b62e2f5dcab3..b1c22cf18f7d 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -147,6 +147,20 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, if (status) goto fdt_set_fail; + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + efi_status_t efi_status; + + efi_status = efi_get_random_bytes(sys_table, sizeof(fdt_val64), + (u8 *)&fdt_val64); + if (efi_status == EFI_SUCCESS) { + status = fdt_setprop(fdt, node, "kaslr-seed", + &fdt_val64, sizeof(fdt_val64)); + if (status) + goto fdt_set_fail; + } else if (efi_status != EFI_NOT_FOUND) { + return efi_status; + } + } return EFI_SUCCESS; fdt_set_fail: diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c new file mode 100644 index 000000000000..53f6d3fe6d86 --- /dev/null +++ b/drivers/firmware/efi/libstub/random.c @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2016 Linaro Ltd; <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/efi.h> +#include <asm/efi.h> + +#include "efistub.h" + +struct efi_rng_protocol { + efi_status_t (*get_info)(struct efi_rng_protocol *, + unsigned long *, efi_guid_t *); + efi_status_t (*get_rng)(struct efi_rng_protocol *, + efi_guid_t *, unsigned long, u8 *out); +}; + +efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg, + unsigned long size, u8 *out) +{ + efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID; + efi_status_t status; + struct efi_rng_protocol *rng; + + status = efi_call_early(locate_protocol, &rng_proto, NULL, + (void **)&rng); + if (status != EFI_SUCCESS) + return status; + + return rng->get_rng(rng, NULL, size, out); +} + +/* + * Return the number of slots covered by this entry, i.e., the number of + * addresses it covers that are suitably aligned and supply enough room + * for the allocation. + */ +static unsigned long get_entry_num_slots(efi_memory_desc_t *md, + unsigned long size, + unsigned long align) +{ + u64 start, end; + + if (md->type != EFI_CONVENTIONAL_MEMORY) + return 0; + + start = round_up(md->phys_addr, align); + end = round_down(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - size, + align); + + if (start > end) + return 0; + + return (end - start + 1) / align; +} + +/* + * The UEFI memory descriptors have a virtual address field that is only used + * when installing the virtual mapping using SetVirtualAddressMap(). Since it + * is unused here, we can reuse it to keep track of each descriptor's slot + * count. + */ +#define MD_NUM_SLOTS(md) ((md)->virt_addr) + +efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, + unsigned long size, + unsigned long align, + unsigned long *addr, + unsigned long random_seed) +{ + unsigned long map_size, desc_size, total_slots = 0, target_slot; + efi_status_t status; + efi_memory_desc_t *memory_map; + int map_offset; + + status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size, + &desc_size, NULL, NULL); + if (status != EFI_SUCCESS) + return status; + + if (align < EFI_ALLOC_ALIGN) + align = EFI_ALLOC_ALIGN; + + /* count the suitable slots in each memory map entry */ + for (map_offset = 0; map_offset < map_size; map_offset += desc_size) { + efi_memory_desc_t *md = (void *)memory_map + map_offset; + unsigned long slots; + + slots = get_entry_num_slots(md, size, align); + MD_NUM_SLOTS(md) = slots; + total_slots += slots; + } + + /* find a random number between 0 and total_slots */ + target_slot = (total_slots * (u16)random_seed) >> 16; + + /* + * target_slot is now a value in the range [0, total_slots), and so + * it corresponds with exactly one of the suitable slots we recorded + * when iterating over the memory map the first time around. + * + * So iterate over the memory map again, subtracting the number of + * slots of each entry at each iteration, until we have found the entry + * that covers our chosen slot. Use the residual value of target_slot + * to calculate the randomly chosen address, and allocate it directly + * using EFI_ALLOCATE_ADDRESS. + */ + for (map_offset = 0; map_offset < map_size; map_offset += desc_size) { + efi_memory_desc_t *md = (void *)memory_map + map_offset; + efi_physical_addr_t target; + unsigned long pages; + + if (target_slot >= MD_NUM_SLOTS(md)) { + target_slot -= MD_NUM_SLOTS(md); + continue; + } + + target = round_up(md->phys_addr, align) + target_slot * align; + pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + + status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, pages, &target); + if (status == EFI_SUCCESS) + *addr = target; + break; + } + + efi_call_early(free_pool, memory_map); + + return status; +} diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 7f2ea21c730d..6f182fd91a6d 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -202,29 +202,44 @@ static const struct variable_validate variable_validate[] = { { NULL_GUID, "", NULL }, }; +/* + * Check if @var_name matches the pattern given in @match_name. + * + * @var_name: an array of @len non-NUL characters. + * @match_name: a NUL-terminated pattern string, optionally ending in "*". A + * final "*" character matches any trailing characters @var_name, + * including the case when there are none left in @var_name. + * @match: on output, the number of non-wildcard characters in @match_name + * that @var_name matches, regardless of the return value. + * @return: whether @var_name fully matches @match_name. + */ static bool variable_matches(const char *var_name, size_t len, const char *match_name, int *match) { for (*match = 0; ; (*match)++) { char c = match_name[*match]; - char u = var_name[*match]; - /* Wildcard in the matching name means we've matched */ - if (c == '*') + switch (c) { + case '*': + /* Wildcard in @match_name means we've matched. */ return true; - /* Case sensitive match */ - if (!c && *match == len) - return true; + case '\0': + /* @match_name has ended. Has @var_name too? */ + return (*match == len); - if (c != u) + default: + /* + * We've reached a non-wildcard char in @match_name. + * Continue only if there's an identical character in + * @var_name. + */ + if (*match < len && c == var_name[*match]) + continue; return false; - - if (!c) - return true; + } } - return true; } bool diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bb1099c549df..053fc2f465df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1673,6 +1673,7 @@ struct amdgpu_uvd { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; + unsigned fw_version; atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; struct delayed_work idle_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 8ac49812a716..5a8fbadbd27b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -63,10 +63,6 @@ bool amdgpu_has_atpx(void) { return amdgpu_atpx_priv.atpx_detected; } -bool amdgpu_has_atpx_dgpu_power_cntl(void) { - return amdgpu_atpx_priv.atpx.functions.power_cntl; -} - /** * amdgpu_atpx_call - call an ATPX method * @@ -146,6 +142,10 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas */ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) { + /* make sure required functions are enabled */ + /* dGPU power control is required */ + atpx->functions.power_cntl = true; + if (atpx->functions.px_params) { union acpi_object *info; struct atpx_px_params output; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9d88023df836..c961fe093e12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -61,12 +61,6 @@ static const char *amdgpu_asic_name[] = { "LAST", }; -#if defined(CONFIG_VGA_SWITCHEROO) -bool amdgpu_has_atpx_dgpu_power_cntl(void); -#else -static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; } -#endif - bool amdgpu_device_is_px(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; @@ -1475,7 +1469,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (amdgpu_runtime_pm == 1) runtime = true; - if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl()) + if (amdgpu_device_is_px(ddev)) runtime = true; vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime); if (runtime) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index e23843f4d877..4488e82f87b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -303,7 +303,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file fw_info.feature = adev->vce.fb_version; break; case AMDGPU_INFO_FW_UVD: - fw_info.ver = 0; + fw_info.ver = adev->uvd.fw_version; fw_info.feature = 0; break; case AMDGPU_INFO_FW_GMC: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 064ebb347074..89df7871653d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -52,7 +52,7 @@ struct amdgpu_hpd; #define AMDGPU_MAX_HPD_PINS 6 #define AMDGPU_MAX_CRTCS 6 -#define AMDGPU_MAX_AFMT_BLOCKS 7 +#define AMDGPU_MAX_AFMT_BLOCKS 9 enum amdgpu_rmx_type { RMX_OFF, @@ -308,8 +308,8 @@ struct amdgpu_mode_info { struct atom_context *atom_context; struct card_info *atom_card_info; bool mode_config_initialized; - struct amdgpu_crtc *crtcs[6]; - struct amdgpu_afmt *afmt[7]; + struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS]; + struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS]; /* DVI-I properties */ struct drm_property *coherent_mode_property; /* DAC enable load detect */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 53f987aeeacf..3b35ad83867c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -156,6 +156,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", version_major, version_minor, family_id); + adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | + (family_id << 8)); + bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE; r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true, @@ -273,6 +276,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset, (adev->uvd.fw->size) - offset); + cancel_delayed_work_sync(&adev->uvd.idle_work); + size = amdgpu_bo_size(adev->uvd.vcpu_bo); size -= le32_to_cpu(hdr->ucode_size_bytes); ptr = adev->uvd.cpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a745eeeb5d82..bb0da76051a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -220,6 +220,7 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev) if (i == AMDGPU_MAX_VCE_HANDLES) return 0; + cancel_delayed_work_sync(&adev->vce.idle_work); /* TODO: suspending running encoding sessions isn't supported */ return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index aa491540ba85..946300764609 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3628,7 +3628,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); - uint32_t seq = ring->fence_drv.sync_seq; + uint32_t seq = ring->fence_drv.sync_seq[ring->idx]; uint64_t addr = ring->fence_drv.gpu_addr; amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index c34c393e9aea..d5e19b5fbbfb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -513,7 +513,7 @@ static int dbgdev_wave_control_set_registers( union SQ_CMD_BITS *in_reg_sq_cmd, union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) { - int status; + int status = 0; union SQ_CMD_BITS reg_sq_cmd; union GRBM_GFX_INDEX_BITS reg_gfx_index; struct HsaDbgWaveMsgAMDGen2 *pMsg; diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 39d7e2e15c11..d268bf18a662 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1665,13 +1665,19 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb; int len, ret, port_num; + port = drm_dp_get_validated_port_ref(mgr, port); + if (!port) + return -EINVAL; + port_num = port->port_num; mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent); if (!mstb) { mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num); - if (!mstb) + if (!mstb) { + drm_dp_put_port(port); return -EINVAL; + } } txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL); @@ -1697,6 +1703,7 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr, kfree(txmsg); fail_put: drm_dp_put_mst_branch_device(mstb); + drm_dp_put_port(port); return ret; } @@ -1779,6 +1786,11 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) req_payload.start_slot = cur_slots; if (mgr->proposed_vcpis[i]) { port = container_of(mgr->proposed_vcpis[i], struct drm_dp_mst_port, vcpi); + port = drm_dp_get_validated_port_ref(mgr, port); + if (!port) { + mutex_unlock(&mgr->payload_lock); + return -EINVAL; + } req_payload.num_slots = mgr->proposed_vcpis[i]->num_slots; } else { port = NULL; @@ -1804,6 +1816,9 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) mgr->payloads[i].payload_state = req_payload.payload_state; } cur_slots += req_payload.num_slots; + + if (port) + drm_dp_put_port(port); } for (i = 0; i < mgr->max_payloads; i++) { @@ -2109,6 +2124,8 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr) if (mgr->mst_primary) { int sret; + u8 guid[16]; + sret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd, DP_RECEIVER_CAP_SIZE); if (sret != DP_RECEIVER_CAP_SIZE) { DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n"); @@ -2123,6 +2140,16 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr) ret = -1; goto out_unlock; } + + /* Some hubs forget their guids after they resume */ + sret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); + if (sret != 16) { + DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n"); + ret = -1; + goto out_unlock; + } + drm_dp_check_mstb_guid(mgr->mst_primary, guid); + ret = 0; } else ret = -1; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f859a5b87ed4..afa81691163d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4447,7 +4447,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state) intel_crtc->base.base.id, intel_crtc->pipe, SKL_CRTC_INDEX); return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, DRM_ROTATE_0, + &state->scaler_state.scaler_id, BIT(DRM_ROTATE_0), state->pipe_src_w, state->pipe_src_h, adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay); } diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 0639275fc471..06bd9257acdc 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -477,6 +477,8 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, struct intel_connector *intel_connector = to_intel_connector(connector); struct drm_device *dev = connector->dev; + intel_connector->unregister(intel_connector); + /* need to nuke the connector */ drm_modeset_lock_all(dev); if (connector->state->crtc) { @@ -490,11 +492,7 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, WARN(ret, "Disabling mst crtc failed with %i\n", ret); } - drm_modeset_unlock_all(dev); - intel_connector->unregister(intel_connector); - - drm_modeset_lock_all(dev); intel_connector_remove_from_fbdev(intel_connector); drm_connector_cleanup(connector); drm_modeset_unlock_all(dev); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d69547a65dbb..7058f75c7b42 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -776,11 +776,11 @@ static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes) if (unlikely(total_bytes > remain_usable)) { /* * The base request will fit but the reserved space - * falls off the end. So only need to to wait for the - * reserved size after flushing out the remainder. + * falls off the end. So don't need an immediate wrap + * and only need to effectively wait for the reserved + * size space from the start of ringbuffer. */ wait_bytes = remain_actual + ringbuf->reserved_size; - need_wrap = true; } else if (total_bytes > ringbuf->space) { /* No wrapping required, just waiting. */ wait_bytes = total_bytes; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f6b2a814e629..9d48443bca2e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1922,6 +1922,17 @@ i915_dispatch_execbuffer(struct drm_i915_gem_request *req, return 0; } +static void cleanup_phys_status_page(struct intel_engine_cs *ring) +{ + struct drm_i915_private *dev_priv = to_i915(ring->dev); + + if (!dev_priv->status_page_dmah) + return; + + drm_pci_free(ring->dev, dev_priv->status_page_dmah); + ring->status_page.page_addr = NULL; +} + static void cleanup_status_page(struct intel_engine_cs *ring) { struct drm_i915_gem_object *obj; @@ -1938,9 +1949,9 @@ static void cleanup_status_page(struct intel_engine_cs *ring) static int init_status_page(struct intel_engine_cs *ring) { - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj = ring->status_page.obj; - if ((obj = ring->status_page.obj) == NULL) { + if (obj == NULL) { unsigned flags; int ret; @@ -2134,7 +2145,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, if (ret) goto error; } else { - BUG_ON(ring->id != RCS); + WARN_ON(ring->id != RCS); ret = init_phys_status_page(ring); if (ret) goto error; @@ -2179,7 +2190,12 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) if (ring->cleanup) ring->cleanup(ring); - cleanup_status_page(ring); + if (I915_NEED_GFX_HWS(ring->dev)) { + cleanup_status_page(ring); + } else { + WARN_ON(ring->id != RCS); + cleanup_phys_status_page(ring); + } i915_cmd_parser_fini_ring(ring); i915_gem_batch_pool_fini(&ring->batch_pool); @@ -2341,11 +2357,11 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes) if (unlikely(total_bytes > remain_usable)) { /* * The base request will fit but the reserved space - * falls off the end. So only need to to wait for the - * reserved size after flushing out the remainder. + * falls off the end. So don't need an immediate wrap + * and only need to effectively wait for the reserved + * size space from the start of ringbuffer. */ wait_bytes = remain_actual + ringbuf->reserved_size; - need_wrap = true; } else if (total_bytes > ringbuf->space) { /* No wrapping required, just waiting. */ wait_bytes = total_bytes; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 43cba129a0c0..cc91ae832ffb 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1132,7 +1132,11 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev) } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { dev_priv->uncore.funcs.force_wake_get = fw_domains_get_with_thread_status; - dev_priv->uncore.funcs.force_wake_put = fw_domains_put; + if (IS_HASWELL(dev)) + dev_priv->uncore.funcs.force_wake_put = + fw_domains_put_with_fifo; + else + dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_MT, FORCEWAKE_ACK_HSW); } else if (IS_IVYBRIDGE(dev)) { diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c index 3216e157a8a0..89da47234016 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c @@ -131,7 +131,7 @@ nvkm_ramht_del(struct nvkm_ramht **pramht) struct nvkm_ramht *ramht = *pramht; if (ramht) { nvkm_gpuobj_del(&ramht->gpuobj); - kfree(*pramht); + vfree(*pramht); *pramht = NULL; } } @@ -143,8 +143,8 @@ nvkm_ramht_new(struct nvkm_device *device, u32 size, u32 align, struct nvkm_ramht *ramht; int ret, i; - if (!(ramht = *pramht = kzalloc(sizeof(*ramht) + (size >> 3) * - sizeof(*ramht->data), GFP_KERNEL))) + if (!(ramht = *pramht = vzalloc(sizeof(*ramht) + + (size >> 3) * sizeof(*ramht->data)))) return -ENOMEM; ramht->device = device; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 9f5dfc85147a..36655a74c538 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1717,6 +1717,8 @@ gf100_gr_init(struct gf100_gr *gr) gf100_gr_mmio(gr, gr->func->mmio); + nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001); + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); for (i = 0, gpc = -1; i < gr->tpc_total; i++) { do { diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 183aea1abebc..5edebf495c07 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -375,10 +375,15 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc, qxl_bo_kunmap(user_bo); + qcrtc->cur_x += qcrtc->hot_spot_x - hot_x; + qcrtc->cur_y += qcrtc->hot_spot_y - hot_y; + qcrtc->hot_spot_x = hot_x; + qcrtc->hot_spot_y = hot_y; + cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_SET; - cmd->u.set.position.x = qcrtc->cur_x; - cmd->u.set.position.y = qcrtc->cur_y; + cmd->u.set.position.x = qcrtc->cur_x + qcrtc->hot_spot_x; + cmd->u.set.position.y = qcrtc->cur_y + qcrtc->hot_spot_y; cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0); @@ -441,8 +446,8 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc, cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release); cmd->type = QXL_CURSOR_MOVE; - cmd->u.position.x = qcrtc->cur_x; - cmd->u.position.y = qcrtc->cur_y; + cmd->u.position.x = qcrtc->cur_x + qcrtc->hot_spot_x; + cmd->u.position.y = qcrtc->cur_y + qcrtc->hot_spot_y; qxl_release_unmap(qdev, release, &cmd->release_info); qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 01a86948eb8c..3ab90179e9ab 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -135,6 +135,8 @@ struct qxl_crtc { int index; int cur_x; int cur_y; + int hot_spot_x; + int hot_spot_y; }; struct qxl_output { diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 2ad462896896..32491355a1d4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2608,10 +2608,152 @@ static void evergreen_agp_enable(struct radeon_device *rdev) WREG32(VM_CONTEXT1_CNTL, 0); } +static const unsigned ni_dig_offsets[] = +{ + NI_DIG0_REGISTER_OFFSET, + NI_DIG1_REGISTER_OFFSET, + NI_DIG2_REGISTER_OFFSET, + NI_DIG3_REGISTER_OFFSET, + NI_DIG4_REGISTER_OFFSET, + NI_DIG5_REGISTER_OFFSET +}; + +static const unsigned ni_tx_offsets[] = +{ + NI_DCIO_UNIPHY0_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY1_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY2_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY3_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY4_UNIPHY_TX_CONTROL1, + NI_DCIO_UNIPHY5_UNIPHY_TX_CONTROL1 +}; + +static const unsigned evergreen_dp_offsets[] = +{ + EVERGREEN_DP0_REGISTER_OFFSET, + EVERGREEN_DP1_REGISTER_OFFSET, + EVERGREEN_DP2_REGISTER_OFFSET, + EVERGREEN_DP3_REGISTER_OFFSET, + EVERGREEN_DP4_REGISTER_OFFSET, + EVERGREEN_DP5_REGISTER_OFFSET +}; + + +/* + * Assumption is that EVERGREEN_CRTC_MASTER_EN enable for requested crtc + * We go from crtc to connector and it is not relible since it + * should be an opposite direction .If crtc is enable then + * find the dig_fe which selects this crtc and insure that it enable. + * if such dig_fe is found then find dig_be which selects found dig_be and + * insure that it enable and in DP_SST mode. + * if UNIPHY_PLL_CONTROL1.enable then we should disconnect timing + * from dp symbols clocks . + */ +static bool evergreen_is_dp_sst_stream_enabled(struct radeon_device *rdev, + unsigned crtc_id, unsigned *ret_dig_fe) +{ + unsigned i; + unsigned dig_fe; + unsigned dig_be; + unsigned dig_en_be; + unsigned uniphy_pll; + unsigned digs_fe_selected; + unsigned dig_be_mode; + unsigned dig_fe_mask; + bool is_enabled = false; + bool found_crtc = false; + + /* loop through all running dig_fe to find selected crtc */ + for (i = 0; i < ARRAY_SIZE(ni_dig_offsets); i++) { + dig_fe = RREG32(NI_DIG_FE_CNTL + ni_dig_offsets[i]); + if (dig_fe & NI_DIG_FE_CNTL_SYMCLK_FE_ON && + crtc_id == NI_DIG_FE_CNTL_SOURCE_SELECT(dig_fe)) { + /* found running pipe */ + found_crtc = true; + dig_fe_mask = 1 << i; + dig_fe = i; + break; + } + } + + if (found_crtc) { + /* loop through all running dig_be to find selected dig_fe */ + for (i = 0; i < ARRAY_SIZE(ni_dig_offsets); i++) { + dig_be = RREG32(NI_DIG_BE_CNTL + ni_dig_offsets[i]); + /* if dig_fe_selected by dig_be? */ + digs_fe_selected = NI_DIG_BE_CNTL_FE_SOURCE_SELECT(dig_be); + dig_be_mode = NI_DIG_FE_CNTL_MODE(dig_be); + if (dig_fe_mask & digs_fe_selected && + /* if dig_be in sst mode? */ + dig_be_mode == NI_DIG_BE_DPSST) { + dig_en_be = RREG32(NI_DIG_BE_EN_CNTL + + ni_dig_offsets[i]); + uniphy_pll = RREG32(NI_DCIO_UNIPHY0_PLL_CONTROL1 + + ni_tx_offsets[i]); + /* dig_be enable and tx is running */ + if (dig_en_be & NI_DIG_BE_EN_CNTL_ENABLE && + dig_en_be & NI_DIG_BE_EN_CNTL_SYMBCLK_ON && + uniphy_pll & NI_DCIO_UNIPHY0_PLL_CONTROL1_ENABLE) { + is_enabled = true; + *ret_dig_fe = dig_fe; + break; + } + } + } + } + + return is_enabled; +} + +/* + * Blank dig when in dp sst mode + * Dig ignores crtc timing + */ +static void evergreen_blank_dp_output(struct radeon_device *rdev, + unsigned dig_fe) +{ + unsigned stream_ctrl; + unsigned fifo_ctrl; + unsigned counter = 0; + + if (dig_fe >= ARRAY_SIZE(evergreen_dp_offsets)) { + DRM_ERROR("invalid dig_fe %d\n", dig_fe); + return; + } + + stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL + + evergreen_dp_offsets[dig_fe]); + if (!(stream_ctrl & EVERGREEN_DP_VID_STREAM_CNTL_ENABLE)) { + DRM_ERROR("dig %d , should be enable\n", dig_fe); + return; + } + + stream_ctrl &=~EVERGREEN_DP_VID_STREAM_CNTL_ENABLE; + WREG32(EVERGREEN_DP_VID_STREAM_CNTL + + evergreen_dp_offsets[dig_fe], stream_ctrl); + + stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL + + evergreen_dp_offsets[dig_fe]); + while (counter < 32 && stream_ctrl & EVERGREEN_DP_VID_STREAM_STATUS) { + msleep(1); + counter++; + stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL + + evergreen_dp_offsets[dig_fe]); + } + if (counter >= 32 ) + DRM_ERROR("counter exceeds %d\n", counter); + + fifo_ctrl = RREG32(EVERGREEN_DP_STEER_FIFO + evergreen_dp_offsets[dig_fe]); + fifo_ctrl |= EVERGREEN_DP_STEER_FIFO_RESET; + WREG32(EVERGREEN_DP_STEER_FIFO + evergreen_dp_offsets[dig_fe], fifo_ctrl); + +} + void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save) { u32 crtc_enabled, tmp, frame_count, blackout; int i, j; + unsigned dig_fe; if (!ASIC_IS_NODCE(rdev)) { save->vga_render_control = RREG32(VGA_RENDER_CONTROL); @@ -2651,7 +2793,17 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav break; udelay(1); } - + /*we should disable dig if it drives dp sst*/ + /*but we are in radeon_device_init and the topology is unknown*/ + /*and it is available after radeon_modeset_init*/ + /*the following method radeon_atom_encoder_dpms_dig*/ + /*does the job if we initialize it properly*/ + /*for now we do it this manually*/ + /**/ + if (ASIC_IS_DCE5(rdev) && + evergreen_is_dp_sst_stream_enabled(rdev, i ,&dig_fe)) + evergreen_blank_dp_output(rdev, dig_fe); + /*we could remove 6 lines below*/ /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 1); tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]); diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h index aa939dfed3a3..b436badf9efa 100644 --- a/drivers/gpu/drm/radeon/evergreen_reg.h +++ b/drivers/gpu/drm/radeon/evergreen_reg.h @@ -250,8 +250,43 @@ /* HDMI blocks at 0x7030, 0x7c30, 0x10830, 0x11430, 0x12030, 0x12c30 */ #define EVERGREEN_HDMI_BASE 0x7030 +/*DIG block*/ +#define NI_DIG0_REGISTER_OFFSET (0x7000 - 0x7000) +#define NI_DIG1_REGISTER_OFFSET (0x7C00 - 0x7000) +#define NI_DIG2_REGISTER_OFFSET (0x10800 - 0x7000) +#define NI_DIG3_REGISTER_OFFSET (0x11400 - 0x7000) +#define NI_DIG4_REGISTER_OFFSET (0x12000 - 0x7000) +#define NI_DIG5_REGISTER_OFFSET (0x12C00 - 0x7000) + + +#define NI_DIG_FE_CNTL 0x7000 +# define NI_DIG_FE_CNTL_SOURCE_SELECT(x) ((x) & 0x3) +# define NI_DIG_FE_CNTL_SYMCLK_FE_ON (1<<24) + + +#define NI_DIG_BE_CNTL 0x7140 +# define NI_DIG_BE_CNTL_FE_SOURCE_SELECT(x) (((x) >> 8 ) & 0x3F) +# define NI_DIG_FE_CNTL_MODE(x) (((x) >> 16) & 0x7 ) + +#define NI_DIG_BE_EN_CNTL 0x7144 +# define NI_DIG_BE_EN_CNTL_ENABLE (1 << 0) +# define NI_DIG_BE_EN_CNTL_SYMBCLK_ON (1 << 8) +# define NI_DIG_BE_DPSST 0 /* Display Port block */ +#define EVERGREEN_DP0_REGISTER_OFFSET (0x730C - 0x730C) +#define EVERGREEN_DP1_REGISTER_OFFSET (0x7F0C - 0x730C) +#define EVERGREEN_DP2_REGISTER_OFFSET (0x10B0C - 0x730C) +#define EVERGREEN_DP3_REGISTER_OFFSET (0x1170C - 0x730C) +#define EVERGREEN_DP4_REGISTER_OFFSET (0x1230C - 0x730C) +#define EVERGREEN_DP5_REGISTER_OFFSET (0x12F0C - 0x730C) + + +#define EVERGREEN_DP_VID_STREAM_CNTL 0x730C +# define EVERGREEN_DP_VID_STREAM_CNTL_ENABLE (1 << 0) +# define EVERGREEN_DP_VID_STREAM_STATUS (1 <<16) +#define EVERGREEN_DP_STEER_FIFO 0x7310 +# define EVERGREEN_DP_STEER_FIFO_RESET (1 << 0) #define EVERGREEN_DP_SEC_CNTL 0x7280 # define EVERGREEN_DP_SEC_STREAM_ENABLE (1 << 0) # define EVERGREEN_DP_SEC_ASP_ENABLE (1 << 4) @@ -266,4 +301,15 @@ # define EVERGREEN_DP_SEC_N_BASE_MULTIPLE(x) (((x) & 0xf) << 24) # define EVERGREEN_DP_SEC_SS_EN (1 << 28) +/*DCIO_UNIPHY block*/ +#define NI_DCIO_UNIPHY0_UNIPHY_TX_CONTROL1 (0x6600 -0x6600) +#define NI_DCIO_UNIPHY1_UNIPHY_TX_CONTROL1 (0x6640 -0x6600) +#define NI_DCIO_UNIPHY2_UNIPHY_TX_CONTROL1 (0x6680 - 0x6600) +#define NI_DCIO_UNIPHY3_UNIPHY_TX_CONTROL1 (0x66C0 - 0x6600) +#define NI_DCIO_UNIPHY4_UNIPHY_TX_CONTROL1 (0x6700 - 0x6600) +#define NI_DCIO_UNIPHY5_UNIPHY_TX_CONTROL1 (0x6740 - 0x6600) + +#define NI_DCIO_UNIPHY0_PLL_CONTROL1 0x6618 +# define NI_DCIO_UNIPHY0_PLL_CONTROL1_ENABLE (1 << 0) + #endif diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 9bc408c9f9f6..c4b4f298a283 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -62,10 +62,6 @@ bool radeon_has_atpx(void) { return radeon_atpx_priv.atpx_detected; } -bool radeon_has_atpx_dgpu_power_cntl(void) { - return radeon_atpx_priv.atpx.functions.power_cntl; -} - /** * radeon_atpx_call - call an ATPX method * @@ -145,6 +141,10 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas */ static int radeon_atpx_validate(struct radeon_atpx *atpx) { + /* make sure required functions are enabled */ + /* dGPU power control is required */ + atpx->functions.power_cntl = true; + if (atpx->functions.px_params) { union acpi_object *info; struct atpx_px_params output; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 340f3f549f29..9cfc1c3e1965 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1996,10 +1996,12 @@ radeon_add_atom_connector(struct drm_device *dev, rdev->mode_info.dither_property, RADEON_FMT_DITHER_DISABLE); - if (radeon_audio != 0) + if (radeon_audio != 0) { drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; + } if (ASIC_IS_DCE5(rdev)) drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.output_csc_property, @@ -2124,6 +2126,7 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; } if (connector_type == DRM_MODE_CONNECTOR_DVII) { radeon_connector->dac_load_detect = true; @@ -2179,6 +2182,7 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; } if (ASIC_IS_DCE5(rdev)) drm_object_attach_property(&radeon_connector->base.base, @@ -2231,6 +2235,7 @@ radeon_add_atom_connector(struct drm_device *dev, drm_object_attach_property(&radeon_connector->base.base, rdev->mode_info.audio_property, RADEON_AUDIO_AUTO); + radeon_connector->audio = RADEON_AUDIO_AUTO; } if (ASIC_IS_DCE5(rdev)) drm_object_attach_property(&radeon_connector->base.base, diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index f78f111e68de..c566993a2ec3 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -103,12 +103,6 @@ static const char radeon_family_name[][16] = { "LAST", }; -#if defined(CONFIG_VGA_SWITCHEROO) -bool radeon_has_atpx_dgpu_power_cntl(void); -#else -static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; } -#endif - #define RADEON_PX_QUIRK_DISABLE_PX (1 << 0) #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1) @@ -1439,7 +1433,7 @@ int radeon_device_init(struct radeon_device *rdev, * ignore it */ vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode); - if ((rdev->flags & RADEON_IS_PX) && radeon_has_atpx_dgpu_power_cntl()) + if (rdev->flags & RADEON_IS_PX) runtime = true; vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, runtime); if (runtime) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index e06ac546a90f..f342aad79cc6 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -235,6 +235,8 @@ static int radeon_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct radeon_bo *rbo = container_of(bo, struct radeon_bo, tbo); + if (radeon_ttm_tt_has_userptr(bo->ttm)) + return -EPERM; return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp); } diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 7285adb27099..caa73de584a5 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2931,6 +2931,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { { PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x148c, 0x2015, 0, 120000 }, + { PCI_VENDOR_ID_ATI, 0x6810, 0x1682, 0x9275, 0, 120000 }, { 0, 0, 0, 0 }, }; diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig index 83e9f591a54b..e7a348807f0c 100644 --- a/drivers/hwtracing/stm/Kconfig +++ b/drivers/hwtracing/stm/Kconfig @@ -1,6 +1,7 @@ config STM tristate "System Trace Module devices" select CONFIGFS_FS + select SRCU help A System Trace Module (STM) is a device exporting data in System Trace Protocol (STP) format as defined by MIPI STP standards. diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c index 714bdc837769..b167ab25310a 100644 --- a/drivers/i2c/busses/i2c-cpm.c +++ b/drivers/i2c/busses/i2c-cpm.c @@ -116,8 +116,8 @@ struct cpm_i2c { cbd_t __iomem *rbase; u_char *txbuf[CPM_MAXBD]; u_char *rxbuf[CPM_MAXBD]; - u32 txdma[CPM_MAXBD]; - u32 rxdma[CPM_MAXBD]; + dma_addr_t txdma[CPM_MAXBD]; + dma_addr_t rxdma[CPM_MAXBD]; }; static irqreturn_t cpm_i2c_interrupt(int irq, void *dev_id) diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index b29c7500461a..f54ece8fce78 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -671,7 +671,9 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap, return -EIO; } - clk_prepare_enable(i2c->clk); + ret = clk_enable(i2c->clk); + if (ret) + return ret; for (i = 0; i < num; i++, msgs++) { stop = (i == num - 1); @@ -695,7 +697,7 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap, } out: - clk_disable_unprepare(i2c->clk); + clk_disable(i2c->clk); return ret; } @@ -747,7 +749,9 @@ static int exynos5_i2c_probe(struct platform_device *pdev) return -ENOENT; } - clk_prepare_enable(i2c->clk); + ret = clk_prepare_enable(i2c->clk); + if (ret) + return ret; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); i2c->regs = devm_ioremap_resource(&pdev->dev, mem); @@ -799,6 +803,10 @@ static int exynos5_i2c_probe(struct platform_device *pdev) platform_set_drvdata(pdev, i2c); + clk_disable(i2c->clk); + + return 0; + err_clk: clk_disable_unprepare(i2c->clk); return ret; @@ -810,6 +818,8 @@ static int exynos5_i2c_remove(struct platform_device *pdev) i2c_del_adapter(&i2c->adap); + clk_unprepare(i2c->clk); + return 0; } @@ -821,6 +831,8 @@ static int exynos5_i2c_suspend_noirq(struct device *dev) i2c->suspended = 1; + clk_unprepare(i2c->clk); + return 0; } @@ -830,7 +842,9 @@ static int exynos5_i2c_resume_noirq(struct device *dev) struct exynos5_i2c *i2c = platform_get_drvdata(pdev); int ret = 0; - clk_prepare_enable(i2c->clk); + ret = clk_prepare_enable(i2c->clk); + if (ret) + return ret; ret = exynos5_hsi2c_clock_setup(i2c); if (ret) { @@ -839,7 +853,7 @@ static int exynos5_i2c_resume_noirq(struct device *dev) } exynos5_i2c_init(i2c); - clk_disable_unprepare(i2c->clk); + clk_disable(i2c->clk); i2c->suspended = 0; return 0; diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 6b4e8a008bc0..564adf3116e8 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -48,6 +48,7 @@ #include <asm/uaccess.h> +#include <rdma/ib.h> #include <rdma/ib_cm.h> #include <rdma/ib_user_cm.h> #include <rdma/ib_marshall.h> @@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, struct ib_ucm_cmd_hdr hdr; ssize_t result; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 8b5a934e1133..886f61ea6cc7 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, struct rdma_ucm_cmd_hdr hdr; ssize_t ret; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (len < sizeof(hdr)) return -EINVAL; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index e3ef28861be6..24f3ca2c4ad7 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -48,6 +48,8 @@ #include <asm/uaccess.h> +#include <rdma/ib.h> + #include "uverbs.h" MODULE_AUTHOR("Roland Dreier"); @@ -682,6 +684,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, int srcu_key; ssize_t ret; + if (WARN_ON_ONCE(!ib_safe_file_access(filp))) + return -EACCES; + if (count < sizeof hdr) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c4e091528390..721d63f5b461 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -273,7 +273,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(struct mlx5_wqe_ctrl_seg)) / sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); - props->max_sge_rd = props->max_sge; + props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index e449e394963f..24f4a782e0f4 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -45,6 +45,8 @@ #include <linux/export.h> #include <linux/uio.h> +#include <rdma/ib.h> + #include "qib.h" #include "qib_common.h" #include "qib_user_sdma.h" @@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data, ssize_t ret = 0; void *dest; + if (WARN_ON_ONCE(!ib_safe_file_access(fp))) + return -EACCES; + if (count < sizeof(cmd.type)) { ret = -EINVAL; goto bail; diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c index 3f02e0e03d12..67aab86048ad 100644 --- a/drivers/input/misc/pmic8xxx-pwrkey.c +++ b/drivers/input/misc/pmic8xxx-pwrkey.c @@ -353,7 +353,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev) if (of_property_read_u32(pdev->dev.of_node, "debounce", &kpd_delay)) kpd_delay = 15625; - if (kpd_delay > 62500 || kpd_delay == 0) { + /* Valid range of pwr key trigger delay is 1/64 sec to 2 seconds. */ + if (kpd_delay > USEC_PER_SEC * 2 || kpd_delay < USEC_PER_SEC / 64) { dev_err(&pdev->dev, "invalid power key trigger delay\n"); return -EINVAL; } @@ -385,8 +386,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev) pwr->name = "pmic8xxx_pwrkey"; pwr->phys = "pmic8xxx_pwrkey/input0"; - delay = (kpd_delay << 10) / USEC_PER_SEC; - delay = 1 + ilog2(delay); + delay = (kpd_delay << 6) / USEC_PER_SEC; + delay = ilog2(delay); err = regmap_read(regmap, PON_CNTL_1, &pon_cntl); if (err < 0) { diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index 3a7f3a4a4396..7c18249d6c8e 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -858,6 +858,14 @@ static int gtco_probe(struct usb_interface *usbinterface, goto err_free_buf; } + /* Sanity check that a device has an endpoint */ + if (usbinterface->altsetting[0].desc.bNumEndpoints < 1) { + dev_err(&usbinterface->dev, + "Invalid number of endpoints\n"); + error = -EINVAL; + goto err_free_urb; + } + /* * The endpoint is always altsetting 0, we know this since we know * this device only has one interrupt endpoint @@ -879,7 +887,7 @@ static int gtco_probe(struct usb_interface *usbinterface, * HID report descriptor */ if (usb_get_extra_descriptor(usbinterface->cur_altsetting, - HID_DEVICE_TYPE, &hid_desc) != 0){ + HID_DEVICE_TYPE, &hid_desc) != 0) { dev_err(&usbinterface->dev, "Can't retrieve exta USB descriptor to get hid report descriptor length\n"); error = -EIO; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index fc836f523afa..b9319b76a8a1 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -91,6 +91,7 @@ struct iommu_dev_data { struct list_head dev_data_list; /* For global dev_data_list */ struct protection_domain *domain; /* Domain the device is bound to */ u16 devid; /* PCI Device ID */ + u16 alias; /* Alias Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Device is identity mapped */ struct { @@ -125,6 +126,13 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } +static inline u16 get_device_id(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return PCI_DEVID(pdev->bus->number, pdev->devfn); +} + static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -162,6 +170,68 @@ out_unlock: return dev_data; } +static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) +{ + *(u16 *)data = alias; + return 0; +} + +static u16 get_alias(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid, ivrs_alias, pci_alias; + + devid = get_device_id(dev); + ivrs_alias = amd_iommu_alias_table[devid]; + pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); + + if (ivrs_alias == pci_alias) + return ivrs_alias; + + /* + * DMA alias showdown + * + * The IVRS is fairly reliable in telling us about aliases, but it + * can't know about every screwy device. If we don't have an IVRS + * reported alias, use the PCI reported alias. In that case we may + * still need to initialize the rlookup and dev_table entries if the + * alias is to a non-existent device. + */ + if (ivrs_alias == devid) { + if (!amd_iommu_rlookup_table[pci_alias]) { + amd_iommu_rlookup_table[pci_alias] = + amd_iommu_rlookup_table[devid]; + memcpy(amd_iommu_dev_table[pci_alias].data, + amd_iommu_dev_table[devid].data, + sizeof(amd_iommu_dev_table[pci_alias].data)); + } + + return pci_alias; + } + + pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " + "for device %s[%04x:%04x], kernel reported alias " + "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), + PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, + PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), + PCI_FUNC(pci_alias)); + + /* + * If we don't have a PCI DMA alias and the IVRS alias is on the same + * bus, then the IVRS table may know about a quirk that we don't. + */ + if (pci_alias == devid && + PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { + pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; + pdev->dma_alias_devfn = ivrs_alias & 0xff; + pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", + PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), + dev_name(dev)); + } + + return ivrs_alias; +} + static struct iommu_dev_data *find_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -174,13 +244,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid) return dev_data; } -static inline u16 get_device_id(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - return PCI_DEVID(pdev->bus->number, pdev->devfn); -} - static struct iommu_dev_data *get_dev_data(struct device *dev) { return dev->archdata.iommu; @@ -308,6 +371,8 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; + dev_data->alias = get_alias(dev); + if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; @@ -328,7 +393,7 @@ static void iommu_ignore_device(struct device *dev) u16 devid, alias; devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; + alias = get_alias(dev); memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); @@ -1017,7 +1082,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) int ret; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = dev_data->alias; ret = iommu_flush_dte(iommu, dev_data->devid); if (!ret && alias != dev_data->devid) @@ -1891,7 +1956,7 @@ static void do_attach(struct iommu_dev_data *dev_data, bool ats; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = dev_data->alias; ats = dev_data->ats.enabled; /* Update data structures */ @@ -1925,7 +1990,7 @@ static void do_detach(struct iommu_dev_data *dev_data) return; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = dev_data->alias; /* decrease reference counters */ dev_data->domain->dev_iommu[iommu->index] -= 1; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 72d6182666cb..58f2fe687a24 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -403,7 +403,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, unsigned int s_length = sg_dma_len(s); unsigned int s_dma_len = s->length; - s->offset = s_offset; + s->offset += s_offset; s->length = s_length; sg_dma_address(s) = dma_addr + s_offset; dma_addr += s_dma_len; @@ -422,7 +422,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents) for_each_sg(sg, s, nents, i) { if (sg_dma_address(s) != DMA_ERROR_CODE) - s->offset = sg_dma_address(s); + s->offset += sg_dma_address(s); if (sg_dma_len(s)) s->length = sg_dma_len(s); sg_dma_address(s) = DMA_ERROR_CODE; diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index efe50845939d..17304705f2cf 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -183,7 +183,7 @@ static void __iomem * __init icoll_init_iobase(struct device_node *np) void __iomem *icoll_base; icoll_base = of_io_request_and_map(np, 0, np->name); - if (!icoll_base) + if (IS_ERR(icoll_base)) panic("%s: unable to map resource", np->full_name); return icoll_base; } diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c index 4ef178078e5b..1254e98f6b57 100644 --- a/drivers/irqchip/irq-sunxi-nmi.c +++ b/drivers/irqchip/irq-sunxi-nmi.c @@ -154,9 +154,9 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node, gc = irq_get_domain_generic_chip(domain, 0); gc->reg_base = of_io_request_and_map(node, 0, of_node_full_name(node)); - if (!gc->reg_base) { + if (IS_ERR(gc->reg_base)) { pr_err("unable to map resource\n"); - ret = -ENOMEM; + ret = PTR_ERR(gc->reg_base); goto fail_irqd_remove; } diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 27f2ef300f8b..3970cda10080 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -867,39 +867,55 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, return 0; } -#define WRITE_LOCK(cmd) \ - down_write(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_write(&cmd->root_lock); \ - return -EINVAL; \ +static bool cmd_write_lock(struct dm_cache_metadata *cmd) +{ + down_write(&cmd->root_lock); + if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { + up_write(&cmd->root_lock); + return false; } + return true; +} -#define WRITE_LOCK_VOID(cmd) \ - down_write(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_write(&cmd->root_lock); \ - return; \ - } +#define WRITE_LOCK(cmd) \ + do { \ + if (!cmd_write_lock((cmd))) \ + return -EINVAL; \ + } while(0) + +#define WRITE_LOCK_VOID(cmd) \ + do { \ + if (!cmd_write_lock((cmd))) \ + return; \ + } while(0) #define WRITE_UNLOCK(cmd) \ - up_write(&cmd->root_lock) + up_write(&(cmd)->root_lock) -#define READ_LOCK(cmd) \ - down_read(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_read(&cmd->root_lock); \ - return -EINVAL; \ +static bool cmd_read_lock(struct dm_cache_metadata *cmd) +{ + down_read(&cmd->root_lock); + if (cmd->fail_io) { + up_read(&cmd->root_lock); + return false; } + return true; +} -#define READ_LOCK_VOID(cmd) \ - down_read(&cmd->root_lock); \ - if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \ - up_read(&cmd->root_lock); \ - return; \ - } +#define READ_LOCK(cmd) \ + do { \ + if (!cmd_read_lock((cmd))) \ + return -EINVAL; \ + } while(0) + +#define READ_LOCK_VOID(cmd) \ + do { \ + if (!cmd_read_lock((cmd))) \ + return; \ + } while(0) #define READ_UNLOCK(cmd) \ - up_read(&cmd->root_lock) + up_read(&(cmd)->root_lock) int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) { diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 33bdd81065e8..11f39791ec33 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -1502,7 +1502,7 @@ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking) * Will sleep if required for nonblocking == false. */ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb, - int nonblocking) + void *pb, int nonblocking) { unsigned long flags; int ret; @@ -1523,10 +1523,10 @@ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb, /* * Only remove the buffer from done_list if v4l2_buffer can handle all * the planes. - * Verifying planes is NOT necessary since it already has been checked - * before the buffer is queued/prepared. So it can never fail. */ - list_del(&(*vb)->done_entry); + ret = call_bufop(q, verify_planes_array, *vb, pb); + if (!ret) + list_del(&(*vb)->done_entry); spin_unlock_irqrestore(&q->done_lock, flags); return ret; @@ -1604,7 +1604,7 @@ int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking) struct vb2_buffer *vb = NULL; int ret; - ret = __vb2_get_done_vb(q, &vb, nonblocking); + ret = __vb2_get_done_vb(q, &vb, pb, nonblocking); if (ret < 0) return ret; diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index dbec5923fcf0..3c3b517f1d1c 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -49,7 +49,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start, vec = frame_vector_create(nr); if (!vec) return ERR_PTR(-ENOMEM); - ret = get_vaddr_frames(start, nr, write, 1, vec); + ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec); if (ret < 0) goto out_destroy; /* We accept only complete set of PFNs */ diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c index 502984c724ff..6c441be8f893 100644 --- a/drivers/media/v4l2-core/videobuf2-v4l2.c +++ b/drivers/media/v4l2-core/videobuf2-v4l2.c @@ -67,6 +67,11 @@ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer return 0; } +static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb) +{ + return __verify_planes_array(vb, pb); +} + /** * __verify_length() - Verify that the bytesused value for each plane fits in * the plane length and that the data offset doesn't exceed the bytesused value. @@ -432,6 +437,7 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb, } static const struct vb2_buf_ops v4l2_buf_ops = { + .verify_planes_array = __verify_planes_array_core, .fill_user_buffer = __fill_v4l2_buffer, .fill_vb2_buffer = __fill_vb2_buffer, .set_timestamp = __set_timestamp, diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 98c020b560ac..2dcafd0ba17f 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -443,7 +443,7 @@ config ARM_CHARLCD still useful. config BMP085 - bool + tristate depends on SYSFS config BMP085_I2C diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c index 15e88078ba1e..f1a0b99f5a9a 100644 --- a/drivers/misc/ad525x_dpot.c +++ b/drivers/misc/ad525x_dpot.c @@ -216,7 +216,7 @@ static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg) */ value = swab16(value); - if (dpot->uid == DPOT_UID(AD5271_ID)) + if (dpot->uid == DPOT_UID(AD5274_ID)) value = value >> 2; return value; default: diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 09a406058c46..efbb6945eb18 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -288,7 +288,6 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, void cxl_unmap_irq(unsigned int virq, void *cookie) { free_irq(virq, cookie); - irq_dispose_mapping(virq); } static int cxl_register_one_irq(struct cxl *adapter, diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index 8310b4dbff06..6a451bd65bf3 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -1511,7 +1511,7 @@ off_t scif_register_pinned_pages(scif_epd_t epd, if ((map_flags & SCIF_MAP_FIXED) && ((ALIGN(offset, PAGE_SIZE) != offset) || (offset < 0) || - (offset + (off_t)len < offset))) + (len > LONG_MAX - offset))) return -EINVAL; might_sleep(); @@ -1614,7 +1614,7 @@ off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, if ((map_flags & SCIF_MAP_FIXED) && ((ALIGN(offset, PAGE_SIZE) != offset) || (offset < 0) || - (offset + (off_t)len < offset))) + (len > LONG_MAX - offset))) return -EINVAL; /* Unsupported protection requested */ @@ -1732,7 +1732,8 @@ scif_unregister(scif_epd_t epd, off_t offset, size_t len) /* Offset is not page aligned or offset+len wraps around */ if ((ALIGN(offset, PAGE_SIZE) != offset) || - (offset + (off_t)len < offset)) + (offset < 0) || + (len > LONG_MAX - offset)) return -EINVAL; err = scif_verify_epd(ep); diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c index 12c6190c6e33..4a07ba1195b5 100644 --- a/drivers/mtd/nand/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/brcmnand/brcmnand.c @@ -309,6 +309,36 @@ static const u16 brcmnand_regs_v60[] = { [BRCMNAND_FC_BASE] = 0x400, }; +/* BRCMNAND v7.1 */ +static const u16 brcmnand_regs_v71[] = { + [BRCMNAND_CMD_START] = 0x04, + [BRCMNAND_CMD_EXT_ADDRESS] = 0x08, + [BRCMNAND_CMD_ADDRESS] = 0x0c, + [BRCMNAND_INTFC_STATUS] = 0x14, + [BRCMNAND_CS_SELECT] = 0x18, + [BRCMNAND_CS_XOR] = 0x1c, + [BRCMNAND_LL_OP] = 0x20, + [BRCMNAND_CS0_BASE] = 0x50, + [BRCMNAND_CS1_BASE] = 0, + [BRCMNAND_CORR_THRESHOLD] = 0xdc, + [BRCMNAND_CORR_THRESHOLD_EXT] = 0xe0, + [BRCMNAND_UNCORR_COUNT] = 0xfc, + [BRCMNAND_CORR_COUNT] = 0x100, + [BRCMNAND_CORR_EXT_ADDR] = 0x10c, + [BRCMNAND_CORR_ADDR] = 0x110, + [BRCMNAND_UNCORR_EXT_ADDR] = 0x114, + [BRCMNAND_UNCORR_ADDR] = 0x118, + [BRCMNAND_SEMAPHORE] = 0x150, + [BRCMNAND_ID] = 0x194, + [BRCMNAND_ID_EXT] = 0x198, + [BRCMNAND_LL_RDATA] = 0x19c, + [BRCMNAND_OOB_READ_BASE] = 0x200, + [BRCMNAND_OOB_READ_10_BASE] = 0, + [BRCMNAND_OOB_WRITE_BASE] = 0x280, + [BRCMNAND_OOB_WRITE_10_BASE] = 0, + [BRCMNAND_FC_BASE] = 0x400, +}; + enum brcmnand_cs_reg { BRCMNAND_CS_CFG_EXT = 0, BRCMNAND_CS_CFG, @@ -404,7 +434,9 @@ static int brcmnand_revision_init(struct brcmnand_controller *ctrl) } /* Register offsets */ - if (ctrl->nand_version >= 0x0600) + if (ctrl->nand_version >= 0x0701) + ctrl->reg_offsets = brcmnand_regs_v71; + else if (ctrl->nand_version >= 0x0600) ctrl->reg_offsets = brcmnand_regs_v60; else if (ctrl->nand_version >= 0x0500) ctrl->reg_offsets = brcmnand_regs_v50; diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 3ff583f165cd..ce7b2cab5762 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -3979,7 +3979,6 @@ static int nand_dt_init(struct mtd_info *mtd, struct nand_chip *chip, * This is the first phase of the normal nand_scan() function. It reads the * flash ID and sets up MTD fields accordingly. * - * The mtd->owner field must be set to the module of the caller. */ int nand_scan_ident(struct mtd_info *mtd, int maxchips, struct nand_flash_dev *table) @@ -4403,19 +4402,12 @@ EXPORT_SYMBOL(nand_scan_tail); * * This fills out all the uninitialized function pointers with the defaults. * The flash ID is read and the mtd/chip structures are filled with the - * appropriate values. The mtd->owner field must be set to the module of the - * caller. + * appropriate values. */ int nand_scan(struct mtd_info *mtd, int maxchips) { int ret; - /* Many callers got this wrong, so check for it for a while... */ - if (!mtd->owner && caller_is_module()) { - pr_crit("%s called with NULL mtd->owner!\n", __func__); - BUG(); - } - ret = nand_scan_ident(mtd, maxchips, NULL); if (!ret) ret = nand_scan_tail(mtd); diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 32477c4eb421..37e4135ab213 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1067,45 +1067,6 @@ static int spansion_quad_enable(struct spi_nor *nor) return 0; } -static int micron_quad_enable(struct spi_nor *nor) -{ - int ret; - u8 val; - - ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1); - if (ret < 0) { - dev_err(nor->dev, "error %d reading EVCR\n", ret); - return ret; - } - - write_enable(nor); - - /* set EVCR, enable quad I/O */ - nor->cmd_buf[0] = val & ~EVCR_QUAD_EN_MICRON; - ret = nor->write_reg(nor, SPINOR_OP_WD_EVCR, nor->cmd_buf, 1); - if (ret < 0) { - dev_err(nor->dev, "error while writing EVCR register\n"); - return ret; - } - - ret = spi_nor_wait_till_ready(nor); - if (ret) - return ret; - - /* read EVCR and check it */ - ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1); - if (ret < 0) { - dev_err(nor->dev, "error %d reading EVCR\n", ret); - return ret; - } - if (val & EVCR_QUAD_EN_MICRON) { - dev_err(nor->dev, "Micron EVCR Quad bit not clear\n"); - return -EINVAL; - } - - return 0; -} - static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info) { int status; @@ -1119,12 +1080,7 @@ static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info) } return status; case SNOR_MFR_MICRON: - status = micron_quad_enable(nor); - if (status) { - dev_err(nor->dev, "Micron quad-read not enabled\n"); - return -EINVAL; - } - return status; + return 0; default: status = spansion_quad_enable(nor); if (status) { diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 973dade2d07f..1257b18e6b90 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -270,11 +270,17 @@ jme_reset_mac_processor(struct jme_adapter *jme) } static inline void -jme_clear_pm(struct jme_adapter *jme) +jme_clear_pm_enable_wol(struct jme_adapter *jme) { jwrite32(jme, JME_PMCS, PMCS_STMASK | jme->reg_pmcs); } +static inline void +jme_clear_pm_disable_wol(struct jme_adapter *jme) +{ + jwrite32(jme, JME_PMCS, PMCS_STMASK); +} + static int jme_reload_eeprom(struct jme_adapter *jme) { @@ -1853,7 +1859,7 @@ jme_open(struct net_device *netdev) struct jme_adapter *jme = netdev_priv(netdev); int rc; - jme_clear_pm(jme); + jme_clear_pm_disable_wol(jme); JME_NAPI_ENABLE(jme); tasklet_init(&jme->linkch_task, jme_link_change_tasklet, @@ -1925,11 +1931,11 @@ jme_wait_link(struct jme_adapter *jme) static void jme_powersave_phy(struct jme_adapter *jme) { - if (jme->reg_pmcs) { + if (jme->reg_pmcs && device_may_wakeup(&jme->pdev->dev)) { jme_set_100m_half(jme); if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN)) jme_wait_link(jme); - jme_clear_pm(jme); + jme_clear_pm_enable_wol(jme); } else { jme_phy_off(jme); } @@ -2646,9 +2652,6 @@ jme_set_wol(struct net_device *netdev, if (wol->wolopts & WAKE_MAGIC) jme->reg_pmcs |= PMCS_MFEN; - jwrite32(jme, JME_PMCS, jme->reg_pmcs); - device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs)); - return 0; } @@ -3172,8 +3175,8 @@ jme_init_one(struct pci_dev *pdev, jme->mii_if.mdio_read = jme_mdio_read; jme->mii_if.mdio_write = jme_mdio_write; - jme_clear_pm(jme); - device_set_wakeup_enable(&pdev->dev, true); + jme_clear_pm_disable_wol(jme); + device_init_wakeup(&pdev->dev, true); jme_set_phyfifo_5level(jme); jme->pcirev = pdev->revision; @@ -3304,7 +3307,7 @@ jme_resume(struct device *dev) if (!netif_running(netdev)) return 0; - jme_clear_pm(jme); + jme_clear_pm_disable_wol(jme); jme_phy_on(jme); if (test_bit(JME_FLAG_SSET, &jme->flags)) jme_set_settings(netdev, &jme->old_ecmd); diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index e88afac51c5d..f96ab2f4b90e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1557,6 +1557,8 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm) /* the fw is stopped, the aux sta is dead: clean up driver state */ iwl_mvm_del_aux_sta(mvm); + iwl_free_fw_paging(mvm); + /* * Clear IN_HW_RESTART flag when stopping the hw (as restart_complete() * won't be called in this case). diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index c3adf2bcdc85..13c97f665ba8 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -645,8 +645,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode) for (i = 0; i < NVM_MAX_NUM_SECTIONS; i++) kfree(mvm->nvm_sections[i].data); - iwl_free_fw_paging(mvm); - iwl_mvm_tof_clean(mvm); ieee80211_free_hw(mvm->hw); diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 8c7204738aa3..00e0332e2544 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -731,8 +731,8 @@ static int iwl_pcie_rsa_race_bug_wa(struct iwl_trans *trans) */ val = iwl_read_prph(trans, PREG_AUX_BUS_WPROT_0); if (val & (BIT(1) | BIT(17))) { - IWL_INFO(trans, - "can't access the RSA semaphore it is write protected\n"); + IWL_DEBUG_INFO(trans, + "can't access the RSA semaphore it is write protected\n"); return 0; } diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 86fff625f6d5..c6d196188bc9 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -760,6 +760,16 @@ const void * __init of_flat_dt_match_machine(const void *default_match, } #ifdef CONFIG_BLK_DEV_INITRD +#ifndef __early_init_dt_declare_initrd +static void __early_init_dt_declare_initrd(unsigned long start, + unsigned long end) +{ + initrd_start = (unsigned long)__va(start); + initrd_end = (unsigned long)__va(end); + initrd_below_start_ok = 1; +} +#endif + /** * early_init_dt_check_for_initrd - Decode initrd location from flat tree * @node: reference to node containing initrd location ('chosen') @@ -782,9 +792,7 @@ static void __init early_init_dt_check_for_initrd(unsigned long node) return; end = of_read_number(prop, len/4); - initrd_start = (unsigned long)__va(start); - initrd_end = (unsigned long)__va(end); - initrd_below_start_ok = 1; + __early_init_dt_declare_initrd(start, end); pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", (unsigned long long)start, (unsigned long long)end); @@ -1006,13 +1014,16 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, } #ifdef CONFIG_HAVE_MEMBLOCK +#ifndef MIN_MEMBLOCK_ADDR +#define MIN_MEMBLOCK_ADDR __pa(PAGE_OFFSET) +#endif #ifndef MAX_MEMBLOCK_ADDR #define MAX_MEMBLOCK_ADDR ((phys_addr_t)~0) #endif void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) { - const u64 phys_offset = __pa(PAGE_OFFSET); + const u64 phys_offset = MIN_MEMBLOCK_ADDR; if (!PAGE_ALIGNED(base)) { if (size < PAGE_SIZE - (base & ~PAGE_MASK)) { diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 5c717275a7fa..3d8019eb3d84 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -939,7 +939,8 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset, struct mtk_pinctrl *pctl = dev_get_drvdata(chip->dev); int eint_num, virq, eint_offset; unsigned int set_offset, bit, clr_bit, clr_offset, rst, i, unmask, dbnc; - static const unsigned int dbnc_arr[] = {0 , 1, 16, 32, 64, 128, 256}; + static const unsigned int debounce_time[] = {500, 1000, 16000, 32000, 64000, + 128000, 256000}; const struct mtk_desc_pin *pin; struct irq_data *d; @@ -957,9 +958,9 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset, if (!mtk_eint_can_en_debounce(pctl, eint_num)) return -ENOSYS; - dbnc = ARRAY_SIZE(dbnc_arr); - for (i = 0; i < ARRAY_SIZE(dbnc_arr); i++) { - if (debounce <= dbnc_arr[i]) { + dbnc = ARRAY_SIZE(debounce_time); + for (i = 0; i < ARRAY_SIZE(debounce_time); i++) { + if (debounce <= debounce_time[i]) { dbnc = i; break; } diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index ef04b962c3d5..23b6b8c29a99 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1273,9 +1273,9 @@ static int pcs_parse_bits_in_pinctrl_entry(struct pcs_device *pcs, /* Parse pins in each row from LSB */ while (mask) { - bit_pos = ffs(mask); + bit_pos = __ffs(mask); pin_num_from_lsb = bit_pos / pcs->bits_per_pin; - mask_pos = ((pcs->fmask) << (bit_pos - 1)); + mask_pos = ((pcs->fmask) << bit_pos); val_pos = val & mask_pos; submask = mask & mask_pos; @@ -1847,7 +1847,7 @@ static int pcs_probe(struct platform_device *pdev) ret = of_property_read_u32(np, "pinctrl-single,function-mask", &pcs->fmask); if (!ret) { - pcs->fshift = ffs(pcs->fmask) - 1; + pcs->fshift = __ffs(pcs->fmask); pcs->fmax = pcs->fmask >> pcs->fshift; } else { /* If mask property doesn't exist, function mux is invalid. */ diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index b0f62141ea4d..f774cb576ffa 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -131,7 +131,7 @@ MODULE_LICENSE("GPL"); /* Field definitions */ #define HCI_ACCEL_MASK 0x7fff #define HCI_HOTKEY_DISABLE 0x0b -#define HCI_HOTKEY_ENABLE 0x01 +#define HCI_HOTKEY_ENABLE 0x09 #define HCI_HOTKEY_SPECIAL_FUNCTIONS 0x10 #define HCI_LCD_BRIGHTNESS_BITS 3 #define HCI_LCD_BRIGHTNESS_SHIFT (16-HCI_LCD_BRIGHTNESS_BITS) diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c index 423ce087cd9c..5d5adee16886 100644 --- a/drivers/pwm/pwm-brcmstb.c +++ b/drivers/pwm/pwm-brcmstb.c @@ -274,8 +274,8 @@ static int brcmstb_pwm_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); p->base = devm_ioremap_resource(&pdev->dev, res); - if (!p->base) { - ret = -ENOMEM; + if (IS_ERR(p->base)) { + ret = PTR_ERR(p->base); goto out_clk; } diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7b94b8ee087c..732ac71b82cd 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -148,7 +148,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev) { int i; - for (i = 0; rdev->supply; rdev = rdev_get_supply(rdev), i++) + for (i = 0; rdev; rdev = rdev_get_supply(rdev), i++) mutex_lock_nested(&rdev->mutex, i); } diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c index 58f5d3b8e981..27343e1c43ef 100644 --- a/drivers/regulator/s5m8767.c +++ b/drivers/regulator/s5m8767.c @@ -202,9 +202,10 @@ static int s5m8767_get_register(struct s5m8767_info *s5m8767, int reg_id, } } - if (i < s5m8767->num_regulators) - *enable_ctrl = - s5m8767_opmode_reg[reg_id][mode] << S5M8767_ENCTRL_SHIFT; + if (i >= s5m8767->num_regulators) + return -EINVAL; + + *enable_ctrl = s5m8767_opmode_reg[reg_id][mode] << S5M8767_ENCTRL_SHIFT; return 0; } @@ -937,8 +938,12 @@ static int s5m8767_pmic_probe(struct platform_device *pdev) else regulators[id].vsel_mask = 0xff; - s5m8767_get_register(s5m8767, id, &enable_reg, + ret = s5m8767_get_register(s5m8767, id, &enable_reg, &enable_val); + if (ret) { + dev_err(s5m8767->dev, "error reading registers\n"); + return ret; + } regulators[id].enable_reg = enable_reg; regulators[id].enable_mask = S5M8767_ENCTRL_MASK; regulators[id].enable_val = enable_val; diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c index 05a51ef52703..d5c1b057a739 100644 --- a/drivers/rtc/rtc-ds1685.c +++ b/drivers/rtc/rtc-ds1685.c @@ -187,9 +187,9 @@ ds1685_rtc_end_data_access(struct ds1685_priv *rtc) * Only use this where you are certain another lock will not be held. */ static inline void -ds1685_rtc_begin_ctrl_access(struct ds1685_priv *rtc, unsigned long flags) +ds1685_rtc_begin_ctrl_access(struct ds1685_priv *rtc, unsigned long *flags) { - spin_lock_irqsave(&rtc->lock, flags); + spin_lock_irqsave(&rtc->lock, *flags); ds1685_rtc_switch_to_bank1(rtc); } @@ -1304,7 +1304,7 @@ ds1685_rtc_sysfs_ctrl_regs_store(struct device *dev, { struct ds1685_priv *rtc = dev_get_drvdata(dev); u8 reg = 0, bit = 0, tmp; - unsigned long flags = 0; + unsigned long flags; long int val = 0; const struct ds1685_rtc_ctrl_regs *reg_info = ds1685_rtc_sysfs_ctrl_regs_lookup(attr->attr.name); @@ -1325,7 +1325,7 @@ ds1685_rtc_sysfs_ctrl_regs_store(struct device *dev, bit = reg_info->bit; /* Safe to spinlock during a write. */ - ds1685_rtc_begin_ctrl_access(rtc, flags); + ds1685_rtc_begin_ctrl_access(rtc, &flags); tmp = rtc->read(rtc, reg); rtc->write(rtc, reg, (val ? (tmp | bit) : (tmp & ~(bit)))); ds1685_rtc_end_ctrl_access(rtc, flags); diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 097325d96db5..b1b4746a0eab 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -144,7 +144,7 @@ static int hym8563_rtc_set_time(struct device *dev, struct rtc_time *tm) * it does not seem to carry it over a subsequent write/read. * So we'll limit ourself to 100 years, starting at 2000 for now. */ - buf[6] = tm->tm_year - 100; + buf[6] = bin2bcd(tm->tm_year - 100); /* * CTL1 only contains TEST-mode bits apart from stop, diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c index 7184a0eda793..725dccae24e7 100644 --- a/drivers/rtc/rtc-max77686.c +++ b/drivers/rtc/rtc-max77686.c @@ -465,7 +465,7 @@ static int max77686_rtc_probe(struct platform_device *pdev) info->virq = regmap_irq_get_virq(max77686->rtc_irq_data, MAX77686_RTCIRQ_RTCA1); - if (!info->virq) { + if (info->virq <= 0) { ret = -ENXIO; goto err_rtc; } diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index bd911bafb809..17341feadad1 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -65,7 +65,6 @@ static const struct i2c_device_id rx8025_id[] = { { "rx8025", 0 }, - { "rv8803", 1 }, { } }; MODULE_DEVICE_TABLE(i2c, rx8025_id); diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c index f64c282275b3..e1b86bb01062 100644 --- a/drivers/rtc/rtc-vr41xx.c +++ b/drivers/rtc/rtc-vr41xx.c @@ -272,12 +272,13 @@ static irqreturn_t rtclong1_interrupt(int irq, void *dev_id) } static const struct rtc_class_ops vr41xx_rtc_ops = { - .release = vr41xx_rtc_release, - .ioctl = vr41xx_rtc_ioctl, - .read_time = vr41xx_rtc_read_time, - .set_time = vr41xx_rtc_set_time, - .read_alarm = vr41xx_rtc_read_alarm, - .set_alarm = vr41xx_rtc_set_alarm, + .release = vr41xx_rtc_release, + .ioctl = vr41xx_rtc_ioctl, + .read_time = vr41xx_rtc_read_time, + .set_time = vr41xx_rtc_set_time, + .read_alarm = vr41xx_rtc_read_alarm, + .set_alarm = vr41xx_rtc_set_alarm, + .alarm_irq_enable = vr41xx_rtc_alarm_irq_enable, }; static int rtc_probe(struct platform_device *pdev) diff --git a/drivers/scsi/device_handler/Kconfig b/drivers/scsi/device_handler/Kconfig index e5647d59224f..0b331c9c0a8f 100644 --- a/drivers/scsi/device_handler/Kconfig +++ b/drivers/scsi/device_handler/Kconfig @@ -13,13 +13,13 @@ menuconfig SCSI_DH config SCSI_DH_RDAC tristate "LSI RDAC Device Handler" - depends on SCSI_DH + depends on SCSI_DH && SCSI help If you have a LSI RDAC select y. Otherwise, say N. config SCSI_DH_HP_SW tristate "HP/COMPAQ MSA Device Handler" - depends on SCSI_DH + depends on SCSI_DH && SCSI help If you have a HP/COMPAQ MSA device that requires START_STOP to be sent to start it and cannot upgrade the firmware then select y. @@ -27,13 +27,13 @@ config SCSI_DH_HP_SW config SCSI_DH_EMC tristate "EMC CLARiiON Device Handler" - depends on SCSI_DH + depends on SCSI_DH && SCSI help If you have a EMC CLARiiON select y. Otherwise, say N. config SCSI_DH_ALUA tristate "SPC-3 ALUA Device Handler" - depends on SCSI_DH + depends on SCSI_DH && SCSI help SCSI Device handler for generic SPC-3 Asymmetric Logical Unit Access (ALUA). diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 97a1c1c33b05..00ce3e269a43 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -6282,12 +6282,13 @@ out: } for (i = 0; i < ioc->sge_count; i++) { - if (kbuff_arr[i]) + if (kbuff_arr[i]) { dma_free_coherent(&instance->pdev->dev, le32_to_cpu(kern_sge32[i].length), kbuff_arr[i], le32_to_cpu(kern_sge32[i].phys_addr)); kbuff_arr[i] = NULL; + } } megasas_return_cmd(instance, cmd); diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 79a8bc4f6cec..035767c02072 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -265,7 +265,10 @@ static inline u32 rx_max(struct rockchip_spi *rs) static void rockchip_spi_set_cs(struct spi_device *spi, bool enable) { u32 ser; - struct rockchip_spi *rs = spi_master_get_devdata(spi->master); + struct spi_master *master = spi->master; + struct rockchip_spi *rs = spi_master_get_devdata(master); + + pm_runtime_get_sync(rs->dev); ser = readl_relaxed(rs->regs + ROCKCHIP_SPI_SER) & SER_MASK; @@ -290,6 +293,8 @@ static void rockchip_spi_set_cs(struct spi_device *spi, bool enable) ser &= ~(1 << spi->chip_select); writel_relaxed(ser, rs->regs + ROCKCHIP_SPI_SER); + + pm_runtime_put_sync(rs->dev); } static int rockchip_spi_prepare_message(struct spi_master *master, diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO index 05de0dad8762..4c6f1d7d2eaf 100644 --- a/drivers/staging/rdma/hfi1/TODO +++ b/drivers/staging/rdma/hfi1/TODO @@ -3,4 +3,4 @@ July, 2015 - Remove unneeded file entries in sysfs - Remove software processing of IB protocol and place in library for use by qib, ipath (if still present), hfi1, and eventually soft-roce - +- Replace incorrect uAPI diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index aae9826ec62b..c851e51b1dc3 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -62,6 +62,8 @@ #include <linux/cred.h> #include <linux/uio.h> +#include <rdma/ib.h> + #include "hfi.h" #include "pio.h" #include "device.h" @@ -214,6 +216,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data, int uctxt_required = 1; int must_be_root = 0; + /* FIXME: This interface cannot continue out of staging */ + if (WARN_ON_ONCE(!ib_safe_file_access(fp))) + return -EACCES; + if (count < sizeof(cmd)) { ret = -EINVAL; goto bail; diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c index e845841ab036..7106288efae3 100644 --- a/drivers/thermal/rockchip_thermal.c +++ b/drivers/thermal/rockchip_thermal.c @@ -545,15 +545,14 @@ static int rockchip_configure_from_dt(struct device *dev, thermal->chip->tshut_temp); thermal->tshut_temp = thermal->chip->tshut_temp; } else { + if (shut_temp > INT_MAX) { + dev_err(dev, "Invalid tshut temperature specified: %d\n", + shut_temp); + return -ERANGE; + } thermal->tshut_temp = shut_temp; } - if (thermal->tshut_temp > INT_MAX) { - dev_err(dev, "Invalid tshut temperature specified: %d\n", - thermal->tshut_temp); - return -ERANGE; - } - if (of_property_read_u32(np, "rockchip,hw-tshut-mode", &tshut_mode)) { dev_warn(dev, "Missing tshut mode property, using default (%s)\n", diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 51c7507b0444..63a06ab6ba03 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -38,7 +38,6 @@ #include <linux/major.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/notifier.h> #include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -116,8 +115,6 @@ struct sci_port { struct timer_list rx_timer; unsigned int rx_timeout; #endif - - struct notifier_block freq_transition; }; #define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS @@ -1606,29 +1603,6 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr) return ret; } -/* - * Here we define a transition notifier so that we can update all of our - * ports' baud rate when the peripheral clock changes. - */ -static int sci_notifier(struct notifier_block *self, - unsigned long phase, void *p) -{ - struct sci_port *sci_port; - unsigned long flags; - - sci_port = container_of(self, struct sci_port, freq_transition); - - if (phase == CPUFREQ_POSTCHANGE) { - struct uart_port *port = &sci_port->port; - - spin_lock_irqsave(&port->lock, flags); - port->uartclk = clk_get_rate(sci_port->iclk); - spin_unlock_irqrestore(&port->lock, flags); - } - - return NOTIFY_OK; -} - static const struct sci_irq_desc { const char *desc; irq_handler_t handler; @@ -2559,9 +2533,6 @@ static int sci_remove(struct platform_device *dev) { struct sci_port *port = platform_get_drvdata(dev); - cpufreq_unregister_notifier(&port->freq_transition, - CPUFREQ_TRANSITION_NOTIFIER); - uart_remove_one_port(&sci_uart_driver, &port->port); sci_cleanup_single(port); @@ -2714,16 +2685,6 @@ static int sci_probe(struct platform_device *dev) if (ret) return ret; - sp->freq_transition.notifier_call = sci_notifier; - - ret = cpufreq_register_notifier(&sp->freq_transition, - CPUFREQ_TRANSITION_NOTIFIER); - if (unlikely(ret < 0)) { - uart_remove_one_port(&sci_uart_driver, &sp->port); - sci_cleanup_single(sp); - return ret; - } - #ifdef CONFIG_SH_STANDARD_BIOS sh_bios_gdb_detach(); #endif diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 9eb1cff28bd4..b8b580e5ae6e 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -74,6 +74,15 @@ static void for_each_companion(struct pci_dev *pdev, struct usb_hcd *hcd, if (companion->bus != pdev->bus || PCI_SLOT(companion->devfn) != slot) continue; + + /* + * Companion device should be either UHCI,OHCI or EHCI host + * controller, otherwise skip. + */ + if (companion->class != CL_UHCI && companion->class != CL_OHCI && + companion->class != CL_EHCI) + continue; + companion_hcd = pci_get_drvdata(companion); if (!companion_hcd || !companion_hcd->self.root_hub) continue; diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index cf43e9e18368..79d895c2dd71 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -646,6 +646,7 @@ static void ffs_user_copy_worker(struct work_struct *work) work); int ret = io_data->req->status ? io_data->req->status : io_data->req->actual; + bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD; if (io_data->read && ret > 0) { use_mm(io_data->mm); @@ -657,13 +658,11 @@ static void ffs_user_copy_worker(struct work_struct *work) io_data->kiocb->ki_complete(io_data->kiocb, ret, ret); - if (io_data->ffs->ffs_eventfd && - !(io_data->kiocb->ki_flags & IOCB_EVENTFD)) + if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd) eventfd_signal(io_data->ffs->ffs_eventfd, 1); usb_ep_free_request(io_data->ep, io_data->req); - io_data->kiocb->private = NULL; if (io_data->read) kfree(io_data->to_free); kfree(io_data->buf); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index c48cbe731356..d8dbd7e5194b 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1875,6 +1875,12 @@ no_bw: kfree(xhci->rh_bw); kfree(xhci->ext_caps); + xhci->usb2_ports = NULL; + xhci->usb3_ports = NULL; + xhci->port_array = NULL; + xhci->rh_bw = NULL; + xhci->ext_caps = NULL; + xhci->page_size = 0; xhci->page_shift = 0; xhci->bus_state[0].bus_suspended = 0; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index c2d65206ec6c..ea4fb4b0cd44 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -48,6 +48,7 @@ #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI 0xa12f #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI 0x9d2f #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8 +#define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 static const char hcd_name[] = "xhci_hcd"; @@ -156,7 +157,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_ETRON && @@ -299,6 +301,7 @@ static void xhci_pci_remove(struct pci_dev *dev) struct xhci_hcd *xhci; xhci = hcd_to_xhci(pci_get_drvdata(dev)); + xhci->xhc_state |= XHCI_STATE_REMOVING; if (xhci->shared_hcd) { usb_remove_hcd(xhci->shared_hcd); usb_put_hcd(xhci->shared_hcd); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index db0f0831b94f..2b63969c2bbf 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -4008,7 +4008,8 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd, int reserved_trbs = xhci->cmd_ring_reserved_trbs; int ret; - if (xhci->xhc_state) { + if ((xhci->xhc_state & XHCI_STATE_DYING) || + (xhci->xhc_state & XHCI_STATE_HALTED)) { xhci_dbg(xhci, "xHCI dying or halted, can't queue_command\n"); return -ESHUTDOWN; } diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 776d59c32bc5..ec9e758d5fcd 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -146,7 +146,8 @@ static int xhci_start(struct xhci_hcd *xhci) "waited %u microseconds.\n", XHCI_MAX_HALT_USEC); if (!ret) - xhci->xhc_state &= ~(XHCI_STATE_HALTED | XHCI_STATE_DYING); + /* clear state flags. Including dying, halted or removing */ + xhci->xhc_state = 0; return ret; } @@ -1103,8 +1104,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* Resume root hubs only when have pending events. */ status = readl(&xhci->op_regs->status); if (status & STS_EINT) { - usb_hcd_resume_root_hub(hcd); usb_hcd_resume_root_hub(xhci->shared_hcd); + usb_hcd_resume_root_hub(hcd); } } @@ -1119,10 +1120,10 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) /* Re-enable port polling. */ xhci_dbg(xhci, "%s: starting port polling.\n", __func__); - set_bit(HCD_FLAG_POLL_RH, &hcd->flags); - usb_hcd_poll_rh_status(hcd); set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags); usb_hcd_poll_rh_status(xhci->shared_hcd); + set_bit(HCD_FLAG_POLL_RH, &hcd->flags); + usb_hcd_poll_rh_status(hcd); return retval; } @@ -2753,7 +2754,8 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) if (ret <= 0) return ret; xhci = hcd_to_xhci(hcd); - if (xhci->xhc_state & XHCI_STATE_DYING) + if ((xhci->xhc_state & XHCI_STATE_DYING) || + (xhci->xhc_state & XHCI_STATE_REMOVING)) return -ENODEV; xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev); @@ -3800,7 +3802,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_lock(&xhci->mutex); - if (xhci->xhc_state) /* dying or halted */ + if (xhci->xhc_state) /* dying, removing or halted */ goto out; if (!udev->slot_id) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 0b9451250e33..99ac2289dbf3 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1596,6 +1596,7 @@ struct xhci_hcd { */ #define XHCI_STATE_DYING (1 << 0) #define XHCI_STATE_HALTED (1 << 1) +#define XHCI_STATE_REMOVING (1 << 2) /* Statistics */ int error_bitmask; unsigned int quirks; diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index facaaf003f19..e40da7759a0e 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -741,6 +741,17 @@ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb) if (!(size > 0)) return 0; + if (size > urb->transfer_buffer_length) { + /* should not happen, probably malicious packet */ + if (ud->side == USBIP_STUB) { + usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); + return 0; + } else { + usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); + return -EPIPE; + } + } + ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size); if (ret != size) { dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret); diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index e6d16d65e4e6..f07a0974fda2 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -2249,7 +2249,6 @@ config XEN_FBDEV_FRONTEND select FB_SYS_IMAGEBLIT select FB_SYS_FOPS select FB_DEFERRED_IO - select INPUT_XEN_KBDDEV_FRONTEND if INPUT_MISC select XEN_XENBUS_FRONTEND default y help diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c index 0081725c6b5b..d00510029c93 100644 --- a/drivers/video/fbdev/da8xx-fb.c +++ b/drivers/video/fbdev/da8xx-fb.c @@ -209,8 +209,7 @@ static struct fb_videomode known_lcd_panels[] = { .lower_margin = 2, .hsync_len = 0, .vsync_len = 0, - .sync = FB_SYNC_CLK_INVERT | - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .sync = FB_SYNC_CLK_INVERT, }, /* Sharp LK043T1DG01 */ [1] = { @@ -224,7 +223,7 @@ static struct fb_videomode known_lcd_panels[] = { .lower_margin = 2, .hsync_len = 41, .vsync_len = 10, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .sync = 0, .flag = 0, }, [2] = { @@ -239,7 +238,7 @@ static struct fb_videomode known_lcd_panels[] = { .lower_margin = 10, .hsync_len = 10, .vsync_len = 10, - .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .sync = 0, .flag = 0, }, [3] = { diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b7fcc0de0b2f..0f5d05bf2131 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -457,7 +457,7 @@ struct dentry *debugfs_create_automount(const char *name, if (unlikely(!inode)) return failed_creating(dentry); - inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + make_empty_dir_inode(inode); inode->i_flags |= S_AUTOMOUNT; inode->i_private = data; dentry->d_fsdata = (void *)f; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 786cb51cab56..daad932eeb38 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -933,6 +933,15 @@ struct ext4_inode_info { * by other means, so we have i_data_sem. */ struct rw_semaphore i_data_sem; + /* + * i_mmap_sem is for serializing page faults with truncate / punch hole + * operations. We have to make sure that new page cannot be faulted in + * a section of the inode that is being punched. We cannot easily use + * i_data_sem for this since we need protection for the whole punch + * operation and i_data_sem ranks below transaction start so we have + * to occasionally drop it. + */ + struct rw_semaphore i_mmap_sem; struct inode vfs_inode; struct jbd2_inode *jinode; @@ -2508,6 +2517,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, loff_t lstart, loff_t lend); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); +extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); @@ -2872,6 +2882,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) return changed; } +int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, + loff_t len); + struct ext4_group_info { unsigned long bb_state; struct rb_root bb_free_root; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 551353b1b17a..3578b25fccfd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, if (len <= EXT_UNWRITTEN_MAX_LEN) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; - /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); - /* * credits to insert 1 extent into extent tree */ @@ -4752,8 +4748,6 @@ retry: goto retry; } - ext4_inode_resume_unlocked_dio(inode); - return ret > 0 ? ret2 : ret; } @@ -4770,7 +4764,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, int partial_begin, partial_end; loff_t start, end; ext4_lblk_t lblk; - struct address_space *mapping = inode->i_mapping; unsigned int blkbits = inode->i_blkbits; trace_ext4_zero_range(inode, offset, len, mode); @@ -4786,17 +4779,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, } /* - * Write out all dirty pages to avoid race conditions - * Then release them. - */ - if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { - ret = filemap_write_and_wait_range(mapping, offset, - offset + len - 1); - if (ret) - return ret; - } - - /* * Round up offset. This is not fallocate, we neet to zero out * blocks, so convert interior block aligned part of the range to * unwritten and possibly manually zero out unaligned parts of the @@ -4839,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + /* Preallocate the range including the unaligned edges */ if (partial_begin || partial_end) { ret = ext4_alloc_file_blocks(file, @@ -4847,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, round_down(offset, 1 << blkbits)) >> blkbits, new_size, flags, mode); if (ret) - goto out_mutex; + goto out_dio; } @@ -4856,16 +4842,23 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE); - /* Now release the pages and zero block aligned part of pages*/ + /* + * Prevent page faults from reinstantiating pages we have + * released from page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); + ret = ext4_update_disksize_before_punch(inode, offset, len); + if (ret) { + up_write(&EXT4_I(inode)->i_mmap_sem); + goto out_dio; + } + /* Now release the pages and zero block aligned part of pages */ truncate_pagecache_range(inode, start, end - 1); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); - /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags, mode); + up_write(&EXT4_I(inode)->i_mmap_sem); if (ret) goto out_dio; } @@ -4998,8 +4991,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags, mode); + ext4_inode_resume_unlocked_dio(inode); if (ret) goto out; @@ -5494,21 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) return ret; } - /* - * Need to round down offset to be aligned with page size boundary - * for page size > block size. - */ - ioffset = round_down(offset, PAGE_SIZE); - - /* Write out all dirty pages */ - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, - LLONG_MAX); - if (ret) - return ret; - - /* Take mutex lock */ mutex_lock(&inode->i_mutex); - /* * There is no need to overlap collapse range with EOF, in which case * it is effectively a truncate operation @@ -5524,17 +5508,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - truncate_pagecache(inode, ioffset); - /* Wait for existing dio to complete */ ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); + /* + * Prevent page faults from reinstantiating pages we have released from + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); + /* + * Need to round down offset to be aligned with page size boundary + * for page size > block size. + */ + ioffset = round_down(offset, PAGE_SIZE); + /* + * Write tail of the last page before removed range since it will get + * removed from the page cache below. + */ + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset); + if (ret) + goto out_mmap; + /* + * Write data that will be shifted to preserve them when discarding + * page cache below. We are also protected from pages becoming dirty + * by i_mmap_sem. + */ + ret = filemap_write_and_wait_range(inode->i_mapping, offset + len, + LLONG_MAX); + if (ret) + goto out_mmap; + truncate_pagecache(inode, ioffset); + credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out_dio; + goto out_mmap; } down_write(&EXT4_I(inode)->i_data_sem); @@ -5573,7 +5583,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); -out_dio: +out_mmap: + up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); out_mutex: mutex_unlock(&inode->i_mutex); @@ -5627,21 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) return ret; } - /* - * Need to round down to align start offset to page size boundary - * for page size > block size. - */ - ioffset = round_down(offset, PAGE_SIZE); - - /* Write out all dirty pages */ - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, - LLONG_MAX); - if (ret) - return ret; - - /* Take mutex lock */ mutex_lock(&inode->i_mutex); - /* Currently just for extent based files */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { ret = -EOPNOTSUPP; @@ -5660,17 +5657,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - truncate_pagecache(inode, ioffset); - /* Wait for existing dio to complete */ ext4_inode_block_unlocked_dio(inode); inode_dio_wait(inode); + /* + * Prevent page faults from reinstantiating pages we have released from + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); + /* + * Need to round down to align start offset to page size boundary + * for page size > block size. + */ + ioffset = round_down(offset, PAGE_SIZE); + /* Write out all dirty pages */ + ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, + LLONG_MAX); + if (ret) + goto out_mmap; + truncate_pagecache(inode, ioffset); + credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out_dio; + goto out_mmap; } /* Expand file to avoid data loss if there is error while shifting */ @@ -5741,7 +5753,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) out_stop: ext4_journal_stop(handle); -out_dio: +out_mmap: + up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); out_mutex: mutex_unlock(&inode->i_mutex); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 113837e7ba98..0d24ebcd7c9e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -209,15 +209,18 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { int result; handle_t *handle = NULL; - struct super_block *sb = file_inode(vma->vm_file)->i_sb; + struct inode *inode = file_inode(vma->vm_file); + struct super_block *sb = inode->i_sb; bool write = vmf->flags & FAULT_FLAG_WRITE; if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, EXT4_DATA_TRANS_BLOCKS(sb)); - } + } else + down_read(&EXT4_I(inode)->i_mmap_sem); if (IS_ERR(handle)) result = VM_FAULT_SIGBUS; @@ -228,8 +231,10 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (write) { if (!IS_ERR(handle)) ext4_journal_stop(handle); + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); - } + } else + up_read(&EXT4_I(inode)->i_mmap_sem); return result; } @@ -246,10 +251,12 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, if (write) { sb_start_pagefault(sb); file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, ext4_chunk_trans_blocks(inode, PMD_SIZE / PAGE_SIZE)); - } + } else + down_read(&EXT4_I(inode)->i_mmap_sem); if (IS_ERR(handle)) result = VM_FAULT_SIGBUS; @@ -260,30 +267,71 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, if (write) { if (!IS_ERR(handle)) ext4_journal_stop(handle); + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(sb); - } + } else + up_read(&EXT4_I(inode)->i_mmap_sem); return result; } static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext4_get_block_dax, - ext4_end_io_unwritten); + int err; + struct inode *inode = file_inode(vma->vm_file); + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); + err = __dax_mkwrite(vma, vmf, ext4_get_block_dax, + ext4_end_io_unwritten); + up_read(&EXT4_I(inode)->i_mmap_sem); + sb_end_pagefault(inode->i_sb); + + return err; +} + +/* + * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite() + * handler we check for races agaist truncate. Note that since we cycle through + * i_mmap_sem, we are sure that also any hole punching that began before we + * were called is finished by now and so if it included part of the file we + * are working on, our pte will get unmapped and the check for pte_same() in + * wp_pfn_shared() fails. Thus fault gets retried and things work out as + * desired. + */ +static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + struct super_block *sb = inode->i_sb; + int ret = VM_FAULT_NOPAGE; + loff_t size; + + sb_start_pagefault(sb); + file_update_time(vma->vm_file); + down_read(&EXT4_I(inode)->i_mmap_sem); + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (vmf->pgoff >= size) + ret = VM_FAULT_SIGBUS; + up_read(&EXT4_I(inode)->i_mmap_sem); + sb_end_pagefault(sb); + + return ret; } static const struct vm_operations_struct ext4_dax_vm_ops = { .fault = ext4_dax_fault, .pmd_fault = ext4_dax_pmd_fault, .page_mkwrite = ext4_dax_mkwrite, - .pfn_mkwrite = dax_pfn_mkwrite, + .pfn_mkwrite = ext4_dax_pfn_mkwrite, }; #else #define ext4_dax_vm_ops ext4_file_vm_ops #endif static const struct vm_operations_struct ext4_file_vm_ops = { - .fault = filemap_fault, + .fault = ext4_filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = ext4_page_mkwrite, }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 06bda0361e7c..e31d762eedce 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3587,6 +3587,35 @@ int ext4_can_truncate(struct inode *inode) } /* + * We have to make sure i_disksize gets properly updated before we truncate + * page cache due to hole punching or zero range. Otherwise i_disksize update + * can get lost as it may have been postponed to submission of writeback but + * that will never happen after we truncate page cache. + */ +int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, + loff_t len) +{ + handle_t *handle; + loff_t size = i_size_read(inode); + + WARN_ON(!mutex_is_locked(&inode->i_mutex)); + if (offset > size || offset + len < size) + return 0; + + if (EXT4_I(inode)->i_disksize >= size) + return 0; + + handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ext4_update_i_disksize(inode, size); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + + return 0; +} + +/* * ext4_punch_hole: punches a hole in a file by releaseing the blocks * associated with the given offset and length * @@ -3651,17 +3680,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } + /* Wait all existing dio workers, newcomers will block on i_mutex */ + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + + /* + * Prevent page faults from reinstantiating pages we have released from + * page cache. + */ + down_write(&EXT4_I(inode)->i_mmap_sem); first_block_offset = round_up(offset, sb->s_blocksize); last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; /* Now release the pages and zero block aligned part of pages*/ - if (last_block_offset > first_block_offset) + if (last_block_offset > first_block_offset) { + ret = ext4_update_disksize_before_punch(inode, offset, length); + if (ret) + goto out_dio; truncate_pagecache_range(inode, first_block_offset, last_block_offset); - - /* Wait all existing dio workers, newcomers will block on i_mutex */ - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); + } if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) credits = ext4_writepage_trans_blocks(inode); @@ -3708,16 +3746,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) if (IS_SYNC(inode)) ext4_handle_sync(handle); - /* Now release the pages again to reduce race window */ - if (last_block_offset > first_block_offset) - truncate_pagecache_range(inode, first_block_offset, - last_block_offset); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); out_stop: ext4_journal_stop(handle); out_dio: + up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); out_mutex: mutex_unlock(&inode->i_mutex); @@ -4851,6 +4885,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } else ext4_wait_for_tail_page_commit(inode); } + down_write(&EXT4_I(inode)->i_mmap_sem); /* * Truncate pagecache after we've waited for commit * in data=journal mode to make pages freeable. @@ -4858,6 +4893,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) truncate_pagecache(inode, inode->i_size); if (shrink) ext4_truncate(inode); + up_write(&EXT4_I(inode)->i_mmap_sem); } if (!rc) { @@ -5109,6 +5145,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) might_sleep(); trace_ext4_mark_inode_dirty(inode, _RET_IP_); err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + return err; if (ext4_handle_valid(handle) && EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { @@ -5139,9 +5177,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) } } } - if (!err) - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - return err; + return ext4_mark_iloc_dirty(handle, inode, &iloc); } /* @@ -5306,6 +5342,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); file_update_time(vma->vm_file); + + down_read(&EXT4_I(inode)->i_mmap_sem); /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && @@ -5375,6 +5413,19 @@ retry_alloc: out_ret: ret = block_page_mkwrite_return(ret); out: + up_read(&EXT4_I(inode)->i_mmap_sem); sb_end_pagefault(inode->i_sb); return ret; } + +int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + int err; + + down_read(&EXT4_I(inode)->i_mmap_sem); + err = filemap_fault(vma, vmf); + up_read(&EXT4_I(inode)->i_mmap_sem); + + return err; +} diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ba1cf0bf2f81..852c26806af2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -958,6 +958,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&ei->i_orphan); init_rwsem(&ei->xattr_sem); init_rwsem(&ei->i_data_sem); + init_rwsem(&ei->i_mmap_sem); inode_init_once(&ei->vfs_inode); } diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h index 011ba6670d99..c70d06a383e2 100644 --- a/fs/ext4/truncate.h +++ b/fs/ext4/truncate.h @@ -10,8 +10,10 @@ */ static inline void ext4_truncate_failed_write(struct inode *inode) { + down_write(&EXT4_I(inode)->i_mmap_sem); truncate_inode_pages(inode->i_mapping, inode->i_size); ext4_truncate(inode); + up_write(&EXT4_I(inode)->i_mmap_sem); } /* diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 67aa7e63a5c1..3e97da37b67c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1497,6 +1497,32 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, return page; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static struct page *can_gather_numa_stats_pmd(pmd_t pmd, + struct vm_area_struct *vma, + unsigned long addr) +{ + struct page *page; + int nid; + + if (!pmd_present(pmd)) + return NULL; + + page = vm_normal_page_pmd(vma, addr, pmd); + if (!page) + return NULL; + + if (PageReserved(page)) + return NULL; + + nid = page_to_nid(page); + if (!node_isset(nid, node_states[N_MEMORY])) + return NULL; + + return page; +} +#endif + static int gather_pte_stats(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -1506,13 +1532,13 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, pte_t *orig_pte; pte_t *pte; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { - pte_t huge_pte = *(pte_t *)pmd; struct page *page; - page = can_gather_numa_stats(huge_pte, vma, addr); + page = can_gather_numa_stats_pmd(*pmd, vma, addr); if (page) - gather_stats(page, md, pte_dirty(huge_pte), + gather_stats(page, md, pmd_dirty(*pmd), HPAGE_PMD_SIZE/PAGE_SIZE); spin_unlock(ptl); return 0; @@ -1520,6 +1546,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if (pmd_trans_unstable(pmd)) return 0; +#endif orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); do { struct page *page = can_gather_numa_stats(*pte, vma, addr); diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 1cbb8338edf3..827e4d3bbc7a 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -70,12 +70,12 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) #endif /* Return a pointer with offset calculated */ -#define __set_fixmap_offset(idx, phys, flags) \ -({ \ - unsigned long addr; \ - __set_fixmap(idx, phys, flags); \ - addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \ - addr; \ +#define __set_fixmap_offset(idx, phys, flags) \ +({ \ + unsigned long ________addr; \ + __set_fixmap(idx, phys, flags); \ + ________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \ + ________addr; \ }) #define set_fixmap_offset(idx, phys) \ diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index e56272c919b5..bf2d34c9d804 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -108,11 +108,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 val; preempt_disable(); - if (unlikely(get_user(val, uaddr) != 0)) + if (unlikely(get_user(val, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } - if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) + if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } *uval = val; preempt_enable(); diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 461a0558bca4..cebecff536a3 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -39,6 +39,8 @@ static inline bool drm_arch_can_wc_memory(void) { #if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) return false; +#elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3) + return false; #else return true; #endif diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index e63d3a513e67..788c7c49a673 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -435,6 +435,7 @@ struct cgroup_subsys { int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); + void (*post_attach)(void); int (*can_fork)(struct task_struct *task, void **priv_p); void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index fea160ee5803..85a868ccb493 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask) task_unlock(current); } -extern void cpuset_post_attach_flush(void); - #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } @@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq) return false; } -static inline void cpuset_post_attach_flush(void) -{ -} - #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 47be3ad7d3e5..333d0ca6940f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -299,7 +299,7 @@ typedef struct { void *open_protocol_information; void *protocols_per_handle; void *locate_handle_buffer; - void *locate_protocol; + efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **); void *install_multiple_protocol_interfaces; void *uninstall_multiple_protocol_interfaces; void *calculate_crc32; @@ -599,6 +599,10 @@ void efi_native_runtime_setup(void); #define EFI_PROPERTIES_TABLE_GUID \ EFI_GUID( 0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 ) +#define EFI_RNG_PROTOCOL_GUID \ + EFI_GUID(0x3152bca5, 0xeade, 0x433d, \ + 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44) + typedef struct { efi_guid_t guid; u64 table; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 685c262e0be8..b0eb06423d5e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -96,9 +96,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm, struct address_space *mapping, pgoff_t idx, unsigned long address); -#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); -#endif extern int hugepages_treat_as_movable; extern int sysctl_hugetlb_shm_group; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b473cbfa7ef..a91b67b18a73 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -334,6 +334,17 @@ enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; +enum { + /* + * Max wqe size for rdma read is 512 bytes, so this + * limits our max_sge_rd as the wqe needs to fit: + * - ctrl segment (16 bytes) + * - rdma segment (16 bytes) + * - scatter elements (16 bytes each) + */ + MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 +}; + struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; diff --git a/include/linux/mm.h b/include/linux/mm.h index f4c35e8125d1..fb987b2e5abd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1096,6 +1096,8 @@ struct zap_details { struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t pmd); int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 647ebfe5174f..d4227a8a2a23 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -363,6 +363,7 @@ struct vb2_ops { }; struct vb2_buf_ops { + int (*verify_planes_array)(struct vb2_buffer *vb, const void *pb); int (*fill_user_buffer)(struct vb2_buffer *vb, void *pb); int (*fill_vb2_buffer)(struct vb2_buffer *vb, const void *pb, struct vb2_plane *planes); diff --git a/include/rdma/ib.h b/include/rdma/ib.h index cf8f9e700e48..a6b93706b0fc 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -34,6 +34,7 @@ #define _RDMA_IB_H #include <linux/types.h> +#include <linux/sched.h> struct ib_addr { union { @@ -86,4 +87,19 @@ struct sockaddr_ib { __u64 sib_scope_id; }; +/* + * The IB interfaces that use write() as bi-directional ioctl() are + * fundamentally unsafe, since there are lots of ways to trigger "write()" + * calls from various contexts with elevated privileges. That includes the + * traditional suid executable error message writes, but also various kernel + * interfaces that can write to file descriptors. + * + * This function provides protection for the legacy API by restricting the + * calling context. + */ +static inline bool ib_safe_file_access(struct file *filp) +{ + return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS); +} + #endif /* _RDMA_IB_H */ diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h index c039f1d68a09..086168e18ca8 100644 --- a/include/uapi/linux/v4l2-dv-timings.h +++ b/include/uapi/linux/v4l2-dv-timings.h @@ -183,7 +183,8 @@ #define V4L2_DV_BT_CEA_3840X2160P24 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 1276, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -191,14 +192,16 @@ #define V4L2_DV_BT_CEA_3840X2160P25 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P30 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -206,14 +209,16 @@ #define V4L2_DV_BT_CEA_3840X2160P50 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P60 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -221,7 +226,8 @@ #define V4L2_DV_BT_CEA_4096X2160P24 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 1020, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -229,14 +235,16 @@ #define V4L2_DV_BT_CEA_4096X2160P25 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P30 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -244,14 +252,16 @@ #define V4L2_DV_BT_CEA_4096X2160P50 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P60 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f0bc36879e1d..47851dbc893d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2776,9 +2776,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off, bool threadgroup) { struct task_struct *tsk; + struct cgroup_subsys *ss; struct cgroup *cgrp; pid_t pid; - int ret; + int ssid, ret; if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return -EINVAL; @@ -2826,8 +2827,10 @@ out_unlock_rcu: rcu_read_unlock(); out_unlock_threadgroup: percpu_up_write(&cgroup_threadgroup_rwsem); + for_each_subsys(ss, ssid) + if (ss->post_attach) + ss->post_attach(); cgroup_kn_unlock(of->kn); - cpuset_post_attach_flush(); return ret ?: nbytes; } @@ -4744,14 +4747,15 @@ static void css_free_work_fn(struct work_struct *work) if (ss) { /* css free path */ + struct cgroup_subsys_state *parent = css->parent; int id = css->id; - if (css->parent) - css_put(css->parent); - ss->css_free(css); cgroup_idr_remove(&ss->css_idr, id); cgroup_put(cgrp); + + if (parent) + css_put(parent); } else { /* cgroup free path */ atomic_dec(&cgrp->root->nr_cgrps); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2ade632197d5..11eaf14b52c2 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -57,7 +57,6 @@ #include <asm/uaccess.h> #include <linux/atomic.h> #include <linux/mutex.h> -#include <linux/workqueue.h> #include <linux/cgroup.h> #include <linux/wait.h> @@ -1015,7 +1014,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, } } -void cpuset_post_attach_flush(void) +static void cpuset_post_attach(void) { flush_workqueue(cpuset_migrate_mm_wq); } @@ -2083,6 +2082,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .can_attach = cpuset_can_attach, .cancel_attach = cpuset_cancel_attach, .attach = cpuset_attach, + .post_attach = cpuset_post_attach, .bind = cpuset_bind, .legacy_cftypes = files, .early_init = 1, diff --git a/kernel/futex.c b/kernel/futex.c index 461c72b2dac2..9d8163afd87c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1244,10 +1244,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, if (unlikely(should_fail_futex(true))) ret = -EFAULT; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; + } else if (curval != uval) { + /* + * If a unconditional UNLOCK_PI operation (user space did not + * try the TID->0 transition) raced with a waiter setting the + * FUTEX_WAITERS flag between get_user() and locking the hash + * bucket lock, retry the operation. + */ + if ((FUTEX_TID_MASK & curval) == uval) + ret = -EAGAIN; + else + ret = -EINVAL; + } if (ret) { raw_spin_unlock(&pi_state->pi_mutex.wait_lock); return ret; @@ -1474,8 +1484,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, if (likely(&hb1->chain != &hb2->chain)) { plist_del(&q->list, &hb1->chain); hb_waiters_dec(hb1); - plist_add(&q->list, &hb2->chain); hb_waiters_inc(hb2); + plist_add(&q->list, &hb2->chain); q->lock_ptr = &hb2->lock; } get_futex_key_refs(key2); @@ -2538,6 +2548,15 @@ retry: if (ret == -EFAULT) goto pi_faulted; /* + * A unconditional UNLOCK_PI op raced against a waiter + * setting the FUTEX_WAITERS bit. Try again. + */ + if (ret == -EAGAIN) { + spin_unlock(&hb->lock); + put_futex_key(&key); + goto retry; + } + /* * wake_futex_pi has detected invalid state. Tell user * space. */ diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index 5b9102a47ea5..c835270f0c2f 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) node->locked = 0; node->next = NULL; - prev = xchg_acquire(lock, node); + /* + * We rely on the full barrier with global transitivity implied by the + * below xchg() to order the initialization stores above against any + * observation of @node. And to provide the ACQUIRE ordering associated + * with a LOCK primitive. + */ + prev = xchg(lock, node); if (likely(prev == NULL)) { /* * Lock acquired, don't need to set node->locked to 1. Threads diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f11fdb5c8084..ff6b870315b7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7703,7 +7703,7 @@ void set_curr_task(int cpu, struct task_struct *p) /* task_group_lock serializes the addition/removal of task groups */ static DEFINE_SPINLOCK(task_group_lock); -static void free_sched_group(struct task_group *tg) +static void sched_free_group(struct task_group *tg) { free_fair_sched_group(tg); free_rt_sched_group(tg); @@ -7729,7 +7729,7 @@ struct task_group *sched_create_group(struct task_group *parent) return tg; err: - free_sched_group(tg); + sched_free_group(tg); return ERR_PTR(-ENOMEM); } @@ -7749,17 +7749,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent) } /* rcu callback to free various structures associated with a task group */ -static void free_sched_group_rcu(struct rcu_head *rhp) +static void sched_free_group_rcu(struct rcu_head *rhp) { /* now it should be safe to free those cfs_rqs */ - free_sched_group(container_of(rhp, struct task_group, rcu)); + sched_free_group(container_of(rhp, struct task_group, rcu)); } -/* Destroy runqueue etc associated with a task group */ void sched_destroy_group(struct task_group *tg) { /* wait for possible concurrent references to cfs_rqs complete */ - call_rcu(&tg->rcu, free_sched_group_rcu); + call_rcu(&tg->rcu, sched_free_group_rcu); } void sched_offline_group(struct task_group *tg) @@ -8220,31 +8219,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); + sched_online_group(tg, parent); + return &tg->css; } -static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) +static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); - struct task_group *parent = css_tg(css->parent); - if (parent) - sched_online_group(tg, parent); - return 0; + sched_offline_group(tg); } static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); - sched_destroy_group(tg); -} - -static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) -{ - struct task_group *tg = css_tg(css); - - sched_offline_group(tg); + /* + * Relies on the RCU grace period between css_released() and this. + */ + sched_free_group(tg); } static void cpu_cgroup_fork(struct task_struct *task, void *private) @@ -8604,9 +8598,8 @@ static struct cftype cpu_files[] = { struct cgroup_subsys cpu_cgrp_subsys = { .css_alloc = cpu_cgroup_css_alloc, + .css_released = cpu_cgroup_css_released, .css_free = cpu_cgroup_css_free, - .css_online = cpu_cgroup_css_online, - .css_offline = cpu_cgroup_css_offline, .fork = cpu_cgroup_fork, .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 450c21fd0e6e..0ec05948a97b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -649,6 +649,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work, */ smp_wmb(); set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0); + /* + * The following mb guarantees that previous clear of a PENDING bit + * will not be reordered with any speculative LOADS or STORES from + * work->current_func, which is executed afterwards. This possible + * reordering can lead to a missed execution on attempt to qeueue + * the same @work. E.g. consider this case: + * + * CPU#0 CPU#1 + * ---------------------------- -------------------------------- + * + * 1 STORE event_indicated + * 2 queue_work_on() { + * 3 test_and_set_bit(PENDING) + * 4 } set_..._and_clear_pending() { + * 5 set_work_data() # clear bit + * 6 smp_mb() + * 7 work->current_func() { + * 8 LOAD event_indicated + * } + * + * Without an explicit full barrier speculative LOAD on line 8 can + * be executed before CPU#0 does STORE on line 1. If that happens, + * CPU#0 observes the PENDING bit is still set and new execution of + * a @work is not queued in a hope, that CPU#1 will eventually + * finish the queued @work. Meanwhile CPU#1 does not see + * event_indicated is set, because speculative LOAD was executed + * before actual STORE. + */ + smp_mb(); } static void clear_work_data(struct work_struct *work) diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 03dd576e6773..59fd7c0b119c 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -524,7 +524,9 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, free_slot = i; continue; } - if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { + if (assoc_array_ptr_is_leaf(ptr) && + ops->compare_object(assoc_array_ptr_to_leaf(ptr), + index_key)) { pr_devel("replace in slot %d\n", i); edit->leaf_p = &node->slots[i]; edit->dead_leaf = node->slots[i]; diff --git a/lib/extable.c b/lib/extable.c index 4cac81ec225e..0be02ad561e9 100644 --- a/lib/extable.c +++ b/lib/extable.c @@ -14,7 +14,37 @@ #include <linux/sort.h> #include <asm/uaccess.h> +#ifndef ARCH_HAS_RELATIVE_EXTABLE +#define ex_to_insn(x) ((x)->insn) +#else +static inline unsigned long ex_to_insn(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} +#endif + #ifndef ARCH_HAS_SORT_EXTABLE +#ifndef ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex NULL +#else +static void swap_ex(void *a, void *b, int size) +{ + struct exception_table_entry *x = a, *y = b, tmp; + int delta = b - a; + + tmp = *x; + x->insn = y->insn + delta; + y->insn = tmp.insn - delta; + +#ifdef swap_ex_entry_fixup + swap_ex_entry_fixup(x, y, tmp, delta); +#else + x->fixup = y->fixup + delta; + y->fixup = tmp.fixup - delta; +#endif +} +#endif /* ARCH_HAS_RELATIVE_EXTABLE */ + /* * The exception table needs to be sorted so that the binary * search that we use to find entries in it works properly. @@ -26,9 +56,9 @@ static int cmp_ex(const void *a, const void *b) const struct exception_table_entry *x = a, *y = b; /* avoid overflow */ - if (x->insn > y->insn) + if (ex_to_insn(x) > ex_to_insn(y)) return 1; - if (x->insn < y->insn) + if (ex_to_insn(x) < ex_to_insn(y)) return -1; return 0; } @@ -37,7 +67,7 @@ void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish) { sort(start, finish - start, sizeof(struct exception_table_entry), - cmp_ex, NULL); + cmp_ex, swap_ex); } #ifdef CONFIG_MODULES @@ -48,13 +78,15 @@ void sort_extable(struct exception_table_entry *start, void trim_init_extable(struct module *m) { /*trim the beginning*/ - while (m->num_exentries && within_module_init(m->extable[0].insn, m)) { + while (m->num_exentries && + within_module_init(ex_to_insn(&m->extable[0]), m)) { m->extable++; m->num_exentries--; } /*trim the end*/ while (m->num_exentries && - within_module_init(m->extable[m->num_exentries-1].insn, m)) + within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]), + m)) m->num_exentries--; } #endif /* CONFIG_MODULES */ @@ -81,13 +113,13 @@ search_extable(const struct exception_table_entry *first, * careful, the distance between value and insn * can be larger than MAX_LONG: */ - if (mid->insn < value) + if (ex_to_insn(mid) < value) first = mid + 1; - else if (mid->insn > value) + else if (ex_to_insn(mid) > value) last = mid - 1; else return mid; - } - return NULL; + } + return NULL; } #endif diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index abcecdc2d0f2..0710a62ad2f6 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -11,8 +11,7 @@ /* * Detects 64 bits mode */ -#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ - || defined(__ppc64__) || defined(__LP64__)) +#if defined(CONFIG_64BIT) #define LZ4_ARCH64 1 #else #define LZ4_ARCH64 0 @@ -35,6 +34,10 @@ typedef struct _U64_S { u64 v; } U64_S; #define PUT4(s, d) (A32(d) = A32(s)) #define PUT8(s, d) (A64(d) = A64(s)) + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - A16(p)) + #define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ do { \ A16(p) = v; \ @@ -51,10 +54,13 @@ typedef struct _U64_S { u64 v; } U64_S; #define PUT8(s, d) \ put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) -#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ - do { \ - put_unaligned(v, (u16 *)(p)); \ - p += 2; \ +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned_le16(v, (u16 *)(p)); \ + p += 2; \ } while (0) #endif @@ -140,9 +146,6 @@ typedef struct _U64_S { u64 v; } U64_S; #endif -#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ - (d = s - get_unaligned_le16(p)) - #define LZ4_WILDCOPY(s, d, e) \ do { \ LZ4_COPYPACKET(s, d); \ diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 3db76b8c1115..e00ff00e861c 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -128,6 +128,23 @@ leave: } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); +static int count_lzeros(MPI a) +{ + mpi_limb_t alimb; + int i, lzeros = 0; + + for (i = a->nlimbs - 1; i >= 0; i--) { + alimb = a->d[i]; + if (alimb == 0) { + lzeros += sizeof(mpi_limb_t); + } else { + lzeros += count_leading_zeros(alimb) / 8; + break; + } + } + return lzeros; +} + /** * mpi_read_buffer() - read MPI to a bufer provided by user (msb first) * @@ -146,7 +163,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, uint8_t *p; mpi_limb_t alimb; unsigned int n = mpi_get_size(a); - int i, lzeros = 0; + int i, lzeros; if (buf_len < n || !buf || !nbytes) return -EINVAL; @@ -154,14 +171,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, if (sign) *sign = a->sign; - p = (void *)&a->d[a->nlimbs] - 1; - - for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) { - if (!*p) - lzeros++; - else - break; - } + lzeros = count_lzeros(a); p = buf; *nbytes = n - lzeros; @@ -343,7 +353,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, u8 *p, *p2; mpi_limb_t alimb, alimb2; unsigned int n = mpi_get_size(a); - int i, x, y = 0, lzeros = 0, buf_len; + int i, x, y = 0, lzeros, buf_len; if (!nbytes || *nbytes < n) return -EINVAL; @@ -351,14 +361,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes, if (sign) *sign = a->sign; - p = (void *)&a->d[a->nlimbs] - 1; - - for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) { - if (!*p) - lzeros++; - else - break; - } + lzeros = count_lzeros(a); *nbytes = n - lzeros; buf_len = sgl->length; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 62fe06bb7d04..530e6427f823 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2134,10 +2134,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma, * page fault if needed. */ return 0; - if (vma->vm_ops) + if (vma->vm_ops || (vm_flags & VM_NO_THP)) /* khugepaged not yet working on file or special mappings */ return 0; - VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; if (hstart < hend) @@ -2498,8 +2497,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma) return false; if (is_vma_temporary_stack(vma)) return false; - VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma); - return true; + return !(vma->vm_flags & VM_NO_THP); } static void collapse_huge_page(struct mm_struct *mm, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7535ef32a75b..d1f6dc5a715d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -196,6 +196,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); /* "mc" and its members are protected by cgroup_mutex */ static struct move_charge_struct { spinlock_t lock; /* for from, to */ + struct mm_struct *mm; struct mem_cgroup *from; struct mem_cgroup *to; unsigned long flags; @@ -4800,6 +4801,8 @@ static void __mem_cgroup_clear_mc(void) static void mem_cgroup_clear_mc(void) { + struct mm_struct *mm = mc.mm; + /* * we must clear moving_task before waking up waiters at the end of * task migration. @@ -4809,7 +4812,10 @@ static void mem_cgroup_clear_mc(void) spin_lock(&mc.lock); mc.from = NULL; mc.to = NULL; + mc.mm = NULL; spin_unlock(&mc.lock); + + mmput(mm); } static int mem_cgroup_can_attach(struct cgroup_taskset *tset) @@ -4866,6 +4872,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) VM_BUG_ON(mc.moved_swap); spin_lock(&mc.lock); + mc.mm = mm; mc.from = from; mc.to = memcg; mc.flags = move_flags; @@ -4875,8 +4882,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) ret = mem_cgroup_precharge_mc(mm); if (ret) mem_cgroup_clear_mc(); + } else { + mmput(mm); } - mmput(mm); return ret; } @@ -4990,11 +4998,11 @@ put: /* get_mctgt_type() gets the page */ return ret; } -static void mem_cgroup_move_charge(struct mm_struct *mm) +static void mem_cgroup_move_charge(void) { struct mm_walk mem_cgroup_move_charge_walk = { .pmd_entry = mem_cgroup_move_charge_pte_range, - .mm = mm, + .mm = mc.mm, }; lru_add_drain_all(); @@ -5006,7 +5014,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) atomic_inc(&mc.from->moving_account); synchronize_rcu(); retry: - if (unlikely(!down_read_trylock(&mm->mmap_sem))) { + if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) { /* * Someone who are holding the mmap_sem might be waiting in * waitq. So we cancel all extra charges, wake up all waiters, @@ -5023,23 +5031,16 @@ retry: * additional charge, the page walk just aborts. */ walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk); - up_read(&mm->mmap_sem); + up_read(&mc.mm->mmap_sem); atomic_dec(&mc.from->moving_account); } -static void mem_cgroup_move_task(struct cgroup_taskset *tset) +static void mem_cgroup_move_task(void) { - struct cgroup_subsys_state *css; - struct task_struct *p = cgroup_taskset_first(tset, &css); - struct mm_struct *mm = get_task_mm(p); - - if (mm) { - if (mc.to) - mem_cgroup_move_charge(mm); - mmput(mm); - } - if (mc.to) + if (mc.to) { + mem_cgroup_move_charge(); mem_cgroup_clear_mc(); + } } #else /* !CONFIG_MMU */ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) @@ -5053,7 +5054,7 @@ static int mem_cgroup_allow_attach(struct cgroup_taskset *tset) static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset) { } -static void mem_cgroup_move_task(struct cgroup_taskset *tset) +static void mem_cgroup_move_task(void) { } #endif @@ -5269,6 +5270,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, .allow_attach = mem_cgroup_allow_attach, + .post_attach = mem_cgroup_move_task, .bind = mem_cgroup_bind, .dfl_cftypes = memory_files, .legacy_cftypes = mem_cgroup_legacy_files, diff --git a/mm/memory.c b/mm/memory.c index b80bf4746b67..76dcee317714 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -797,6 +797,46 @@ out: return pfn_to_page(pfn); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t pmd) +{ + unsigned long pfn = pmd_pfn(pmd); + + /* + * There is no pmd_special() but there may be special pmds, e.g. + * in a direct-access (dax) mapping, so let's just replicate the + * !HAVE_PTE_SPECIAL case from vm_normal_page() here. + */ + if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { + if (vma->vm_flags & VM_MIXEDMAP) { + if (!pfn_valid(pfn)) + return NULL; + goto out; + } else { + unsigned long off; + off = (addr - vma->vm_start) >> PAGE_SHIFT; + if (pfn == vma->vm_pgoff + off) + return NULL; + if (!is_cow_mapping(vma->vm_flags)) + return NULL; + } + } + + if (is_zero_pfn(pfn)) + return NULL; + if (unlikely(pfn > highest_memmap_pfn)) + return NULL; + + /* + * NOTE! We still have PageReserved() pages in the page tables. + * eg. VDSO mappings can cause them to exist. + */ +out: + return pfn_to_page(pfn); +} +#endif + /* * copy one vm_area from one task to the other. Assumes the page tables * already present in the new task to be cleared in the whole range diff --git a/mm/migrate.c b/mm/migrate.c index 6d17e0ab42d4..bbeb0b71fcf4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -963,7 +963,13 @@ out: dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); /* Soft-offlined page shouldn't go through lru cache list */ - if (reason == MR_MEMORY_FAILURE) { + if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) { + /* + * With this release, we free successfully migrated + * page and set PG_HWPoison on just freed page + * intentionally. Although it's rather weird, it's how + * HWPoison flag works at the moment. + */ put_page(page); if (!test_set_page_hwpoison(page)) num_poisoned_pages_inc(); diff --git a/mm/slub.c b/mm/slub.c index 46997517406e..65d5f92d51d2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2819,6 +2819,7 @@ struct detached_freelist { void *tail; void *freelist; int cnt; + struct kmem_cache *s; }; /* @@ -2833,8 +2834,9 @@ struct detached_freelist { * synchronization primitive. Look ahead in the array is limited due * to performance reasons. */ -static int build_detached_freelist(struct kmem_cache *s, size_t size, - void **p, struct detached_freelist *df) +static inline +int build_detached_freelist(struct kmem_cache *s, size_t size, + void **p, struct detached_freelist *df) { size_t first_skipped_index = 0; int lookahead = 3; @@ -2850,8 +2852,11 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, if (!object) return 0; + /* Support for memcg, compiler can optimize this out */ + df->s = cache_from_obj(s, object); + /* Start new detached freelist */ - set_freepointer(s, object, NULL); + set_freepointer(df->s, object, NULL); df->page = virt_to_head_page(object); df->tail = object; df->freelist = object; @@ -2866,7 +2871,7 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, /* df->page is always set at this point */ if (df->page == virt_to_head_page(object)) { /* Opportunity build freelist */ - set_freepointer(s, object, df->freelist); + set_freepointer(df->s, object, df->freelist); df->freelist = object; df->cnt++; p[size] = NULL; /* mark object processed */ @@ -2885,25 +2890,20 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size, return first_skipped_index; } - /* Note that interrupts must be enabled when calling this function. */ -void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p) +void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) { if (WARN_ON(!size)) return; do { struct detached_freelist df; - struct kmem_cache *s; - - /* Support for memcg */ - s = cache_from_obj(orig_s, p[size - 1]); size = build_detached_freelist(s, size, p, &df); if (unlikely(!df.page)) continue; - slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); + slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_); } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); diff --git a/mm/vmscan.c b/mm/vmscan.c index 0c56f6a812bc..2e6547ec1b09 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2577,7 +2577,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) sc->gfp_mask |= __GFP_HIGHMEM; for_each_zone_zonelist_nodemask(zone, z, zonelist, - requested_highidx, sc->nodemask) { + gfp_zone(sc->gfp_mask), sc->nodemask) { enum zone_type classzone_idx; if (!populated_zone(zone)) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 59651af8cc27..992b35fb8615 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1305,7 +1305,7 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->portid) { + if (nlk->portid && nlk->bound) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 21e20353178e..63fb5ee212cf 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1182,14 +1182,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) } crq->q.reader = 0; - crq->item = cache_get(h); crq->buf = buf; crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); - if (test_bit(CACHE_PENDING, &h->flags)) + if (test_bit(CACHE_PENDING, &h->flags)) { + crq->item = cache_get(h); list_add_tail(&crq->q.list, &detail->queue); - else + } else /* Lost a race, no longer PENDING, so don't enqueue */ ret = -EAGAIN; spin_unlock(&queue_lock); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 75b0d23ee882..5d89f13a98db 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13161,7 +13161,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, struct wireless_dev *wdev; struct cfg80211_beacon_registration *reg, *tmp; - if (state != NETLINK_URELEASE) + if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC) return NOTIFY_DONE; rcu_read_lock(); diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 0b7dc2fd7bac..dd243d2abd87 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -267,10 +267,8 @@ int conf_read_simple(const char *name, int def) if (in) goto load; sym_add_change_count(1); - if (!sym_defconfig_list) { - sym_calc_value(modules_sym); + if (!sym_defconfig_list) return 1; - } for_all_defaults(sym_defconfig_list, prop) { if (expr_calc_value(prop->visible.expr) == no || @@ -403,7 +401,6 @@ setsym: } free(line); fclose(in); - sym_calc_value(modules_sym); return 0; } @@ -414,8 +411,12 @@ int conf_read(const char *name) sym_set_change_count(0); - if (conf_read_simple(name, S_DEF_USER)) + if (conf_read_simple(name, S_DEF_USER)) { + sym_calc_value(modules_sym); return 1; + } + + sym_calc_value(modules_sym); for_all_symbols(i, sym) { sym_calc_value(sym); @@ -846,6 +847,7 @@ static int conf_split_config(void) name = conf_get_autoconfig_name(); conf_read_simple(name, S_DEF_AUTO); + sym_calc_value(modules_sym); if (chdir("include/config")) return 1; diff --git a/scripts/sortextable.c b/scripts/sortextable.c index c2423d913b46..a2c0d620ca80 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -266,9 +266,9 @@ do_file(char const *const fname) break; } /* end switch */ if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0 - || r2(&ehdr->e_type) != ET_EXEC + || (r2(&ehdr->e_type) != ET_EXEC && r2(&ehdr->e_type) != ET_DYN) || ehdr->e_ident[EI_VERSION] != EV_CURRENT) { - fprintf(stderr, "unrecognized ET_EXEC file %s\n", fname); + fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file %s\n", fname); fail_file(); } @@ -282,12 +282,13 @@ do_file(char const *const fname) case EM_386: case EM_X86_64: case EM_S390: + case EM_AARCH64: + case EM_PARISC: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: case EM_ARCV2: case EM_ARM: - case EM_AARCH64: case EM_MICROBLAZE: case EM_MIPS: case EM_XTENSA: @@ -304,7 +305,7 @@ do_file(char const *const fname) if (r2(&ehdr->e_ehsize) != sizeof(Elf32_Ehdr) || r2(&ehdr->e_shentsize) != sizeof(Elf32_Shdr)) { fprintf(stderr, - "unrecognized ET_EXEC file: %s\n", fname); + "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); fail_file(); } do32(ehdr, fname, custom_sort); @@ -314,7 +315,7 @@ do_file(char const *const fname) if (r2(&ghdr->e_ehsize) != sizeof(Elf64_Ehdr) || r2(&ghdr->e_shentsize) != sizeof(Elf64_Shdr)) { fprintf(stderr, - "unrecognized ET_EXEC file: %s\n", fname); + "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); fail_file(); } do64(ghdr, fname, custom_sort); diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 5c4fa8eba1d0..367dbf0d285e 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -843,7 +843,7 @@ static hda_nid_t path_power_update(struct hda_codec *codec, bool allow_powerdown) { hda_nid_t nid, changed = 0; - int i, state; + int i, state, power; for (i = 0; i < path->depth; i++) { nid = path->path[i]; @@ -855,7 +855,9 @@ static hda_nid_t path_power_update(struct hda_codec *codec, state = AC_PWRST_D0; else state = AC_PWRST_D3; - if (!snd_hda_check_power_state(codec, nid, state)) { + power = snd_hda_codec_read(codec, nid, 0, + AC_VERB_GET_POWER_STATE, 0); + if (power != (state | (state << 4))) { snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE, state); changed = nid; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 2ff692dd2c5f..411630e9c034 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2207,6 +2207,9 @@ static const struct pci_device_id azx_ids[] = { /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, + /* Broxton-T */ + { PCI_DEVICE(0x8086, 0x1a98), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, /* Haswell */ { PCI_DEVICE(0x8086, 0x0a0c), .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL }, diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index a47e8ae0eb30..80bbadc83721 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -361,6 +361,7 @@ static int cs_parse_auto_config(struct hda_codec *codec) { struct cs_spec *spec = codec->spec; int err; + int i; err = snd_hda_parse_pin_defcfg(codec, &spec->gen.autocfg, NULL, 0); if (err < 0) @@ -370,6 +371,19 @@ static int cs_parse_auto_config(struct hda_codec *codec) if (err < 0) return err; + /* keep the ADCs powered up when it's dynamically switchable */ + if (spec->gen.dyn_adc_switch) { + unsigned int done = 0; + for (i = 0; i < spec->gen.input_mux.num_items; i++) { + int idx = spec->gen.dyn_adc_idx[i]; + if (done & (1 << idx)) + continue; + snd_hda_gen_fix_pin_power(codec, + spec->gen.adc_nids[idx]); + done |= 1 << idx; + } + } + return 0; } diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1402ba954b3d..ac4490a96863 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5449,6 +5449,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13), + SND_PCI_QUIRK(0x1028, 0x0669, "Dell Optiplex 9020m", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x069a, "Dell Vostro 5480", ALC290_FIXUP_SUBWOOFER_HSJACK), SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), @@ -5583,6 +5584,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK), + SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), diff --git a/sound/pci/pcxhr/pcxhr_core.c b/sound/pci/pcxhr/pcxhr_core.c index c5194f5b150a..d7e71f309299 100644 --- a/sound/pci/pcxhr/pcxhr_core.c +++ b/sound/pci/pcxhr/pcxhr_core.c @@ -1341,5 +1341,6 @@ irqreturn_t pcxhr_threaded_irq(int irq, void *dev_id) } pcxhr_msg_thread(mgr); + mutex_unlock(&mgr->lock); return IRQ_HANDLED; } diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index f2beb1aa5763..b1c8bb39cdf1 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -359,7 +359,7 @@ static const DECLARE_TLV_DB_RANGE(bst_tlv, /* Interface data select */ static const char * const rt5640_data_select[] = { - "Normal", "left copy to right", "right copy to left", "Swap"}; + "Normal", "Swap", "left copy to right", "right copy to left"}; static SOC_ENUM_SINGLE_DECL(rt5640_if1_dac_enum, RT5640_DIG_INF_DATA, RT5640_IF1_DAC_SEL_SFT, rt5640_data_select); diff --git a/sound/soc/codecs/rt5640.h b/sound/soc/codecs/rt5640.h index 3deb8babeabb..243f42633989 100644 --- a/sound/soc/codecs/rt5640.h +++ b/sound/soc/codecs/rt5640.h @@ -442,39 +442,39 @@ #define RT5640_IF1_DAC_SEL_MASK (0x3 << 14) #define RT5640_IF1_DAC_SEL_SFT 14 #define RT5640_IF1_DAC_SEL_NOR (0x0 << 14) -#define RT5640_IF1_DAC_SEL_L2R (0x1 << 14) -#define RT5640_IF1_DAC_SEL_R2L (0x2 << 14) -#define RT5640_IF1_DAC_SEL_SWAP (0x3 << 14) +#define RT5640_IF1_DAC_SEL_SWAP (0x1 << 14) +#define RT5640_IF1_DAC_SEL_L2R (0x2 << 14) +#define RT5640_IF1_DAC_SEL_R2L (0x3 << 14) #define RT5640_IF1_ADC_SEL_MASK (0x3 << 12) #define RT5640_IF1_ADC_SEL_SFT 12 #define RT5640_IF1_ADC_SEL_NOR (0x0 << 12) -#define RT5640_IF1_ADC_SEL_L2R (0x1 << 12) -#define RT5640_IF1_ADC_SEL_R2L (0x2 << 12) -#define RT5640_IF1_ADC_SEL_SWAP (0x3 << 12) +#define RT5640_IF1_ADC_SEL_SWAP (0x1 << 12) +#define RT5640_IF1_ADC_SEL_L2R (0x2 << 12) +#define RT5640_IF1_ADC_SEL_R2L (0x3 << 12) #define RT5640_IF2_DAC_SEL_MASK (0x3 << 10) #define RT5640_IF2_DAC_SEL_SFT 10 #define RT5640_IF2_DAC_SEL_NOR (0x0 << 10) -#define RT5640_IF2_DAC_SEL_L2R (0x1 << 10) -#define RT5640_IF2_DAC_SEL_R2L (0x2 << 10) -#define RT5640_IF2_DAC_SEL_SWAP (0x3 << 10) +#define RT5640_IF2_DAC_SEL_SWAP (0x1 << 10) +#define RT5640_IF2_DAC_SEL_L2R (0x2 << 10) +#define RT5640_IF2_DAC_SEL_R2L (0x3 << 10) #define RT5640_IF2_ADC_SEL_MASK (0x3 << 8) #define RT5640_IF2_ADC_SEL_SFT 8 #define RT5640_IF2_ADC_SEL_NOR (0x0 << 8) -#define RT5640_IF2_ADC_SEL_L2R (0x1 << 8) -#define RT5640_IF2_ADC_SEL_R2L (0x2 << 8) -#define RT5640_IF2_ADC_SEL_SWAP (0x3 << 8) +#define RT5640_IF2_ADC_SEL_SWAP (0x1 << 8) +#define RT5640_IF2_ADC_SEL_L2R (0x2 << 8) +#define RT5640_IF2_ADC_SEL_R2L (0x3 << 8) #define RT5640_IF3_DAC_SEL_MASK (0x3 << 6) #define RT5640_IF3_DAC_SEL_SFT 6 #define RT5640_IF3_DAC_SEL_NOR (0x0 << 6) -#define RT5640_IF3_DAC_SEL_L2R (0x1 << 6) -#define RT5640_IF3_DAC_SEL_R2L (0x2 << 6) -#define RT5640_IF3_DAC_SEL_SWAP (0x3 << 6) +#define RT5640_IF3_DAC_SEL_SWAP (0x1 << 6) +#define RT5640_IF3_DAC_SEL_L2R (0x2 << 6) +#define RT5640_IF3_DAC_SEL_R2L (0x3 << 6) #define RT5640_IF3_ADC_SEL_MASK (0x3 << 4) #define RT5640_IF3_ADC_SEL_SFT 4 #define RT5640_IF3_ADC_SEL_NOR (0x0 << 4) -#define RT5640_IF3_ADC_SEL_L2R (0x1 << 4) -#define RT5640_IF3_ADC_SEL_R2L (0x2 << 4) -#define RT5640_IF3_ADC_SEL_SWAP (0x3 << 4) +#define RT5640_IF3_ADC_SEL_SWAP (0x1 << 4) +#define RT5640_IF3_ADC_SEL_L2R (0x2 << 4) +#define RT5640_IF3_ADC_SEL_R2L (0x3 << 4) /* REC Left Mixer Control 1 (0x3b) */ #define RT5640_G_HP_L_RM_L_MASK (0x7 << 13) diff --git a/sound/soc/codecs/ssm4567.c b/sound/soc/codecs/ssm4567.c index e619d5651b09..080c78e88e10 100644 --- a/sound/soc/codecs/ssm4567.c +++ b/sound/soc/codecs/ssm4567.c @@ -352,6 +352,11 @@ static int ssm4567_set_power(struct ssm4567 *ssm4567, bool enable) regcache_cache_only(ssm4567->regmap, !enable); if (enable) { + ret = regmap_write(ssm4567->regmap, SSM4567_REG_SOFT_RESET, + 0x00); + if (ret) + return ret; + ret = regmap_update_bits(ssm4567->regmap, SSM4567_REG_POWER_CTRL, SSM4567_POWER_SPWDN, 0x00); diff --git a/sound/soc/samsung/s3c-i2s-v2.c b/sound/soc/samsung/s3c-i2s-v2.c index df65c5b494b1..b6ab3fc5789e 100644 --- a/sound/soc/samsung/s3c-i2s-v2.c +++ b/sound/soc/samsung/s3c-i2s-v2.c @@ -709,7 +709,7 @@ static int s3c2412_i2s_resume(struct snd_soc_dai *dai) #endif int s3c_i2sv2_register_component(struct device *dev, int id, - struct snd_soc_component_driver *cmp_drv, + const struct snd_soc_component_driver *cmp_drv, struct snd_soc_dai_driver *dai_drv) { struct snd_soc_dai_ops *ops = (struct snd_soc_dai_ops *)dai_drv->ops; diff --git a/sound/soc/samsung/s3c-i2s-v2.h b/sound/soc/samsung/s3c-i2s-v2.h index 90abab364b49..d0684145ed1f 100644 --- a/sound/soc/samsung/s3c-i2s-v2.h +++ b/sound/soc/samsung/s3c-i2s-v2.h @@ -101,7 +101,7 @@ extern int s3c_i2sv2_probe(struct snd_soc_dai *dai, * soc core. */ extern int s3c_i2sv2_register_component(struct device *dev, int id, - struct snd_soc_component_driver *cmp_drv, + const struct snd_soc_component_driver *cmp_drv, struct snd_soc_dai_driver *dai_drv); #endif /* __SND_SOC_S3C24XX_S3C_I2SV2_I2S_H */ diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 416514fe9e63..afb70a5d4fd3 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2188,6 +2188,13 @@ static ssize_t dapm_widget_show_component(struct snd_soc_component *cmpnt, int count = 0; char *state = "not set"; + /* card won't be set for the dummy component, as a spot fix + * we're checking for that case specifically here but in future + * we will ensure that the dummy component looks like others. + */ + if (!cmpnt->card) + return 0; + list_for_each_entry(w, &cmpnt->card->widgets, list) { if (w->dapm != dapm) continue; diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4e074a660826..90c3558c2c12 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -62,6 +62,14 @@ OPTIONS --scale:: scale/normalize counter values +-d:: +--detailed:: + print more detailed statistics, can be specified up to 3 times + + -d: detailed events, L1 and LLC data cache + -d -d: more detailed events, dTLB and iTLB events + -d -d -d: very detailed events, adding prefetch events + -r:: --repeat=<n>:: repeat command and print average + stddev (max: 100). 0 means forever. diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 81def6c3f24b..3900386a3629 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2059,10 +2059,12 @@ skip_annotation: * * See hist_browser__show_entry. */ - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - NULL, browser->selection->sym); + if (sort__has_sym && browser->selection->sym) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + NULL, browser->selection->sym); + } } nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], NULL, NULL); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 8b10621b415c..956187bf1a85 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -274,7 +274,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n", &event->mmap2.start, &event->mmap2.len, prot, &event->mmap2.pgoff, &event->mmap2.maj, &event->mmap2.min, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d1392194a9a9..b4b96120fc3b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1211,12 +1211,12 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, */ if (cpus != evlist->cpus) { cpu_map__put(evlist->cpus); - evlist->cpus = cpus; + evlist->cpus = cpu_map__get(cpus); } if (threads != evlist->threads) { thread_map__put(evlist->threads); - evlist->threads = threads; + evlist->threads = thread_map__get(threads); } perf_evlist__propagate_maps(evlist); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 97f963a3dcb9..9227c2f076c3 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1127,7 +1127,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", ret); - if (pt->synth_opts.callchain) + if (pt->synth_opts.last_branch) intel_pt_reset_last_branch_rb(ptq); return ret; diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index ea6064696fe4..a7b9022b5c8f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -86,6 +86,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; + WARN_ON(!kvm_timer_should_fire(vcpu)); + /* * If the vcpu is blocked we want to wake it up so that it will see * the timer has expired when entering the guest. @@ -93,10 +95,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) kvm_vcpu_kick(vcpu); } +static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu) +{ + cycle_t cval, now; + + cval = vcpu->arch.timer_cpu.cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + if (now < cval) { + u64 ns; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + return ns; + } + + return 0; +} + static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) { struct arch_timer_cpu *timer; + struct kvm_vcpu *vcpu; + u64 ns; + timer = container_of(hrt, struct arch_timer_cpu, timer); + vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If we should have slept longer, restart it. + */ + ns = kvm_timer_compute_delta(vcpu); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + queue_work(wqueue, &timer->expired); return HRTIMER_NORESTART; } @@ -170,8 +208,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu) void kvm_timer_schedule(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - u64 ns; - cycle_t cval, now; BUG_ON(timer_is_armed(timer)); @@ -191,14 +227,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu) return; /* The timer has not yet expired, schedule a background timer */ - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - ns = cyclecounter_cyc2ns(timecounter->cc, - cval - now, - timecounter->mask, - &timecounter->frac); - timer_arm(timer, ns); + timer_arm(timer, kvm_timer_compute_delta(vcpu)); } void kvm_timer_unschedule(struct kvm_vcpu *vcpu) |