diff options
341 files changed, 6614 insertions, 1953 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index db7aab1516de..b8d0a30f1644 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -192,3 +192,14 @@ Date: November 2017 Contact: "Sheng Yong" <shengyong1@huawei.com> Description: Controls readahead inode block in readdir. + +What: /sys/fs/f2fs/<disk>/extension_list +Date: Feburary 2018 +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Used to control configure extension list: + - Query: cat /sys/fs/f2fs/<disk>/extension_list + - Add: echo '[h/c]extension' > /sys/fs/f2fs/<disk>/extension_list + - Del: echo '[h/c]!extension' > /sys/fs/f2fs/<disk>/extension_list + - [h] means add/del hot file extension + - [c] means add/del cold file extension diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index 89fd8f9a259f..b3d2e4a42255 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -109,6 +109,17 @@ fec_start <offset> This is the offset, in <data_block_size> blocks, from the start of the FEC device to the beginning of the encoding data. +check_at_most_once + Verify data blocks only the first time they are read from the data device, + rather than every time. This reduces the overhead of dm-verity so that it + can be used on systems that are memory and/or CPU constrained. However, it + provides a reduced level of security because only offline tampering of the + data device's content will be detected, not online tampering. + + Hash blocks are still verified each time they are read from the hash device, + since verification of hash blocks is less performance critical than data + blocks, and a hash block will not be verified any more after all the data + blocks it covers have been verified anyway. Theory of operation =================== diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 6cf9ad12c57f..1f52baea2f69 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -172,6 +172,23 @@ offgrpjquota Turn off group journelled quota. offprjjquota Turn off project journelled quota. quota Enable plain user disk quota accounting. noquota Disable all plain disk quota option. +whint_mode=%s Control which write hints are passed down to block + layer. This supports "off", "user-based", and + "fs-based". In "off" mode (default), f2fs does not pass + down hints. In "user-based" mode, f2fs tries to pass + down hints given by users. And in "fs-based" mode, f2fs + passes down hints with its policy. +alloc_mode=%s Adjust block allocation policy, which supports "reuse" + and "default". +fsync_mode=%s Control the policy of fsync. Currently supports "posix" + and "strict". In "posix" mode, which is default, fsync + will follow POSIX semantics and does a light operation + to improve the filesystem performance. In "strict" mode, + fsync will be heavy and behaves in line with xfs, ext4 + and btrfs, where xfstest generic/342 will pass, but the + performance will regress. +test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt + context. The fake fscrypt context is used by xfstests. ================================================================================ DEBUGFS ENTRIES @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 127 +SUBLEVEL = 129 EXTRAVERSION = NAME = Blurry Fish Butt diff --git a/arch/arm/boot/dts/at91sam9g25.dtsi b/arch/arm/boot/dts/at91sam9g25.dtsi index a7da0dd0c98f..0898213f3bb2 100644 --- a/arch/arm/boot/dts/at91sam9g25.dtsi +++ b/arch/arm/boot/dts/at91sam9g25.dtsi @@ -21,7 +21,7 @@ atmel,mux-mask = < /* A B C */ 0xffffffff 0xffe0399f 0xc000001c /* pioA */ - 0x0007ffff 0x8000fe3f 0x00000000 /* pioB */ + 0x0007ffff 0x00047e3f 0x00000000 /* pioB */ 0x80000000 0x07c0ffff 0xb83fffff /* pioC */ 0x003fffff 0x003f8000 0x00000000 /* pioD */ >; diff --git a/arch/arm/boot/dts/imx53-qsrb.dts b/arch/arm/boot/dts/imx53-qsrb.dts index 96d7eede412e..036c9bd9bf75 100644 --- a/arch/arm/boot/dts/imx53-qsrb.dts +++ b/arch/arm/boot/dts/imx53-qsrb.dts @@ -23,7 +23,7 @@ imx53-qsrb { pinctrl_pmic: pmicgrp { fsl,pins = < - MX53_PAD_CSI0_DAT5__GPIO5_23 0x1e4 /* IRQ */ + MX53_PAD_CSI0_DAT5__GPIO5_23 0x1c4 /* IRQ */ >; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi index 9e096d811bed..7a032dd84bb2 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi @@ -88,6 +88,7 @@ clocks = <&clks 201>; VDDA-supply = <®_2p5v>; VDDIO-supply = <®_3p3v>; + lrclk-strength = <3>; }; }; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 9430a9928199..00de37fe5f8a 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -132,7 +132,7 @@ }; esdhc: esdhc@1560000 { - compatible = "fsl,esdhc"; + compatible = "fsl,ls1021a-esdhc", "fsl,esdhc"; reg = <0x0 0x1560000 0x0 0x10000>; interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>; clock-frequency = <0>; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 3daf8d5d7878..fb0d1b252dc8 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -1354,7 +1354,7 @@ pinctrl@fc06a000 { #address-cells = <1>; #size-cells = <1>; - compatible = "atmel,at91sam9x5-pinctrl", "atmel,at91rm9200-pinctrl", "simple-bus"; + compatible = "atmel,sama5d3-pinctrl", "atmel,at91sam9x5-pinctrl", "simple-bus"; ranges = <0xfc068000 0xfc068000 0x100 0xfc06a000 0xfc06a000 0x4000>; /* WARNING: revisit as pin spec has changed */ diff --git a/arch/arm/include/asm/xen/events.h b/arch/arm/include/asm/xen/events.h index 71e473d05fcc..620dc75362e5 100644 --- a/arch/arm/include/asm/xen/events.h +++ b/arch/arm/include/asm/xen/events.h @@ -16,7 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->ARM_cpsr); } -#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((ptr), \ +#define xchg_xen_ulong(ptr, val) atomic64_xchg(container_of((long long*)(ptr),\ atomic64_t, \ counter), (val)) diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 28c90bc372bd..78d325f3245a 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -795,6 +795,8 @@ static struct platform_device da8xx_dsp = { .resource = da8xx_rproc_resources, }; +static bool rproc_mem_inited __initdata; + #if IS_ENABLED(CONFIG_DA8XX_REMOTEPROC) static phys_addr_t rproc_base __initdata; @@ -833,6 +835,8 @@ void __init da8xx_rproc_reserve_cma(void) ret = dma_declare_contiguous(&da8xx_dsp.dev, rproc_size, rproc_base, 0); if (ret) pr_err("%s: dma_declare_contiguous failed %d\n", __func__, ret); + else + rproc_mem_inited = true; } #else @@ -847,6 +851,12 @@ int __init da8xx_register_rproc(void) { int ret; + if (!rproc_mem_inited) { + pr_warn("%s: memory not reserved for DSP, not registering DSP device\n", + __func__); + return -ENOMEM; + } + ret = platform_device_register(&da8xx_dsp); if (ret) pr_err("%s: can't register DSP device: %d\n", __func__, ret); diff --git a/arch/arm/mach-imx/cpu.c b/arch/arm/mach-imx/cpu.c index 5b0f752d5507..24be631e487d 100644 --- a/arch/arm/mach-imx/cpu.c +++ b/arch/arm/mach-imx/cpu.c @@ -133,6 +133,9 @@ struct device * __init imx_soc_device_init(void) case MXC_CPU_IMX6UL: soc_id = "i.MX6UL"; break; + case MXC_CPU_IMX6ULL: + soc_id = "i.MX6ULL"; + break; case MXC_CPU_IMX7D: soc_id = "i.MX7D"; break; diff --git a/arch/arm/mach-imx/mxc.h b/arch/arm/mach-imx/mxc.h index a5b1af6d7441..478cd91d0885 100644 --- a/arch/arm/mach-imx/mxc.h +++ b/arch/arm/mach-imx/mxc.h @@ -39,6 +39,7 @@ #define MXC_CPU_IMX6SX 0x62 #define MXC_CPU_IMX6Q 0x63 #define MXC_CPU_IMX6UL 0x64 +#define MXC_CPU_IMX6ULL 0x65 #define MXC_CPU_IMX7D 0x72 #define IMX_DDR_TYPE_LPDDR2 1 @@ -171,6 +172,11 @@ static inline bool cpu_is_imx6ul(void) return __mxc_cpu_type == MXC_CPU_IMX6UL; } +static inline bool cpu_is_imx6ull(void) +{ + return __mxc_cpu_type == MXC_CPU_IMX6ULL; +} + static inline bool cpu_is_imx6q(void) { return __mxc_cpu_type == MXC_CPU_IMX6Q; diff --git a/arch/arm64/configs/msm-auto-perf_defconfig b/arch/arm64/configs/msm-auto-perf_defconfig index 03446d70ca77..3d7794040c2a 100644 --- a/arch/arm64/configs/msm-auto-perf_defconfig +++ b/arch/arm64/configs/msm-auto-perf_defconfig @@ -232,6 +232,8 @@ CONFIG_MSM_BT_POWER=y CONFIG_BTFM_SLIM=y CONFIG_BTFM_SLIM_WCN3990=y CONFIG_CFG80211=y +CONFIG_CFG80211_CERTIFICATION_ONUS=y +CONFIG_CFG80211_REG_CELLULAR_HINTS=y CONFIG_CFG80211_INTERNAL_REGDB=y CONFIG_RFKILL=y CONFIG_IPC_ROUTER=y diff --git a/arch/arm64/configs/msm-auto_defconfig b/arch/arm64/configs/msm-auto_defconfig index b033d8bd041f..7bbf952a1245 100644 --- a/arch/arm64/configs/msm-auto_defconfig +++ b/arch/arm64/configs/msm-auto_defconfig @@ -234,6 +234,8 @@ CONFIG_MSM_BT_POWER=y CONFIG_BTFM_SLIM=y CONFIG_BTFM_SLIM_WCN3990=y CONFIG_CFG80211=y +CONFIG_CFG80211_CERTIFICATION_ONUS=y +CONFIG_CFG80211_REG_CELLULAR_HINTS=y CONFIG_CFG80211_INTERNAL_REGDB=y CONFIG_RFKILL=y CONFIG_IPC_ROUTER=y diff --git a/arch/arm64/configs/msmcortex-perf_defconfig b/arch/arm64/configs/msmcortex-perf_defconfig index 72ef08668808..61e1e347532d 100644 --- a/arch/arm64/configs/msmcortex-perf_defconfig +++ b/arch/arm64/configs/msmcortex-perf_defconfig @@ -232,6 +232,8 @@ CONFIG_MSM_BT_POWER=y CONFIG_BTFM_SLIM=y CONFIG_BTFM_SLIM_WCN3990=y CONFIG_CFG80211=y +CONFIG_CFG80211_CERTIFICATION_ONUS=y +CONFIG_CFG80211_REG_CELLULAR_HINTS=y CONFIG_CFG80211_INTERNAL_REGDB=y CONFIG_RFKILL=y CONFIG_NFC_NQ=y diff --git a/arch/arm64/configs/msmcortex_defconfig b/arch/arm64/configs/msmcortex_defconfig index 039efa9a16e0..48d4f4207735 100644 --- a/arch/arm64/configs/msmcortex_defconfig +++ b/arch/arm64/configs/msmcortex_defconfig @@ -233,6 +233,8 @@ CONFIG_MSM_BT_POWER=y CONFIG_BTFM_SLIM=y CONFIG_BTFM_SLIM_WCN3990=y CONFIG_CFG80211=y +CONFIG_CFG80211_CERTIFICATION_ONUS=y +CONFIG_CFG80211_REG_CELLULAR_HINTS=y CONFIG_CFG80211_INTERNAL_REGDB=y # CONFIG_CFG80211_CRDA_SUPPORT is not set CONFIG_RFKILL=y diff --git a/arch/arm64/configs/msmcortex_mediabox-perf_defconfig b/arch/arm64/configs/msmcortex_mediabox-perf_defconfig index 0feefe5ba25a..2c7be3c7b5d6 100644 --- a/arch/arm64/configs/msmcortex_mediabox-perf_defconfig +++ b/arch/arm64/configs/msmcortex_mediabox-perf_defconfig @@ -232,6 +232,7 @@ CONFIG_BTFM_SLIM=y CONFIG_BTFM_SLIM_WCN3990=y CONFIG_CFG80211=y CONFIG_CFG80211_CERTIFICATION_ONUS=y +CONFIG_CFG80211_REG_CELLULAR_HINTS=y CONFIG_CFG80211_INTERNAL_REGDB=y CONFIG_CFG80211_WEXT=y CONFIG_MAC80211=m diff --git a/arch/arm64/configs/msmcortex_mediabox_defconfig b/arch/arm64/configs/msmcortex_mediabox_defconfig index 8c1bce2e4f12..03e43dc0b7c0 100644 --- a/arch/arm64/configs/msmcortex_mediabox_defconfig +++ b/arch/arm64/configs/msmcortex_mediabox_defconfig @@ -233,6 +233,7 @@ CONFIG_BTFM_SLIM=y CONFIG_BTFM_SLIM_WCN3990=y CONFIG_CFG80211=y CONFIG_CFG80211_CERTIFICATION_ONUS=y +CONFIG_CFG80211_REG_CELLULAR_HINTS=y CONFIG_CFG80211_INTERNAL_REGDB=y # CONFIG_CFG80211_CRDA_SUPPORT is not set CONFIG_CFG80211_WEXT=y diff --git a/arch/arm64/configs/sdm660-perf_defconfig b/arch/arm64/configs/sdm660-perf_defconfig index 81cf6eb30e90..1a832addca83 100644 --- a/arch/arm64/configs/sdm660-perf_defconfig +++ b/arch/arm64/configs/sdm660-perf_defconfig @@ -232,6 +232,8 @@ CONFIG_MSM_BT_POWER=y CONFIG_BTFM_SLIM=y CONFIG_BTFM_SLIM_WCN3990=y CONFIG_CFG80211=y +CONFIG_CFG80211_CERTIFICATION_ONUS=y +CONFIG_CFG80211_REG_CELLULAR_HINTS=y CONFIG_CFG80211_INTERNAL_REGDB=y CONFIG_RFKILL=y CONFIG_NFC_NQ=y diff --git a/arch/arm64/configs/sdm660_defconfig b/arch/arm64/configs/sdm660_defconfig index bd7ec3ab3a94..85175deb6efa 100644 --- a/arch/arm64/configs/sdm660_defconfig +++ b/arch/arm64/configs/sdm660_defconfig @@ -233,6 +233,8 @@ CONFIG_MSM_BT_POWER=y CONFIG_BTFM_SLIM=y CONFIG_BTFM_SLIM_WCN3990=y CONFIG_CFG80211=y +CONFIG_CFG80211_CERTIFICATION_ONUS=y +CONFIG_CFG80211_REG_CELLULAR_HINTS=y CONFIG_CFG80211_INTERNAL_REGDB=y # CONFIG_CFG80211_CRDA_SUPPORT is not set CONFIG_RFKILL=y diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 85c4a8981d47..f32b42e8725d 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -48,16 +48,16 @@ do { \ } while (0) static inline int -futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; + int oparg = (int)(encoded_op << 8) >> 20; + int cmparg = (int)(encoded_op << 20) >> 20; int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; + oparg = 1U << (oparg & 0x1f); if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h index daba1f9a4f79..174aedce3167 100644 --- a/arch/mips/include/asm/kprobes.h +++ b/arch/mips/include/asm/kprobes.h @@ -40,7 +40,8 @@ typedef union mips_instruction kprobe_opcode_t; #define flush_insn_slot(p) \ do { \ - flush_icache_range((unsigned long)p->addr, \ + if (p->addr) \ + flush_icache_range((unsigned long)p->addr, \ (unsigned long)p->addr + \ (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))); \ } while (0) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 832e2167d00f..ef7c02af7522 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -18,6 +18,10 @@ #include <asm-generic/pgtable-nopmd.h> +#ifdef CONFIG_HIGHMEM +#include <asm/highmem.h> +#endif + extern int temp_tlb_entry; /* @@ -61,7 +65,8 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, #define VMALLOC_START MAP_BASE -#define PKMAP_BASE (0xfe000000UL) +#define PKMAP_END ((FIXADDR_START) & ~((LAST_PKMAP << PAGE_SHIFT)-1)) +#define PKMAP_BASE (PKMAP_END - PAGE_SIZE * LAST_PKMAP) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index b6e20f3053f4..a827655c052c 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -1238,6 +1238,13 @@ __clear_user(void __user *addr, __kernel_size_t size) { __kernel_size_t res; +#ifdef CONFIG_CPU_MICROMIPS +/* micromips memset / bzero also clobbers t7 & t8 */ +#define bzero_clobbers "$4", "$5", "$6", __UA_t0, __UA_t1, "$15", "$24", "$31" +#else +#define bzero_clobbers "$4", "$5", "$6", __UA_t0, __UA_t1, "$31" +#endif /* CONFIG_CPU_MICROMIPS */ + if (eva_kernel_access()) { __asm__ __volatile__( "move\t$4, %1\n\t" @@ -1247,7 +1254,7 @@ __clear_user(void __user *addr, __kernel_size_t size) "move\t%0, $6" : "=r" (res) : "r" (addr), "r" (size) - : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31"); + : bzero_clobbers); } else { might_fault(); __asm__ __volatile__( @@ -1258,7 +1265,7 @@ __clear_user(void __user *addr, __kernel_size_t size) "move\t%0, $6" : "=r" (res) : "r" (addr), "r" (size) - : "$4", "$5", "$6", __UA_t0, __UA_t1, "$31"); + : bzero_clobbers); } return res; diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 8f0019a2e5c8..2d33cf2185d9 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -218,7 +218,7 @@ 1: PTR_ADDIU a0, 1 /* fill bytewise */ R10KCBARRIER(0(ra)) bne t1, a0, 1b - sb a1, -1(a0) + EX(sb, a1, -1(a0), .Lsmall_fixup\@) 2: jr ra /* done */ move a2, zero @@ -249,13 +249,18 @@ PTR_L t0, TI_TASK($28) andi a2, STORMASK LONG_L t0, THREAD_BUADDR(t0) - LONG_ADDU a2, t1 + LONG_ADDU a2, a0 jr ra LONG_SUBU a2, t0 .Llast_fixup\@: jr ra - andi v1, a2, STORMASK + nop + +.Lsmall_fixup\@: + PTR_SUBU a2, t1, a0 + jr ra + PTR_ADDIU a2, 1 .endm diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index adc6911ba748..b19a3c506b1e 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -51,15 +51,15 @@ void __init pagetable_init(void) /* * Fixed mappings: */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base); + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1); + fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base); #ifdef CONFIG_HIGHMEM /* * Permanent kmaps: */ vaddr = PKMAP_BASE; - fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); pgd = swapper_pg_dir + __pgd_offset(vaddr); pud = pud_offset(pgd, vaddr); diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index dba508fe1683..4f7060ec6875 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -648,6 +648,10 @@ static int match_pci_device(struct device *dev, int index, (modpath->mod == PCI_FUNC(devfn))); } + /* index might be out of bounds for bc[] */ + if (index >= 6) + return 0; + id = PCI_SLOT(pdev->devfn) | (PCI_FUNC(pdev->devfn) << 5); return (modpath->bc[index] == id); } diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 0eca6efc0631..b9e16855a037 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -36,7 +36,8 @@ #define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) -#ifdef __SUBARCH_HAS_LWSYNC +/* The sub-arch has lwsync */ +#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) # define SMPWMB LWSYNC #else # define SMPWMB eieio diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 07a99e638449..bab3461115bb 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -21,6 +21,9 @@ /* We calculate number of sg entries based on PAGE_SIZE */ #define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry)) +/* Default time to sleep or delay between OPAL_BUSY/OPAL_BUSY_EVENT loops */ +#define OPAL_BUSY_DELAY_MS 10 + /* /sys/firmware/opal */ extern struct kobject *opal_kobj; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 3140c19c448c..70b379ee6b7e 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,19 @@ extern long long virt_phys_offset; #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) + +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * On hash the vmalloc and other regions alias to the kernel region when passed + * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can + * return true for some vmalloc addresses, which is incorrect. So explicitly + * check that the address is in the kernel region. + */ +#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \ + pfn_valid(virt_to_pfn(kaddr))) +#else #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#endif /* * On Book-E parts we need __va to parse the device tree and we can't diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index c50868681f9e..e8d6a842f4bb 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -5,10 +5,6 @@ #include <linux/stringify.h> #include <asm/feature-fixups.h> -#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) -#define __SUBARCH_HAS_LWSYNC -#endif - #ifndef __ASSEMBLY__ extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup; extern void do_lwsync_fixups(unsigned long value, void *fixup_start, diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 98f81800e00c..304f07cfa262 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -788,7 +788,8 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev) eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]); /* PCI Command: 0x4 */ - eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1]); + eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); /* Check the PCIe link is ready */ eeh_bridge_check_link(edev); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1be1092c7204..9baba9576e99 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -686,12 +686,20 @@ static int __init get_freq(char *name, int cells, unsigned long *val) static void start_cpu_decrementer(void) { #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + unsigned int tcr; + /* Clear any pending timer interrupts */ mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); - /* Enable decrementer interrupt */ - mtspr(SPRN_TCR, TCR_DIE); -#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */ + tcr = mfspr(SPRN_TCR); + /* + * The watchdog may have already been enabled by u-boot. So leave + * TRC[WP] (Watchdog Period) alone. + */ + tcr &= TCR_WP_MASK; /* Clear all bits except for TCR[WP] */ + tcr |= TCR_DIE; /* Enable decrementer */ + mtspr(SPRN_TCR, tcr); +#endif } void __init generic_calibrate_decr(void) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index f2c75a1e0536..0d91baf63fed 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -50,7 +50,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) pteg_addr = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); - copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)); + ret = H_FUNCTION; + if (copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg))) + goto done; hpte = pteg; ret = H_PTEG_FULL; @@ -71,7 +73,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6)); hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7)); pteg_addr += i * HPTE_SIZE; - copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE); + ret = H_FUNCTION; + if (copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE)) + goto done; kvmppc_set_gpr(vcpu, 4, pte_index | i); ret = H_SUCCESS; @@ -93,7 +97,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); - copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + ret = H_FUNCTION; + if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) + goto done; pte[0] = be64_to_cpu((__force __be64)pte[0]); pte[1] = be64_to_cpu((__force __be64)pte[1]); @@ -103,7 +109,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) ((flags & H_ANDCOND) && (pte[0] & avpn) != 0)) goto done; - copy_to_user((void __user *)pteg, &v, sizeof(v)); + ret = H_FUNCTION; + if (copy_to_user((void __user *)pteg, &v, sizeof(v))) + goto done; rb = compute_tlbie_rb(pte[0], pte[1], pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); @@ -171,7 +179,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) } pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); - copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) { + ret = H_FUNCTION; + break; + } pte[0] = be64_to_cpu((__force __be64)pte[0]); pte[1] = be64_to_cpu((__force __be64)pte[1]); @@ -184,7 +195,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) tsh |= H_BULK_REMOVE_NOT_FOUND; } else { /* Splat the pteg in (userland) hpt */ - copy_to_user((void __user *)pteg, &v, sizeof(v)); + if (copy_to_user((void __user *)pteg, &v, sizeof(v))) { + ret = H_FUNCTION; + break; + } rb = compute_tlbie_rb(pte[0], pte[1], tsh & H_BULK_REMOVE_PTEX); @@ -211,7 +225,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); - copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + ret = H_FUNCTION; + if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) + goto done; pte[0] = be64_to_cpu((__force __be64)pte[0]); pte[1] = be64_to_cpu((__force __be64)pte[1]); @@ -234,7 +250,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); pte[0] = (__force u64)cpu_to_be64(pte[0]); pte[1] = (__force u64)cpu_to_be64(pte[1]); - copy_to_user((void __user *)pteg, pte, sizeof(pte)); + ret = H_FUNCTION; + if (copy_to_user((void __user *)pteg, pte, sizeof(pte))) + goto done; ret = H_SUCCESS; done: diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index a18d648d31a6..3af014684872 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -53,7 +53,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, unsigned int *target = (unsigned int *)branch_target(src); /* Branch within the section doesn't need translating */ - if (target < alt_start || target >= alt_end) { + if (target < alt_start || target > alt_end) { instr = translate_branch(dest, src); if (!instr) return 1; diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index be6212ddbf06..7e42e3ec2142 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -174,6 +174,8 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, if (!dump_skip(cprm, roundup(cprm->written - total + sz, 4) - cprm->written)) goto Eio; + + rc = 0; out: free_page((unsigned long)buf); return rc; diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index 9db4398ded5d..1bceb95f422d 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -11,6 +11,7 @@ #define DEBUG +#include <linux/delay.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/of.h> @@ -56,9 +57,17 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_write_nvram(__pa(buf), count, off); - if (rc == OPAL_BUSY_EVENT) + if (rc == OPAL_BUSY_EVENT) { + msleep(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); + } else if (rc == OPAL_BUSY) { + msleep(OPAL_BUSY_DELAY_MS); + } } + + if (rc) + return -EIO; + *index += count; return count; } diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index b2e5902bd8f4..c670279b33f0 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -318,7 +318,7 @@ static void hypfs_kill_super(struct super_block *sb) if (sb->s_root) hypfs_delete_tree(sb->s_root); - if (sb_info->update_file) + if (sb_info && sb_info->update_file) hypfs_remove(sb_info->update_file); kfree(sb->s_fs_info); sb->s_fs_info = NULL; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 42570d8fb265..e73979236659 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -798,6 +798,7 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, /* copy and convert to ebcdic */ memcpy(ipb->hdr.loadparm, buf, lp_len); ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN); + ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID; return len; } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 0f41a8286378..da4f3f2a8186 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -21,8 +21,14 @@ SECTIONS { . = 0x00000000; .text : { - _text = .; /* Text and read-only data */ + /* Text and read-only data */ HEAD_TEXT + /* + * E.g. perf doesn't like symbols starting at address zero, + * therefore skip the initial PSW and channel program located + * at address zero and let _text start at 0x200. + */ + _text = 0x200; TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 59d503866431..9cc600b2d68c 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1733,9 +1733,14 @@ static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) lp->rcv_nxt = p->seqid; + /* + * If this is a control-only packet, there is nothing + * else to do but advance the rx queue since the packet + * was already processed above. + */ if (!(p->type & LDC_DATA)) { new = rx_advance(lp, new); - goto no_data; + break; } if (p->stype & (LDC_ACK | LDC_NACK)) { err = data_ack_nack(lp, p); diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index c211153ca69a..56648f4f8b41 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -140,7 +140,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { static void hard_handler(int sig, siginfo_t *si, void *p) { - struct ucontext *uc = p; + ucontext_t *uc = p; mcontext_t *mc = &uc->uc_mcontext; unsigned long pending = 1UL << sig; diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig new file mode 100644 index 000000000000..512d009146dd --- /dev/null +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -0,0 +1,442 @@ +CONFIG_POSIX_MQUEUE=y +# CONFIG_USELIB is not set +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_NAMESPACES=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_RD_LZ4 is not set +CONFIG_KALLSYMS_ALL=y +# CONFIG_PCSPKR_PLATFORM is not set +CONFIG_BPF_SYSCALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y +CONFIG_JUMP_LABEL=y +CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SMP=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_MCORE2=y +CONFIG_PROCESSOR_SELECT=y +# CONFIG_CPU_SUP_CENTAUR is not set +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT=y +# CONFIG_MICROCODE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_KSM=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_TRANSPARENT_HUGEPAGE=y +# CONFIG_MTRR is not set +CONFIG_HZ_100=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_PHYSICAL_START=0x200000 +CONFIG_RANDOMIZE_BASE=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 reboot=p" +CONFIG_PM_WAKELOCKS=y +CONFIG_PM_WAKELOCKS_LIMIT=0 +# CONFIG_PM_WAKELOCKS_GC is not set +CONFIG_PM_DEBUG=y +CONFIG_ACPI_PROCFS_POWER=y +# CONFIG_ACPI_FAN is not set +# CONFIG_ACPI_THERMAL is not set +# CONFIG_X86_PM_TIMER is not set +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_ACPI_CPUFREQ_CPB is not set +CONFIG_PCI_MMCONFIG=y +CONFIG_PCI_MSI=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y +CONFIG_IA32_EMULATION=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_INET_ESP=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG_DESTROY=y +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_OPTIMISTIC_DAD=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_INET6_IPCOMP=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CT_PROTO_DCCP=y +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_CT_PROTO_UDPLITE=y +CONFIG_NF_CONNTRACK_AMANDA=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_H323=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_NETBIOS_NS=y +CONFIG_NF_CONNTRACK_PPTP=y +CONFIG_NF_CONNTRACK_SANE=y +CONFIG_NF_CONNTRACK_TFTP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=y +CONFIG_NETFILTER_XT_TARGET_CONNMARK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=y +CONFIG_NETFILTER_XT_TARGET_TPROXY=y +CONFIG_NETFILTER_XT_TARGET_TRACE=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_COMMENT=y +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=y +CONFIG_NETFILTER_XT_MATCH_CONNMARK=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y +CONFIG_NETFILTER_XT_MATCH_HELPER=y +CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MATCH_LIMIT=y +CONFIG_NETFILTER_XT_MATCH_MAC=y +CONFIG_NETFILTER_XT_MATCH_MARK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y +CONFIG_NETFILTER_XT_MATCH_QTAGUID=y +CONFIG_NETFILTER_XT_MATCH_QUOTA=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_SOCKET=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETFILTER_XT_MATCH_STRING=y +CONFIG_NETFILTER_XT_MATCH_TIME=y +CONFIG_NETFILTER_XT_MATCH_U32=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_MATCH_AH=y +CONFIG_IP_NF_MATCH_ECN=y +CONFIG_IP_NF_MATCH_TTL=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_NETMAP=y +CONFIG_IP_NF_TARGET_REDIRECT=y +CONFIG_IP_NF_MANGLE=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_SECURITY=y +CONFIG_IP_NF_ARPTABLES=y +CONFIG_IP_NF_ARPFILTER=y +CONFIG_IP_NF_ARP_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_MATCH_RPFILTER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_IP6_NF_RAW=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=y +CONFIG_NET_CLS_U32=y +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_U32=y +CONFIG_NET_CLS_ACT=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_RFKILL=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEBUG_DEVRES=y +CONFIG_OF=y +CONFIG_OF_UNITTEST=y +# CONFIG_PNP_DEBUG_MESSAGES is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y +CONFIG_UID_SYS_STATS=y +CONFIG_MEMORY_STATE_TIME=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_VIRTIO=y +CONFIG_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_DM_UEVENT=y +CONFIG_DM_VERITY=y +CONFIG_DM_VERITY_FEC=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_TUN=y +CONFIG_VIRTIO_NET=y +# CONFIG_ETHERNET is not set +CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=y +CONFIG_PPP_DEFLATE=y +CONFIG_PPP_MPPE=y +CONFIG_PPPOLAC=y +CONFIG_PPPOPNS=y +CONFIG_USB_USBNET=y +# CONFIG_USB_NET_AX8817X is not set +# CONFIG_USB_NET_AX88179_178A is not set +# CONFIG_USB_NET_CDCETHER is not set +# CONFIG_USB_NET_CDC_NCM is not set +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_NET_CDC_SUBSET is not set +# CONFIG_USB_NET_ZAURUS is not set +CONFIG_MAC80211_HWSIM=y +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_KEYRESET=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_XPAD=y +CONFIG_JOYSTICK_XPAD_FF=y +CONFIG_JOYSTICK_XPAD_LEDS=y +CONFIG_INPUT_TABLET=y +CONFIG_TABLET_USB_ACECAD=y +CONFIG_TABLET_USB_AIPTEK=y +CONFIG_TABLET_USB_GTCO=y +CONFIG_TABLET_USB_HANWANG=y +CONFIG_TABLET_USB_KBTAB=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_KEYCHORD=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_I8042 is not set +# CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVMEM is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=48 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_HW_RANDOM=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set +# CONFIG_HW_RANDOM_VIA is not set +CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_HPET=y +# CONFIG_HPET_MMAP_DEFAULT is not set +# CONFIG_DEVPORT is not set +# CONFIG_ACPI_I2C_OPREGION is not set +# CONFIG_I2C_COMPAT is not set +# CONFIG_I2C_HELPER_AUTO is not set +CONFIG_PTP_1588_CLOCK=y +CONFIG_GPIOLIB=y +# CONFIG_HWMON is not set +# CONFIG_X86_PKG_TEMP_THERMAL is not set +CONFIG_WATCHDOG=y +CONFIG_SOFT_WATCHDOG=y +CONFIG_MEDIA_SUPPORT=y +# CONFIG_DVB_TUNER_DIB0070 is not set +# CONFIG_DVB_TUNER_DIB0090 is not set +# CONFIG_VGA_ARB is not set +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_HIDRAW=y +CONFIG_UHID=y +# CONFIG_HID_GENERIC is not set +CONFIG_HID_A4TECH=y +CONFIG_HID_ACRUX=y +CONFIG_HID_ACRUX_FF=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_PRODIKEYS=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_DRAGONRISE_FF=y +CONFIG_HID_EMS_FF=y +CONFIG_HID_ELECOM=y +CONFIG_HID_EZKEY=y +CONFIG_HID_HOLTEK=y +CONFIG_HID_KEYTOUCH=y +CONFIG_HID_KYE=y +CONFIG_HID_UCLOGIC=y +CONFIG_HID_WALTOP=y +CONFIG_HID_GYRATION=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LCPOWER=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_LOGITECH_DJ=y +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MAGICMOUSE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NTRIG=y +CONFIG_HID_ORTEK=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_PICOLCD=y +CONFIG_HID_PRIMAX=y +CONFIG_HID_ROCCAT=y +CONFIG_HID_SAITEK=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SPEEDLINK=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_GREENASIA=y +CONFIG_GREENASIA_FF=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_SMARTJOYPLUS_FF=y +CONFIG_HID_TIVO=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_THRUSTMASTER=y +CONFIG_HID_WACOM=y +CONFIG_HID_WIIMOTE=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HID_ZYDACRON=y +CONFIG_USB_HIDDEV=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_GADGET=y +CONFIG_USB_DUMMY_HCD=y +CONFIG_USB_CONFIGFS=y +CONFIG_USB_CONFIGFS_F_FS=y +CONFIG_USB_CONFIGFS_F_ACC=y +CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y +CONFIG_USB_CONFIGFS_UEVENT=y +CONFIG_USB_CONFIGFS_F_MIDI=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_STAGING=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +CONFIG_ANDROID_VSOC=y +CONFIG_ION=y +# CONFIG_X86_PLATFORM_DEVICES is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +# CONFIG_FIRMWARE_MEMMAP is not set +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_ENCRYPTION=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=y +CONFIG_FUSE_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_SDCARD_FS=y +CONFIG_PSTORE=y +CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_PANIC_TIMEOUT=5 +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_IO_DELAY_NONE=y +CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_OPTIMIZE_INLINING=y +CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_PATH=y +CONFIG_HARDENED_USERCOPY=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_ECHAINIV=y diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c7c4d9c51e99..eb02087650d2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -365,6 +365,8 @@ static int __init tsc_setup(char *str) tsc_clocksource_reliable = 1; if (!strncmp(str, "noirqtime", 9)) no_sched_irq_time = 1; + if (!strcmp(str, "unstable")) + mark_tsc_unstable("boot parameter"); return 1; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2038e5bacce6..42654375b73f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1386,6 +1386,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, */ if (var->unusable) var->db = 0; + /* This is symmetric with svm_set_segment() */ var->dpl = to_svm(vcpu)->vmcb->save.cpl; break; } @@ -1531,18 +1532,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, s->base = var->base; s->limit = var->limit; s->selector = var->selector; - if (var->unusable) - s->attrib = 0; - else { - s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); - s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; - s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; - s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; - s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; - s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; - s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; - s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; - } + s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); + s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; + s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; + s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT; + s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; + s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; + s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; + s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; /* * This is always accurate, except if SYSRET returned to a segment @@ -1551,7 +1548,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, * would entail passing the CPL to userspace and back. */ if (seg == VCPU_SREG_SS) - svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; + /* This is symmetric with svm_get_segment() */ + svm->vmcb->save.cpl = (var->dpl & 3); mark_dirty(svm->vmcb, VMCB_SEG); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 46bbc69844bd..528b4352fa99 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7657,11 +7657,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); int cr = exit_qualification & 15; - int reg = (exit_qualification >> 8) & 15; - unsigned long val = kvm_register_readl(vcpu, reg); + int reg; + unsigned long val; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ + reg = (exit_qualification >> 8) & 15; + val = kvm_register_readl(vcpu, reg); switch (cr) { case 0: if (vmcs12->cr0_guest_host_mask & @@ -7716,6 +7718,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * lmsw can change bits 1..3 of cr0, and only set bit 0 of * cr0. Other attempted changes are ignored, with no exit. */ + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; if (vmcs12->cr0_guest_host_mask & 0xe & (val ^ vmcs12->cr0_read_shadow)) return true; diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 7e48807b2fa1..45a53dfe1859 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -55,7 +55,7 @@ ENTRY(csum_partial_copy_generic) movq %r12, 3*8(%rsp) movq %r14, 4*8(%rsp) movq %r13, 5*8(%rsp) - movq %rbp, 6*8(%rsp) + movq %r15, 6*8(%rsp) movq %r8, (%rsp) movq %r9, 1*8(%rsp) @@ -74,7 +74,7 @@ ENTRY(csum_partial_copy_generic) /* main loop. clear in 64 byte blocks */ /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ /* r11: temp3, rdx: temp4, r12 loopcnt */ - /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ + /* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */ .p2align 4 .Lloop: source @@ -89,7 +89,7 @@ ENTRY(csum_partial_copy_generic) source movq 32(%rdi), %r10 source - movq 40(%rdi), %rbp + movq 40(%rdi), %r15 source movq 48(%rdi), %r14 source @@ -103,7 +103,7 @@ ENTRY(csum_partial_copy_generic) adcq %r11, %rax adcq %rdx, %rax adcq %r10, %rax - adcq %rbp, %rax + adcq %r15, %rax adcq %r14, %rax adcq %r13, %rax @@ -121,7 +121,7 @@ ENTRY(csum_partial_copy_generic) dest movq %r10, 32(%rsi) dest - movq %rbp, 40(%rsi) + movq %r15, 40(%rsi) dest movq %r14, 48(%rsi) dest @@ -203,7 +203,7 @@ ENTRY(csum_partial_copy_generic) movq 3*8(%rsp), %r12 movq 4*8(%rsp), %r14 movq 5*8(%rsp), %r13 - movq 6*8(%rsp), %rbp + movq 6*8(%rsp), %r15 addq $7*8, %rsp ret diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index 1518d2805ae8..fd6825537b97 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -10,7 +10,7 @@ void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) { - struct ucontext *uc = p; + ucontext_t *uc = p; GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), &uc->uc_mcontext); diff --git a/block/bio-integrity.c b/block/bio-integrity.c index f6325d573c10..6e091ccadcd4 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -175,6 +175,9 @@ bool bio_integrity_enabled(struct bio *bio) if (!bio_is_rw(bio)) return false; + if (!bio_sectors(bio)) + return false; + /* Already protected? */ if (bio_integrity(bio)) return false; diff --git a/block/blk-mq.c b/block/blk-mq.c index 1452db06ba45..d65ddc18d7e4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1252,13 +1252,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); + blk_queue_split(q, &bio, q->bio_split); + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { bio_io_error(bio); return BLK_QC_T_NONE; } - blk_queue_split(q, &bio, q->bio_split); - if (!is_flush_fua && !blk_queue_nomerges(q) && blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) return BLK_QC_T_NONE; @@ -1634,7 +1634,8 @@ static void blk_mq_exit_hctx(struct request_queue *q, { unsigned flush_start_tag = set->queue_depth; - blk_mq_tag_idle(hctx); + if (blk_mq_hw_queue_mapped(hctx)) + blk_mq_tag_idle(hctx); if (set->ops->exit_request) set->ops->exit_request(set->driver_data, diff --git a/block/partition-generic.c b/block/partition-generic.c index 19cf33b91a5a..f75be6a4411f 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -320,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); - if (!pinfo) + if (!pinfo) { + err = -ENOMEM; goto out_free_stats; + } memcpy(pinfo, info, sizeof(*info)); p->info = pinfo; } diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64 new file mode 100644 index 000000000000..b3d89109fe75 --- /dev/null +++ b/build.config.cuttlefish.x86_64 @@ -0,0 +1,15 @@ +ARCH=x86_64 +BRANCH=android-4.4 +CLANG_TRIPLE=x86_64-linux-gnu- +CROSS_COMPILE=x86_64-linux-androidkernel- +DEFCONFIG=x86_64_cuttlefish_defconfig +EXTRA_CMDS='' +KERNEL_DIR=common +POST_DEFCONFIG_CMDS="check_defconfig" +CLANG_PREBUILT_BIN=prebuilts/clang/host/linux-x86/clang-4630689/bin +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 84f8d4d8b6bc..09f706b7b06e 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -62,9 +62,6 @@ do_async_gen_syndrome(struct dma_chan *chan, dma_addr_t dma_dest[2]; int src_off = 0; - if (submit->flags & ASYNC_TX_FENCE) - dma_flags |= DMA_PREP_FENCE; - while (src_cnt > 0) { submit->flags = flags_orig; pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); @@ -83,6 +80,8 @@ do_async_gen_syndrome(struct dma_chan *chan, if (cb_fn_orig) dma_flags |= DMA_PREP_INTERRUPT; } + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; /* Drivers force forward progress in case they can not provide * a descriptor diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c index 10ce48e16ebf..d830705f8a18 100644 --- a/drivers/acpi/acpica/evxfevnt.c +++ b/drivers/acpi/acpica/evxfevnt.c @@ -180,6 +180,12 @@ acpi_status acpi_enable_event(u32 event, u32 flags) ACPI_FUNCTION_TRACE(acpi_enable_event); + /* If Hardware Reduced flag is set, there are no fixed events */ + + if (acpi_gbl_reduced_hardware) { + return_ACPI_STATUS(AE_OK); + } + /* Decode the Fixed Event */ if (event > ACPI_EVENT_MAX) { @@ -237,6 +243,12 @@ acpi_status acpi_disable_event(u32 event, u32 flags) ACPI_FUNCTION_TRACE(acpi_disable_event); + /* If Hardware Reduced flag is set, there are no fixed events */ + + if (acpi_gbl_reduced_hardware) { + return_ACPI_STATUS(AE_OK); + } + /* Decode the Fixed Event */ if (event > ACPI_EVENT_MAX) { @@ -290,6 +302,12 @@ acpi_status acpi_clear_event(u32 event) ACPI_FUNCTION_TRACE(acpi_clear_event); + /* If Hardware Reduced flag is set, there are no fixed events */ + + if (acpi_gbl_reduced_hardware) { + return_ACPI_STATUS(AE_OK); + } + /* Decode the Fixed Event */ if (event > ACPI_EVENT_MAX) { diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c index e54bc2aa7a88..a05b3b79b987 100644 --- a/drivers/acpi/acpica/psobject.c +++ b/drivers/acpi/acpica/psobject.c @@ -121,6 +121,9 @@ static acpi_status acpi_ps_get_aml_opcode(struct acpi_walk_state *walk_state) (u32)(aml_offset + sizeof(struct acpi_table_header))); + ACPI_ERROR((AE_INFO, + "Aborting disassembly, AML byte code is corrupt")); + /* Dump the context surrounding the invalid opcode */ acpi_ut_dump_buffer(((u8 *)walk_state->parser_state. @@ -129,6 +132,14 @@ static acpi_status acpi_ps_get_aml_opcode(struct acpi_walk_state *walk_state) sizeof(struct acpi_table_header) - 16)); acpi_os_printf(" */\n"); + + /* + * Just abort the disassembly, cannot continue because the + * parser is essentially lost. The disassembler can then + * randomly fail because an ill-constructed parse tree + * can result. + */ + return_ACPI_STATUS(AE_AML_BAD_OPCODE); #endif } @@ -293,6 +304,9 @@ acpi_ps_create_op(struct acpi_walk_state *walk_state, if (status == AE_CTRL_PARSE_CONTINUE) { return_ACPI_STATUS(AE_CTRL_PARSE_CONTINUE); } + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } /* Create Op structure and append to parent's argument list */ diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index b48ecbfc4498..8c5503c0bad7 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -206,6 +206,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1557060 */ + .callback = video_detect_force_video, + .ident = "SAMSUNG 670Z5E", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), + DMI_MATCH(DMI_PRODUCT_NAME, "670Z5E"), + }, + }, + { /* https://bugzilla.redhat.com/show_bug.cgi?id=1094948 */ .callback = video_detect_force_video, .ident = "SAMSUNG 730U3E/740U3E", diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index aaa761b9081c..cd2eab6aa92e 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -514,8 +514,9 @@ int ahci_platform_init_host(struct platform_device *pdev, irq = platform_get_irq(pdev, 0); if (irq <= 0) { - dev_err(dev, "no irq\n"); - return -EINVAL; + if (irq != -EPROBE_DEFER) + dev_err(dev, "no irq\n"); + return irq; } hpriv->irq = irq; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 576b5facdf43..b3a62e94d1f3 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1582,7 +1582,7 @@ int regmap_raw_write(struct regmap *map, unsigned int reg, return -EINVAL; if (val_len % map->format.val_bytes) return -EINVAL; - if (map->max_raw_write && map->max_raw_write > val_len) + if (map->max_raw_write && map->max_raw_write < val_len) return -E2BIG; map->lock(map->lock_arg); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3e65ae144fde..e8165ec55e6f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -623,6 +623,9 @@ static int loop_switch(struct loop_device *lo, struct file *file) */ static int loop_flush(struct loop_device *lo) { + /* loop not yet configured, no running thread, nothing to flush */ + if (lo->lo_state != Lo_bound) + return 0; return loop_switch(lo, NULL); } @@ -1118,11 +1121,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (info->lo_encrypt_type) { unsigned int type = info->lo_encrypt_type; - if (type >= MAX_LO_CRYPT) - return -EINVAL; + if (type >= MAX_LO_CRYPT) { + err = -EINVAL; + goto exit; + } xfer = xfer_funcs[type]; - if (xfer == NULL) - return -EINVAL; + if (xfer == NULL) { + err = -EINVAL; + goto exit; + } } else xfer = NULL; diff --git a/drivers/bus/brcmstb_gisb.c b/drivers/bus/brcmstb_gisb.c index f364fa4d24eb..f59183018280 100644 --- a/drivers/bus/brcmstb_gisb.c +++ b/drivers/bus/brcmstb_gisb.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Broadcom Corporation + * Copyright (C) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -33,8 +33,6 @@ #define ARB_ERR_CAP_CLEAR (1 << 0) #define ARB_ERR_CAP_STATUS_TIMEOUT (1 << 12) #define ARB_ERR_CAP_STATUS_TEA (1 << 11) -#define ARB_ERR_CAP_STATUS_BS_SHIFT (1 << 2) -#define ARB_ERR_CAP_STATUS_BS_MASK 0x3c #define ARB_ERR_CAP_STATUS_WRITE (1 << 1) #define ARB_ERR_CAP_STATUS_VALID (1 << 0) @@ -43,7 +41,6 @@ enum { ARB_ERR_CAP_CLR, ARB_ERR_CAP_HI_ADDR, ARB_ERR_CAP_ADDR, - ARB_ERR_CAP_DATA, ARB_ERR_CAP_STATUS, ARB_ERR_CAP_MASTER, }; @@ -53,7 +50,6 @@ static const int gisb_offsets_bcm7038[] = { [ARB_ERR_CAP_CLR] = 0x0c4, [ARB_ERR_CAP_HI_ADDR] = -1, [ARB_ERR_CAP_ADDR] = 0x0c8, - [ARB_ERR_CAP_DATA] = 0x0cc, [ARB_ERR_CAP_STATUS] = 0x0d0, [ARB_ERR_CAP_MASTER] = -1, }; @@ -63,7 +59,6 @@ static const int gisb_offsets_bcm7400[] = { [ARB_ERR_CAP_CLR] = 0x0c8, [ARB_ERR_CAP_HI_ADDR] = -1, [ARB_ERR_CAP_ADDR] = 0x0cc, - [ARB_ERR_CAP_DATA] = 0x0d0, [ARB_ERR_CAP_STATUS] = 0x0d4, [ARB_ERR_CAP_MASTER] = 0x0d8, }; @@ -73,7 +68,6 @@ static const int gisb_offsets_bcm7435[] = { [ARB_ERR_CAP_CLR] = 0x168, [ARB_ERR_CAP_HI_ADDR] = -1, [ARB_ERR_CAP_ADDR] = 0x16c, - [ARB_ERR_CAP_DATA] = 0x170, [ARB_ERR_CAP_STATUS] = 0x174, [ARB_ERR_CAP_MASTER] = 0x178, }; @@ -83,7 +77,6 @@ static const int gisb_offsets_bcm7445[] = { [ARB_ERR_CAP_CLR] = 0x7e4, [ARB_ERR_CAP_HI_ADDR] = 0x7e8, [ARB_ERR_CAP_ADDR] = 0x7ec, - [ARB_ERR_CAP_DATA] = 0x7f0, [ARB_ERR_CAP_STATUS] = 0x7f4, [ARB_ERR_CAP_MASTER] = 0x7f8, }; @@ -105,9 +98,13 @@ static u32 gisb_read(struct brcmstb_gisb_arb_device *gdev, int reg) { int offset = gdev->gisb_offsets[reg]; - /* return 1 if the hardware doesn't have ARB_ERR_CAP_MASTER */ - if (offset == -1) - return 1; + if (offset < 0) { + /* return 1 if the hardware doesn't have ARB_ERR_CAP_MASTER */ + if (reg == ARB_ERR_CAP_MASTER) + return 1; + else + return 0; + } if (gdev->big_endian) return ioread32be(gdev->base + offset); @@ -115,6 +112,16 @@ static u32 gisb_read(struct brcmstb_gisb_arb_device *gdev, int reg) return ioread32(gdev->base + offset); } +static u64 gisb_read_address(struct brcmstb_gisb_arb_device *gdev) +{ + u64 value; + + value = gisb_read(gdev, ARB_ERR_CAP_ADDR); + value |= (u64)gisb_read(gdev, ARB_ERR_CAP_HI_ADDR) << 32; + + return value; +} + static void gisb_write(struct brcmstb_gisb_arb_device *gdev, u32 val, int reg) { int offset = gdev->gisb_offsets[reg]; @@ -123,9 +130,9 @@ static void gisb_write(struct brcmstb_gisb_arb_device *gdev, u32 val, int reg) return; if (gdev->big_endian) - iowrite32be(val, gdev->base + reg); + iowrite32be(val, gdev->base + offset); else - iowrite32(val, gdev->base + reg); + iowrite32(val, gdev->base + offset); } static ssize_t gisb_arb_get_timeout(struct device *dev, @@ -181,7 +188,7 @@ static int brcmstb_gisb_arb_decode_addr(struct brcmstb_gisb_arb_device *gdev, const char *reason) { u32 cap_status; - unsigned long arb_addr; + u64 arb_addr; u32 master; const char *m_name; char m_fmt[11]; @@ -193,10 +200,7 @@ static int brcmstb_gisb_arb_decode_addr(struct brcmstb_gisb_arb_device *gdev, return 1; /* Read the address and master */ - arb_addr = gisb_read(gdev, ARB_ERR_CAP_ADDR) & 0xffffffff; -#if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) - arb_addr |= (u64)gisb_read(gdev, ARB_ERR_CAP_HI_ADDR) << 32; -#endif + arb_addr = gisb_read_address(gdev); master = gisb_read(gdev, ARB_ERR_CAP_MASTER); m_name = brcmstb_gisb_master_to_str(gdev, master); @@ -205,7 +209,7 @@ static int brcmstb_gisb_arb_decode_addr(struct brcmstb_gisb_arb_device *gdev, m_name = m_fmt; } - pr_crit("%s: %s at 0x%lx [%c %s], core: %s\n", + pr_crit("%s: %s at 0x%llx [%c %s], core: %s\n", __func__, reason, arb_addr, cap_status & ARB_ERR_CAP_STATUS_WRITE ? 'W' : 'R', cap_status & ARB_ERR_CAP_STATUS_TIMEOUT ? "timeout" : "", diff --git a/drivers/char/diag/diag_dci.c b/drivers/char/diag/diag_dci.c index 16ff781cde65..b0b36d00415d 100644 --- a/drivers/char/diag/diag_dci.c +++ b/drivers/char/diag/diag_dci.c @@ -689,7 +689,7 @@ int diag_dci_query_log_mask(struct diag_dci_client_tbl *entry, byte_mask = 0x01 << (item_num % 8); offset = equip_id * 514; - if (offset + byte_index > DCI_LOG_MASK_SIZE) { + if (offset + byte_index >= DCI_LOG_MASK_SIZE) { pr_err("diag: In %s, invalid offset: %d, log_code: %d, byte_index: %d\n", __func__, offset, log_code, byte_index); return 0; @@ -716,7 +716,7 @@ int diag_dci_query_event_mask(struct diag_dci_client_tbl *entry, bit_index = event_id % 8; byte_mask = 0x1 << bit_index; - if (byte_index > DCI_EVENT_MASK_SIZE) { + if (byte_index >= DCI_EVENT_MASK_SIZE) { pr_err("diag: In %s, invalid, event_id: %d, byte_index: %d\n", __func__, event_id, byte_index); return 0; diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index f53e8ba2c718..83c206f0fc98 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -409,6 +409,7 @@ static void start_event_fetch(struct ssif_info *ssif_info, unsigned long *flags) msg = ipmi_alloc_smi_msg(); if (!msg) { ssif_info->ssif_state = SSIF_NORMAL; + ipmi_ssif_unlock_cond(ssif_info, flags); return; } @@ -431,6 +432,7 @@ static void start_recv_msg_fetch(struct ssif_info *ssif_info, msg = ipmi_alloc_smi_msg(); if (!msg) { ssif_info->ssif_state = SSIF_NORMAL; + ipmi_ssif_unlock_cond(ssif_info, flags); return; } diff --git a/drivers/char/random.c b/drivers/char/random.c index 1822472dffab..dffd06a3bb76 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -724,7 +724,7 @@ retry: static int credit_entropy_bits_safe(struct entropy_store *r, int nbits) { - const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1)); + const int nbits_max = r->poolinfo->poolwords * 32; if (nbits < 0) return -EINVAL; @@ -886,12 +886,16 @@ static void add_interrupt_bench(cycles_t start) static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { __u32 *ptr = (__u32 *) regs; + unsigned int idx; if (regs == NULL) return 0; - if (f->reg_idx >= sizeof(struct pt_regs) / sizeof(__u32)) - f->reg_idx = 0; - return *(ptr + f->reg_idx++); + idx = READ_ONCE(f->reg_idx); + if (idx >= sizeof(struct pt_regs) / sizeof(__u32)) + idx = 0; + ptr += idx++; + WRITE_ONCE(f->reg_idx, idx); + return *ptr; } void add_interrupt_randomness(int irq, int irq_flags) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 7c4b1ffe874f..d56ba46e6b78 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -891,9 +891,7 @@ static void bcm2835_pll_off(struct clk_hw *hw) const struct bcm2835_pll_data *data = pll->data; spin_lock(&cprman->regs_lock); - cprman_write(cprman, data->cm_ctrl_reg, - cprman_read(cprman, data->cm_ctrl_reg) | - CM_PLL_ANARST); + cprman_write(cprman, data->cm_ctrl_reg, CM_PLL_ANARST); cprman_write(cprman, data->a2w_ctrl_reg, cprman_read(cprman, data->a2w_ctrl_reg) | A2W_PLL_CTRL_PWRDN); @@ -929,6 +927,10 @@ static int bcm2835_pll_on(struct clk_hw *hw) cpu_relax(); } + cprman_write(cprman, data->a2w_ctrl_reg, + cprman_read(cprman, data->a2w_ctrl_reg) | + A2W_PLL_CTRL_PRST_DISABLE); + return 0; } diff --git a/drivers/clk/clk-conf.c b/drivers/clk/clk-conf.c index 43a218f35b19..4ad32ce428cf 100644 --- a/drivers/clk/clk-conf.c +++ b/drivers/clk/clk-conf.c @@ -106,7 +106,7 @@ static int __set_clk_rates(struct device_node *node, bool clk_supplier) rc = clk_set_rate(clk, rate); if (rc < 0) - pr_err("clk: couldn't set %s clk rate to %d (%d), current rate: %ld\n", + pr_err("clk: couldn't set %s clk rate to %u (%d), current rate: %lu\n", __clk_get_name(clk), rate, rc, clk_get_rate(clk)); clk_put(clk); diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c index cd0f2726f5e0..c40445488d3a 100644 --- a/drivers/clk/clk-scpi.c +++ b/drivers/clk/clk-scpi.c @@ -71,15 +71,15 @@ static const struct clk_ops scpi_clk_ops = { }; /* find closest match to given frequency in OPP table */ -static int __scpi_dvfs_round_rate(struct scpi_clk *clk, unsigned long rate) +static long __scpi_dvfs_round_rate(struct scpi_clk *clk, unsigned long rate) { int idx; - u32 fmin = 0, fmax = ~0, ftmp; + unsigned long fmin = 0, fmax = ~0, ftmp; const struct scpi_opp *opp = clk->info->opps; for (idx = 0; idx < clk->info->count; idx++, opp++) { ftmp = opp->freq; - if (ftmp >= (u32)rate) { + if (ftmp >= rate) { if (ftmp <= fmax) fmax = ftmp; break; diff --git a/drivers/clk/mvebu/armada-38x.c b/drivers/clk/mvebu/armada-38x.c index 8bccf4ecdab6..9ff4ea63932d 100644 --- a/drivers/clk/mvebu/armada-38x.c +++ b/drivers/clk/mvebu/armada-38x.c @@ -46,10 +46,11 @@ static u32 __init armada_38x_get_tclk_freq(void __iomem *sar) } static const u32 armada_38x_cpu_frequencies[] __initconst = { - 0, 0, 0, 0, - 1066 * 1000 * 1000, 0, 0, 0, + 666 * 1000 * 1000, 0, 800 * 1000 * 1000, 0, + 1066 * 1000 * 1000, 0, 1200 * 1000 * 1000, 0, 1332 * 1000 * 1000, 0, 0, 0, - 1600 * 1000 * 1000, + 1600 * 1000 * 1000, 0, 0, 0, + 1866 * 1000 * 1000, 0, 0, 2000 * 1000 * 1000, }; static u32 __init armada_38x_get_cpu_freq(void __iomem *sar) @@ -75,11 +76,11 @@ static const struct coreclk_ratio armada_38x_coreclk_ratios[] __initconst = { }; static const int armada_38x_cpu_l2_ratios[32][2] __initconst = { - {0, 1}, {0, 1}, {0, 1}, {0, 1}, - {1, 2}, {0, 1}, {0, 1}, {0, 1}, + {1, 2}, {0, 1}, {1, 2}, {0, 1}, + {1, 2}, {0, 1}, {1, 2}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {1, 2}, {0, 1}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, @@ -90,7 +91,7 @@ static const int armada_38x_cpu_ddr_ratios[32][2] __initconst = { {1, 2}, {0, 1}, {0, 1}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, {1, 2}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {1, 2}, {0, 1}, {0, 1}, {7, 15}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c index a5c111b67f37..ea11a33e7fff 100644 --- a/drivers/cpuidle/dt_idle_states.c +++ b/drivers/cpuidle/dt_idle_states.c @@ -174,8 +174,10 @@ int dt_init_idle_driver(struct cpuidle_driver *drv, if (!state_node) break; - if (!of_device_is_available(state_node)) + if (!of_device_is_available(state_node)) { + of_node_put(state_node); continue; + } if (!idle_state_valid(state_node, i, cpumask)) { pr_warn("%s idle state not valid, bailing out\n", diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c index 584a1857624a..324cce5d7354 100644 --- a/drivers/cpuidle/lpm-levels.c +++ b/drivers/cpuidle/lpm-levels.c @@ -1706,7 +1706,8 @@ static int cluster_cpuidle_register(struct lpm_cluster *cl) struct cpuidle_state *st = &cl->drv->states[i]; struct lpm_cpu_level *cpu_level = &cl->cpu->levels[i]; snprintf(st->name, CPUIDLE_NAME_LEN, "C%u\n", i); - snprintf(st->desc, CPUIDLE_DESC_LEN, cpu_level->name); + snprintf(st->desc, CPUIDLE_DESC_LEN, "%s", + cpu_level->name); st->flags = 0; st->exit_latency = cpu_level->pwr.latency_us; st->power_usage = cpu_level->pwr.ss_power; diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 66c073fc8afc..82a7c89caae2 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1473,10 +1473,10 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; rmb(); - initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); - rmb(); cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); rmb(); + initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD); + rmb(); cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; rmb(); diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index f8d740a6740d..48d4dddf4941 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1722,17 +1722,24 @@ static int sdma_probe(struct platform_device *pdev) if (IS_ERR(sdma->clk_ahb)) return PTR_ERR(sdma->clk_ahb); - clk_prepare(sdma->clk_ipg); - clk_prepare(sdma->clk_ahb); + ret = clk_prepare(sdma->clk_ipg); + if (ret) + return ret; + + ret = clk_prepare(sdma->clk_ahb); + if (ret) + goto err_clk; ret = devm_request_irq(&pdev->dev, irq, sdma_int_handler, 0, "sdma", sdma); if (ret) - return ret; + goto err_irq; sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL); - if (!sdma->script_addrs) - return -ENOMEM; + if (!sdma->script_addrs) { + ret = -ENOMEM; + goto err_irq; + } /* initially no scripts available */ saddr_arr = (s32 *)sdma->script_addrs; @@ -1847,6 +1854,10 @@ err_register: dma_async_device_unregister(&sdma->dma_device); err_init: kfree(sdma->script_addrs); +err_irq: + clk_unprepare(sdma->clk_ahb); +err_clk: + clk_unprepare(sdma->clk_ipg); return ret; } @@ -1857,6 +1868,8 @@ static int sdma_remove(struct platform_device *pdev) dma_async_device_unregister(&sdma->dma_device); kfree(sdma->script_addrs); + clk_unprepare(sdma->clk_ahb); + clk_unprepare(sdma->clk_ipg); /* Kill the tasklet */ for (i = 0; i < MAX_DMA_CHANNELS; i++) { struct sdma_channel *sdmac = &sdma->channel[i]; diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 0574e1bbe45c..3ce5609b4611 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -763,7 +763,7 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev) /* Non-ECC RAM? */ printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__); res = -ENODEV; - goto err2; + goto err; } edac_dbg(3, "init mci\n"); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 06d345b087f8..759a39906a52 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2145,7 +2145,8 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, return desc; } - status = gpiod_request(desc, con_id); + /* If a connection label was passed use that, else use the device name as label */ + status = gpiod_request(desc, con_id ? con_id : dev_name(dev)); if (status < 0) return ERR_PTR(status); diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 7ed08fdc4c42..393e5335e33b 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -158,7 +158,7 @@ static void evict_entry(struct drm_gem_object *obj, size_t size = PAGE_SIZE * n; loff_t off = mmap_offset(obj) + (entry->obj_pgoff << PAGE_SHIFT); - const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE); + const int m = DIV_ROUND_UP(omap_obj->width << fmt, PAGE_SIZE); if (m > 1) { int i; @@ -415,7 +415,7 @@ static int fault_2d(struct drm_gem_object *obj, * into account in some of the math, so figure out virtual stride * in pages */ - const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE); + const int m = DIV_ROUND_UP(omap_obj->width << fmt, PAGE_SIZE); /* We don't use vmf->pgoff since that has the fake offset: */ pgoff = ((unsigned long)vmf->virtual_address - diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index fb6ad143873f..83aee9e814ba 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -238,9 +238,10 @@ int radeon_bo_create(struct radeon_device *rdev, * may be slow * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 */ - +#ifndef CONFIG_COMPILE_TEST #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ thanks to write-combining +#endif if (bo->flags & RADEON_GEM_GTT_WC) DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index d9007cc37be1..892d0a71d766 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -5964,9 +5964,9 @@ static void si_set_pcie_lane_width_in_smc(struct radeon_device *rdev, { u32 lane_width; u32 new_lane_width = - (radeon_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT; + ((radeon_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1; u32 current_lane_width = - (radeon_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT; + ((radeon_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1; if (new_lane_width != current_lane_width) { radeon_set_pcie_lanes(rdev, new_lane_width); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index bbb3b473c185..d1d399cce06a 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1331,7 +1331,7 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) * of implement() working on 8 byte chunks */ - int len = hid_report_len(report) + 7; + u32 len = hid_report_len(report) + 7; return kmalloc(len, flags); } @@ -1396,7 +1396,7 @@ void __hid_request(struct hid_device *hid, struct hid_report *report, { char *buf; int ret; - int len; + u32 len; buf = hid_alloc_report_buf(report, GFP_KERNEL); if (!buf) @@ -1422,14 +1422,14 @@ out: } EXPORT_SYMBOL_GPL(__hid_request); -int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, +int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, int interrupt) { struct hid_report_enum *report_enum = hid->report_enum + type; struct hid_report *report; struct hid_driver *hdrv; unsigned int a; - int rsize, csize = size; + u32 rsize, csize = size; u8 *cdata = data; int ret = 0; @@ -1487,7 +1487,7 @@ EXPORT_SYMBOL_GPL(hid_report_raw_event); * * This is data entry for lower layers. */ -int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int interrupt) +int hid_input_report(struct hid_device *hid, int type, u8 *data, u32 size, int interrupt) { struct hid_report_enum *report_enum; struct hid_driver *hdrv; diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 53e54855c366..8d74e691ac90 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -1258,7 +1258,8 @@ static void hidinput_led_worker(struct work_struct *work) led_work); struct hid_field *field; struct hid_report *report; - int len, ret; + int ret; + u32 len; __u8 *buf; field = hidinput_get_led_field(hid); diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index f62a9d6601cc..9de379c1b3fd 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -314,7 +314,8 @@ static struct attribute_group mt_attribute_group = { static void mt_get_feature(struct hid_device *hdev, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hdev); - int ret, size = hid_report_len(report); + int ret; + u32 size = hid_report_len(report); u8 *buf; /* @@ -919,7 +920,7 @@ static void mt_set_input_mode(struct hid_device *hdev) struct hid_report_enum *re; struct mt_class *cls = &td->mtclass; char *buf; - int report_len; + u32 report_len; if (td->inputmode < 0) return; diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 67cd059a8f46..41a4a2af9db1 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -110,8 +110,8 @@ struct rmi_data { u8 *writeReport; u8 *readReport; - int input_report_size; - int output_report_size; + u32 input_report_size; + u32 output_report_size; unsigned long flags; diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 9c2d7c23f296..c0c4df198725 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -197,6 +197,11 @@ static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t int ret = 0, len; unsigned char report_number; + if (!hidraw_table[minor] || !hidraw_table[minor]->exist) { + ret = -ENODEV; + goto out; + } + dev = hidraw_table[minor]->hid; if (!dev->ll_driver->raw_request) { diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 312aa1e33fb2..4c3ed078c6b9 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -137,10 +137,10 @@ struct i2c_hid { * register of the HID * descriptor. */ unsigned int bufsize; /* i2c buffer size */ - char *inbuf; /* Input buffer */ - char *rawbuf; /* Raw Input buffer */ - char *cmdbuf; /* Command buffer */ - char *argsbuf; /* Command arguments buffer */ + u8 *inbuf; /* Input buffer */ + u8 *rawbuf; /* Raw Input buffer */ + u8 *cmdbuf; /* Command buffer */ + u8 *argsbuf; /* Command arguments buffer */ unsigned long flags; /* device flags */ @@ -387,7 +387,8 @@ static int i2c_hid_hwreset(struct i2c_client *client) static void i2c_hid_get_input(struct i2c_hid *ihid) { - int ret, ret_size; + int ret; + u32 ret_size; int size = le16_to_cpu(ihid->hdesc.wMaxInputLength); if (size > ihid->bufsize) @@ -412,7 +413,7 @@ static void i2c_hid_get_input(struct i2c_hid *ihid) return; } - if (ret_size > size) { + if ((ret_size > size) || (ret_size <= 2)) { dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n", __func__, size, ret_size); return; diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index b24f1d3045f0..ac63e562071f 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -94,18 +94,20 @@ enum ina2xx_ids { ina219, ina226 }; struct ina2xx_config { u16 config_default; - int calibration_factor; + int calibration_value; int registers; int shunt_div; int bus_voltage_shift; int bus_voltage_lsb; /* uV */ - int power_lsb; /* uW */ + int power_lsb_factor; }; struct ina2xx_data { const struct ina2xx_config *config; long rshunt; + long current_lsb_uA; + long power_lsb_uW; struct mutex config_lock; struct regmap *regmap; @@ -115,21 +117,21 @@ struct ina2xx_data { static const struct ina2xx_config ina2xx_config[] = { [ina219] = { .config_default = INA219_CONFIG_DEFAULT, - .calibration_factor = 40960000, + .calibration_value = 4096, .registers = INA219_REGISTERS, .shunt_div = 100, .bus_voltage_shift = 3, .bus_voltage_lsb = 4000, - .power_lsb = 20000, + .power_lsb_factor = 20, }, [ina226] = { .config_default = INA226_CONFIG_DEFAULT, - .calibration_factor = 5120000, + .calibration_value = 2048, .registers = INA226_REGISTERS, .shunt_div = 400, .bus_voltage_shift = 0, .bus_voltage_lsb = 1250, - .power_lsb = 25000, + .power_lsb_factor = 25, }, }; @@ -168,12 +170,16 @@ static u16 ina226_interval_to_reg(int interval) return INA226_SHIFT_AVG(avg_bits); } +/* + * Calibration register is set to the best value, which eliminates + * truncation errors on calculating current register in hardware. + * According to datasheet (eq. 3) the best values are 2048 for + * ina226 and 4096 for ina219. They are hardcoded as calibration_value. + */ static int ina2xx_calibrate(struct ina2xx_data *data) { - u16 val = DIV_ROUND_CLOSEST(data->config->calibration_factor, - data->rshunt); - - return regmap_write(data->regmap, INA2XX_CALIBRATION, val); + return regmap_write(data->regmap, INA2XX_CALIBRATION, + data->config->calibration_value); } /* @@ -186,10 +192,6 @@ static int ina2xx_init(struct ina2xx_data *data) if (ret < 0) return ret; - /* - * Set current LSB to 1mA, shunt is in uOhms - * (equation 13 in datasheet). - */ return ina2xx_calibrate(data); } @@ -267,15 +269,15 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg, val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_POWER: - val = regval * data->config->power_lsb; + val = regval * data->power_lsb_uW; break; case INA2XX_CURRENT: - /* signed register, LSB=1mA (selected), in mA */ - val = (s16)regval; + /* signed register, result in mA */ + val = regval * data->current_lsb_uA; + val = DIV_ROUND_CLOSEST(val, 1000); break; case INA2XX_CALIBRATION: - val = DIV_ROUND_CLOSEST(data->config->calibration_factor, - regval); + val = regval; break; default: /* programmer goofed */ @@ -303,9 +305,32 @@ static ssize_t ina2xx_show_value(struct device *dev, ina2xx_get_value(data, attr->index, regval)); } -static ssize_t ina2xx_set_shunt(struct device *dev, - struct device_attribute *da, - const char *buf, size_t count) +/* + * In order to keep calibration register value fixed, the product + * of current_lsb and shunt_resistor should also be fixed and equal + * to shunt_voltage_lsb = 1 / shunt_div multiplied by 10^9 in order + * to keep the scale. + */ +static int ina2xx_set_shunt(struct ina2xx_data *data, long val) +{ + unsigned int dividend = DIV_ROUND_CLOSEST(1000000000, + data->config->shunt_div); + if (val <= 0 || val > dividend) + return -EINVAL; + + mutex_lock(&data->config_lock); + data->rshunt = val; + data->current_lsb_uA = DIV_ROUND_CLOSEST(dividend, val); + data->power_lsb_uW = data->config->power_lsb_factor * + data->current_lsb_uA; + mutex_unlock(&data->config_lock); + + return 0; +} + +static ssize_t ina2xx_store_shunt(struct device *dev, + struct device_attribute *da, + const char *buf, size_t count) { unsigned long val; int status; @@ -315,18 +340,9 @@ static ssize_t ina2xx_set_shunt(struct device *dev, if (status < 0) return status; - if (val == 0 || - /* Values greater than the calibration factor make no sense. */ - val > data->config->calibration_factor) - return -EINVAL; - - mutex_lock(&data->config_lock); - data->rshunt = val; - status = ina2xx_calibrate(data); - mutex_unlock(&data->config_lock); + status = ina2xx_set_shunt(data, val); if (status < 0) return status; - return count; } @@ -386,7 +402,7 @@ static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ina2xx_show_value, NULL, /* shunt resistance */ static SENSOR_DEVICE_ATTR(shunt_resistor, S_IRUGO | S_IWUSR, - ina2xx_show_value, ina2xx_set_shunt, + ina2xx_show_value, ina2xx_store_shunt, INA2XX_CALIBRATION); /* update interval (ina226 only) */ @@ -431,6 +447,7 @@ static int ina2xx_probe(struct i2c_client *client, /* set the device type */ data->config = &ina2xx_config[id->driver_data]; + mutex_init(&data->config_lock); if (of_property_read_u32(dev->of_node, "shunt-resistor", &val) < 0) { struct ina2xx_platform_data *pdata = dev_get_platdata(dev); @@ -441,10 +458,7 @@ static int ina2xx_probe(struct i2c_client *client, val = INA2XX_RSHUNT_DEFAULT; } - if (val <= 0 || val > data->config->calibration_factor) - return -ENODEV; - - data->rshunt = val; + ina2xx_set_shunt(data, val); ina2xx_regmap_config.max_register = data->config->registers; @@ -460,8 +474,6 @@ static int ina2xx_probe(struct i2c_client *client, return -ENODEV; } - mutex_init(&data->config_lock); - data->groups[group++] = &ina2xx_group; if (id->driver_data == ina226) data->groups[group++] = &ina226_group; diff --git a/drivers/i2c/busses/i2c-msm-v2.c b/drivers/i2c/busses/i2c-msm-v2.c index c0d962212720..67261bc10e80 100644 --- a/drivers/i2c/busses/i2c-msm-v2.c +++ b/drivers/i2c/busses/i2c-msm-v2.c @@ -2848,8 +2848,8 @@ static void i2c_msm_pm_rt_init(struct device *dev) {} static const struct dev_pm_ops i2c_msm_pm_ops = { #ifdef CONFIG_PM_SLEEP - .suspend_noirq = i2c_msm_pm_sys_suspend_noirq, - .resume_noirq = i2c_msm_pm_sys_resume_noirq, + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(i2c_msm_pm_sys_suspend_noirq, + i2c_msm_pm_sys_resume_noirq) #endif SET_RUNTIME_PM_OPS(i2c_msm_pm_rt_suspend, i2c_msm_pm_rt_resume, diff --git a/drivers/iio/adc/hi8435.c b/drivers/iio/adc/hi8435.c index c73c6c62a6ac..7401f102dff4 100644 --- a/drivers/iio/adc/hi8435.c +++ b/drivers/iio/adc/hi8435.c @@ -121,10 +121,21 @@ static int hi8435_write_event_config(struct iio_dev *idev, enum iio_event_direction dir, int state) { struct hi8435_priv *priv = iio_priv(idev); + int ret; + u32 tmp; + + if (state) { + ret = hi8435_readl(priv, HI8435_SO31_0_REG, &tmp); + if (ret < 0) + return ret; + if (tmp & BIT(chan->channel)) + priv->event_prev_val |= BIT(chan->channel); + else + priv->event_prev_val &= ~BIT(chan->channel); - priv->event_scan_mask &= ~BIT(chan->channel); - if (state) priv->event_scan_mask |= BIT(chan->channel); + } else + priv->event_scan_mask &= ~BIT(chan->channel); return 0; } @@ -442,13 +453,15 @@ static int hi8435_probe(struct spi_device *spi) priv->spi = spi; reset_gpio = devm_gpiod_get(&spi->dev, NULL, GPIOD_OUT_LOW); - if (IS_ERR(reset_gpio)) { - /* chip s/w reset if h/w reset failed */ + if (!IS_ERR(reset_gpio)) { + /* need >=100ns low pulse to reset chip */ + gpiod_set_raw_value_cansleep(reset_gpio, 0); + udelay(1); + gpiod_set_raw_value_cansleep(reset_gpio, 1); + } else { + /* s/w reset chip if h/w reset is not available */ hi8435_writeb(priv, HI8435_CTRL_REG, HI8435_CTRL_SRST); hi8435_writeb(priv, HI8435_CTRL_REG, 0); - } else { - udelay(5); - gpiod_set_value(reset_gpio, 1); } spi_set_drvdata(spi, idev); diff --git a/drivers/iio/magnetometer/st_magn_spi.c b/drivers/iio/magnetometer/st_magn_spi.c index 6325e7dc8e03..f3cb4dc05391 100644 --- a/drivers/iio/magnetometer/st_magn_spi.c +++ b/drivers/iio/magnetometer/st_magn_spi.c @@ -48,8 +48,6 @@ static int st_magn_spi_remove(struct spi_device *spi) } static const struct spi_device_id st_magn_id_table[] = { - { LSM303DLHC_MAGN_DEV_NAME }, - { LSM303DLM_MAGN_DEV_NAME }, { LIS3MDL_MAGN_DEV_NAME }, { LSM303AGR_MAGN_DEV_NAME }, {}, diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 960fcb613198..ea3bc9bb1b7a 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1230,6 +1230,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx, if (!optlen) return -EINVAL; + if (!ctx->cm_id->device) + return -EINVAL; + memset(&sa_path, 0, sizeof(sa_path)); ib_sa_unpack_path(path_data->path_rec, &sa_path); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9a99cee2665a..4fd2892613dd 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2581,9 +2581,11 @@ static int srp_abort(struct scsi_cmnd *scmnd) ret = FAST_IO_FAIL; else ret = FAILED; - srp_free_req(ch, req, scmnd, 0); - scmnd->result = DID_ABORT << 16; - scmnd->scsi_done(scmnd); + if (ret == SUCCESS) { + srp_free_req(ch, req, scmnd, 0); + scmnd->result = DID_ABORT << 16; + scmnd->scsi_done(scmnd); + } return ret; } @@ -3309,12 +3311,10 @@ static ssize_t srp_create_target(struct device *dev, num_online_nodes()); const int ch_end = ((node_idx + 1) * target->ch_count / num_online_nodes()); - const int cv_start = (node_idx * ibdev->num_comp_vectors / - num_online_nodes() + target->comp_vector) - % ibdev->num_comp_vectors; - const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors / - num_online_nodes() + target->comp_vector) - % ibdev->num_comp_vectors; + const int cv_start = node_idx * ibdev->num_comp_vectors / + num_online_nodes(); + const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / + num_online_nodes(); int cpu_idx = 0; for_each_online_cpu(cpu) { diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index a73874508c3a..cb3a8623ff54 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2974,12 +2974,8 @@ static void srpt_queue_response(struct se_cmd *cmd) } spin_unlock_irqrestore(&ioctx->spinlock, flags); - if (unlikely(transport_check_aborted_status(&ioctx->cmd, false) - || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) { - atomic_inc(&ch->req_lim_delta); - srpt_abort_cmd(ioctx); + if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) return; - } dir = ioctx->cmd.data_direction; diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index c9d491bc85e0..3851d5715772 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1082,6 +1082,13 @@ static int elan_probe(struct i2c_client *client, return error; } + /* Make sure there is something at this address */ + error = i2c_smbus_read_byte(client); + if (error < 0) { + dev_dbg(&client->dev, "nothing at this address: %d\n", error); + return -ENXIO; + } + /* Initialize the touchpad. */ error = elan_initialize(data); if (error) diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index a679e56c44cd..765879dcaf85 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -557,7 +557,14 @@ static int elan_i2c_finish_fw_update(struct i2c_client *client, long ret; int error; int len; - u8 buffer[ETP_I2C_INF_LENGTH]; + u8 buffer[ETP_I2C_REPORT_LEN]; + + len = i2c_master_recv(client, buffer, ETP_I2C_REPORT_LEN); + if (len != ETP_I2C_REPORT_LEN) { + error = len < 0 ? len : -EIO; + dev_warn(dev, "failed to read I2C data after FW WDT reset: %d (%d)\n", + error, len); + } reinit_completion(completion); enable_irq(client->irq); diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 51b96e9bf793..06ea28e5d7b4 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1715,6 +1715,17 @@ int elantech_init(struct psmouse *psmouse) etd->samples[0], etd->samples[1], etd->samples[2]); } + if (etd->samples[1] == 0x74 && etd->hw_version == 0x03) { + /* + * This module has a bug which makes absolute mode + * unusable, so let's abort so we'll be using standard + * PS/2 protocol. + */ + psmouse_info(psmouse, + "absolute mode broken, forcing standard PS/2 protocol\n"); + goto init_fail; + } + if (elantech_set_absolute_mode(psmouse)) { psmouse_err(psmouse, "failed to put touchpad into absolute mode.\n"); diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a7d516f973dd..10068a481e22 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -389,6 +389,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_max - 1, GFP_KERNEL); if (ret < 0) { kfree(svm); + kfree(sdev); goto out; } svm->pasid = ret; diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 9cb4b621fbc3..b92a19a594a1 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -72,7 +72,7 @@ send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb) if (sk->sk_state != MISDN_BOUND) continue; if (!cskb) - cskb = skb_copy(skb, GFP_KERNEL); + cskb = skb_copy(skb, GFP_ATOMIC); if (!cskb) { printk(KERN_WARNING "%s no skb\n", __func__); break; diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index b775e1efecd3..b9f71a87b7e1 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -281,7 +281,7 @@ static int pca955x_probe(struct i2c_client *client, "slave address 0x%02x\n", id->name, chip->bits, client->addr); - if (!i2c_check_functionality(adapter, I2C_FUNC_I2C)) + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) return -EIO; if (pdata) { diff --git a/drivers/leds/leds-qpnp-flash-v2.c b/drivers/leds/leds-qpnp-flash-v2.c index a58775953242..c90633b16fad 100644 --- a/drivers/leds/leds-qpnp-flash-v2.c +++ b/drivers/leds/leds-qpnp-flash-v2.c @@ -319,7 +319,7 @@ static inline int get_current_reg_code(int target_curr_ma, int ires_ua) if (!ires_ua || !target_curr_ma || (target_curr_ma < (ires_ua / 1000))) return 0; - return DIV_ROUND_UP(target_curr_ma * 1000, ires_ua) - 1; + return DIV_ROUND_CLOSEST(target_curr_ma * 1000, ires_ua) - 1; } static int qpnp_flash_led_read(struct qpnp_flash_led *led, u16 addr, u8 *data) diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 4d46f2ce606f..aa84fcfd59fc 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -514,15 +514,21 @@ struct open_bucket { /* * We keep multiple buckets open for writes, and try to segregate different - * write streams for better cache utilization: first we look for a bucket where - * the last write to it was sequential with the current write, and failing that - * we look for a bucket that was last used by the same task. + * write streams for better cache utilization: first we try to segregate flash + * only volume write streams from cached devices, secondly we look for a bucket + * where the last write to it was sequential with the current write, and + * failing that we look for a bucket that was last used by the same task. * * The ideas is if you've got multiple tasks pulling data into the cache at the * same time, you'll get better cache utilization if you try to segregate their * data and preserve locality. * - * For example, say you've starting Firefox at the same time you're copying a + * For example, dirty sectors of flash only volume is not reclaimable, if their + * dirty sectors mixed with dirty sectors of cached device, such buckets will + * be marked as dirty and won't be reclaimed, though the dirty data of cached + * device have been written back to backend device. + * + * And say you've starting Firefox at the same time you're copying a * bunch of files. Firefox will likely end up being fairly hot and stay in the * cache awhile, but the data you copied might not be; if you wrote all that * data to the same buckets it'd get invalidated at the same time. @@ -539,7 +545,10 @@ static struct open_bucket *pick_data_bucket(struct cache_set *c, struct open_bucket *ret, *ret_task = NULL; list_for_each_entry_reverse(ret, &c->data_buckets, list) - if (!bkey_cmp(&ret->key, search)) + if (UUID_FLASH_ONLY(&c->uuids[KEY_INODE(&ret->key)]) != + UUID_FLASH_ONLY(&c->uuids[KEY_INODE(search)])) + continue; + else if (!bkey_cmp(&ret->key, search)) goto found; else if (ret->last_write_point == write_point) ret_task = ret; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 02257cb19c0b..b9a526271f02 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -890,6 +890,12 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_lock(&bch_register_lock); + cancel_delayed_work_sync(&dc->writeback_rate_update); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) { + kthread_stop(dc->writeback_thread); + dc->writeback_thread = NULL; + } + memset(&dc->sb.set_uuid, 0, 16); SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index e34cf53bd068..ceff074b3b74 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/reboot.h> +#include <linux/vmalloc.h> #define DM_MSG_PREFIX "verity" @@ -32,6 +33,7 @@ #define DM_VERITY_OPT_LOGGING "ignore_corruption" #define DM_VERITY_OPT_RESTART "restart_on_corruption" #define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks" +#define DM_VERITY_OPT_AT_MOST_ONCE "check_at_most_once" #define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC) @@ -399,6 +401,18 @@ static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, } /* + * Moves the bio iter one data block forward. + */ +static inline void verity_bv_skip_block(struct dm_verity *v, + struct dm_verity_io *io, + struct bvec_iter *iter) +{ + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); + + bio_advance_iter(bio, iter, 1 << v->data_dev_block_bits); +} + +/* * Verify one "dm_verity_io" structure. */ static int verity_verify_io(struct dm_verity_io *io) @@ -410,9 +424,16 @@ static int verity_verify_io(struct dm_verity_io *io) for (b = 0; b < io->n_blocks; b++) { int r; + sector_t cur_block = io->block + b; struct shash_desc *desc = verity_io_hash_desc(v, io); - r = verity_hash_for_block(v, io, io->block + b, + if (v->validated_blocks && + likely(test_bit(cur_block, v->validated_blocks))) { + verity_bv_skip_block(v, io, &io->iter); + continue; + } + + r = verity_hash_for_block(v, io, cur_block, verity_io_want_digest(v, io), &is_zero); if (unlikely(r < 0)) @@ -445,13 +466,16 @@ static int verity_verify_io(struct dm_verity_io *io) return r; if (likely(memcmp(verity_io_real_digest(v, io), - verity_io_want_digest(v, io), v->digest_size) == 0)) + verity_io_want_digest(v, io), v->digest_size) == 0)) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); continue; + } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b, NULL, &start) == 0) + cur_block, NULL, &start) == 0) continue; else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, - io->block + b)) + cur_block)) return -EIO; } @@ -645,6 +669,8 @@ void verity_status(struct dm_target *ti, status_type_t type, args += DM_VERITY_OPTS_FEC; if (v->zero_digest) args++; + if (v->validated_blocks) + args++; if (!args) return; DMEMIT(" %u", args); @@ -663,6 +689,8 @@ void verity_status(struct dm_target *ti, status_type_t type, } if (v->zero_digest) DMEMIT(" " DM_VERITY_OPT_IGN_ZEROES); + if (v->validated_blocks) + DMEMIT(" " DM_VERITY_OPT_AT_MOST_ONCE); sz = verity_fec_status_table(v, sz, result, maxlen); break; } @@ -716,6 +744,7 @@ void verity_dtr(struct dm_target *ti) if (v->bufio) dm_bufio_client_destroy(v->bufio); + vfree(v->validated_blocks); kfree(v->salt); kfree(v->root_digest); kfree(v->zero_digest); @@ -737,6 +766,26 @@ void verity_dtr(struct dm_target *ti) } EXPORT_SYMBOL_GPL(verity_dtr); +static int verity_alloc_most_once(struct dm_verity *v) +{ + struct dm_target *ti = v->ti; + + /* the bitset can only handle INT_MAX blocks */ + if (v->data_blocks > INT_MAX) { + ti->error = "device too large to use check_at_most_once"; + return -E2BIG; + } + + v->validated_blocks = vzalloc(BITS_TO_LONGS(v->data_blocks) * + sizeof(unsigned long)); + if (!v->validated_blocks) { + ti->error = "failed to allocate bitset for check_at_most_once"; + return -ENOMEM; + } + + return 0; +} + static int verity_alloc_zero_digest(struct dm_verity *v) { int r = -ENOMEM; @@ -806,6 +855,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v) } continue; + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_AT_MOST_ONCE)) { + r = verity_alloc_most_once(v); + if (r) + return r; + continue; + } else if (verity_is_fec_opt_arg(arg_name)) { r = verity_fec_parse_opt_args(as, v, &argc, arg_name); if (r) @@ -1074,7 +1129,7 @@ EXPORT_SYMBOL_GPL(verity_ctr); static struct target_type verity_target = { .name = "verity", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index a90d1d416107..d216fc76d350 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -63,6 +63,7 @@ struct dm_verity { sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; struct dm_verity_fec *fec; /* forward error correction */ + unsigned long *validated_blocks; /* bitset blocks validated */ }; struct dm_verity_io { diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 494d01d0e92a..a7a561af05c9 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -945,8 +945,10 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev) cmsg.raid_slot = cpu_to_le32(rdev->desc_nr); lock_comm(cinfo); ret = __sendmsg(cinfo, &cmsg); - if (ret) + if (ret) { + unlock_comm(cinfo); return ret; + } cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE; ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX); cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5a334b947a16..9284acea4f7b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -110,8 +110,7 @@ static inline void unlock_device_hash_lock(struct r5conf *conf, int hash) static inline void lock_all_device_hash_locks_irq(struct r5conf *conf) { int i; - local_irq_disable(); - spin_lock(conf->hash_locks); + spin_lock_irq(conf->hash_locks); for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++) spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks); spin_lock(&conf->device_lock); @@ -121,9 +120,9 @@ static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf) { int i; spin_unlock(&conf->device_lock); - for (i = NR_STRIPE_HASH_LOCKS; i; i--) - spin_unlock(conf->hash_locks + i - 1); - local_irq_enable(); + for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--) + spin_unlock(conf->hash_locks + i); + spin_unlock_irq(conf->hash_locks); } /* bio's attached to a stripe+device for I/O are linked together in bi_sector @@ -726,12 +725,11 @@ static bool is_full_stripe_write(struct stripe_head *sh) static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) { - local_irq_disable(); if (sh1 > sh2) { - spin_lock(&sh2->stripe_lock); + spin_lock_irq(&sh2->stripe_lock); spin_lock_nested(&sh1->stripe_lock, 1); } else { - spin_lock(&sh1->stripe_lock); + spin_lock_irq(&sh1->stripe_lock); spin_lock_nested(&sh2->stripe_lock, 1); } } @@ -739,8 +737,7 @@ static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) { spin_unlock(&sh1->stripe_lock); - spin_unlock(&sh2->stripe_lock); - local_irq_enable(); + spin_unlock_irq(&sh2->stripe_lock); } /* Only freshly new full stripe normal write stripe can be added to a batch list */ diff --git a/drivers/media/i2c/cx25840/cx25840-core.c b/drivers/media/i2c/cx25840/cx25840-core.c index fe6eb78b6914..a47ab1947cc4 100644 --- a/drivers/media/i2c/cx25840/cx25840-core.c +++ b/drivers/media/i2c/cx25840/cx25840-core.c @@ -420,11 +420,13 @@ static void cx25840_initialize(struct i2c_client *client) INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); - prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); - queue_work(q, &state->fw_work); - schedule(); - finish_wait(&state->fw_wait, &wait); - destroy_workqueue(q); + if (q) { + prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); + queue_work(q, &state->fw_work); + schedule(); + finish_wait(&state->fw_wait, &wait); + destroy_workqueue(q); + } /* 6. */ cx25840_write(client, 0x115, 0x8c); @@ -631,11 +633,13 @@ static void cx23885_initialize(struct i2c_client *client) INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); - prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); - queue_work(q, &state->fw_work); - schedule(); - finish_wait(&state->fw_wait, &wait); - destroy_workqueue(q); + if (q) { + prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); + queue_work(q, &state->fw_work); + schedule(); + finish_wait(&state->fw_wait, &wait); + destroy_workqueue(q); + } /* Call the cx23888 specific std setup func, we no longer rely on * the generic cx24840 func. @@ -746,11 +750,13 @@ static void cx231xx_initialize(struct i2c_client *client) INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); - prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); - queue_work(q, &state->fw_work); - schedule(); - finish_wait(&state->fw_wait, &wait); - destroy_workqueue(q); + if (q) { + prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); + queue_work(q, &state->fw_work); + schedule(); + finish_wait(&state->fw_wait, &wait); + destroy_workqueue(q); + } cx25840_std_setup(client); diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index f838d9c7ed12..0fba4a2c1602 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -1370,8 +1370,13 @@ static int mceusb_dev_probe(struct usb_interface *intf, goto rc_dev_fail; /* wire up inbound data handler */ - usb_fill_int_urb(ir->urb_in, dev, pipe, ir->buf_in, maxp, - mceusb_dev_recv, ir, ep_in->bInterval); + if (usb_endpoint_xfer_int(ep_in)) + usb_fill_int_urb(ir->urb_in, dev, pipe, ir->buf_in, maxp, + mceusb_dev_recv, ir, ep_in->bInterval); + else + usb_fill_bulk_urb(ir->urb_in, dev, pipe, ir->buf_in, maxp, + mceusb_dev_recv, ir); + ir->urb_in->transfer_dma = ir->dma_in; ir->urb_in->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index dc3dd13db1be..5cbd15742050 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -101,7 +101,7 @@ static int get_v4l2_window32(struct v4l2_window __user *kp, static int put_v4l2_window32(struct v4l2_window __user *kp, struct v4l2_window32 __user *up) { - struct v4l2_clip __user *kclips = kp->clips; + struct v4l2_clip __user *kclips; struct v4l2_clip32 __user *uclips; compat_caddr_t p; u32 clipcount; @@ -116,6 +116,8 @@ static int put_v4l2_window32(struct v4l2_window __user *kp, if (!clipcount) return 0; + if (get_user(kclips, &kp->clips)) + return -EFAULT; if (get_user(p, &up->clips)) return -EFAULT; uclips = compat_ptr(p); diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index 3dc9ed2e0774..bb1e19f7ed5a 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -205,6 +205,10 @@ static int __vb2_queue_alloc(struct vb2_queue *q, enum vb2_memory memory, struct vb2_buffer *vb; int ret; + /* Ensure that q->num_buffers+num_buffers is below VB2_MAX_FRAME */ + num_buffers = min_t(unsigned int, num_buffers, + VB2_MAX_FRAME - q->num_buffers); + for (buffer = 0; buffer < num_buffers; ++buffer) { /* Allocate videobuf buffer structures */ vb = kzalloc(q->buf_struct_size, GFP_KERNEL); diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index f42d9c4e4561..cc277f7849b0 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -298,8 +298,11 @@ static void *qp_alloc_queue(u64 size, u32 flags) size_t pas_size; size_t vas_size; size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if); - const u64 num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + u64 num_pages; + if (size > SIZE_MAX - PAGE_SIZE) + return NULL; + num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; if (num_pages > (SIZE_MAX - queue_size) / (sizeof(*queue->kernel_if->u.g.pas) + @@ -624,9 +627,12 @@ static struct vmci_queue *qp_host_alloc_queue(u64 size) { struct vmci_queue *queue; size_t queue_page_size; - const u64 num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + u64 num_pages; const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if)); + if (size > SIZE_MAX - PAGE_SIZE) + return NULL; + num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; if (num_pages > (SIZE_MAX - queue_size) / sizeof(*queue->kernel_if->u.h.page)) return NULL; diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index de7def1f4f1c..35be47dafda2 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -3614,7 +3614,7 @@ void mmc_blk_cmdq_complete_rq(struct request *rq) * or disable state so cannot receive any completion of * other requests. */ - BUG_ON(test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)); + WARN_ON(test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)); /* clear pending request */ BUG_ON(!test_and_clear_bit(cmdq_req->tag, @@ -3648,7 +3648,7 @@ void mmc_blk_cmdq_complete_rq(struct request *rq) out: mmc_cmdq_clk_scaling_stop_busy(host, true, is_dcmd); - if (!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) { + if (!(err || cmdq_req->resp_err)) { mmc_host_clk_release(host); wake_up(&ctx_info->wait); mmc_put_card(host->card); diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 2af0e819d0cb..547d18c9feef 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -3011,8 +3011,16 @@ int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, u32 ocr) */ mmc_host_clk_hold(host); err = mmc_wait_for_cmd(host, &cmd, 0); - if (err) - goto err_command; + if (err) { + if (err == -ETIMEDOUT) { + pr_debug("%s: voltage switching failed with err %d\n", + mmc_hostname(host), err); + err = -EAGAIN; + goto power_cycle; + } else { + goto err_command; + } + } if (!mmc_host_is_spi(host) && (cmd.resp[0] & R1_ERROR)) { err = -EIO; diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index 76e8bce6f46e..ad572a0f2124 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -368,9 +368,9 @@ static void jz4740_mmc_set_irq_enabled(struct jz4740_mmc_host *host, host->irq_mask &= ~irq; else host->irq_mask |= irq; - spin_unlock_irqrestore(&host->lock, flags); writew(host->irq_mask, host->base + JZ_REG_MMC_IMASK); + spin_unlock_irqrestore(&host->lock, flags); } static void jz4740_mmc_clock_enable(struct jz4740_mmc_host *host, diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index b2fb0528c092..07ad86759d92 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -244,7 +244,7 @@ static int ubiblock_open(struct block_device *bdev, fmode_t mode) * in any case. */ if (mode & FMODE_WRITE) { - ret = -EPERM; + ret = -EROFS; goto out_unlock; } diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 9b7bc6326fa2..9556a4de159c 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -951,6 +951,17 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, return -EINVAL; } + /* + * Both UBI and UBIFS have been designed for SLC NAND and NOR flashes. + * MLC NAND is different and needs special care, otherwise UBI or UBIFS + * will die soon and you will lose all your data. + */ + if (mtd->type == MTD_MLCNANDFLASH) { + pr_err("ubi: refuse attaching mtd%d - MLC NAND is not supported\n", + mtd->index); + return -EINVAL; + } + if (ubi_num == UBI_DEV_NUM_AUTO) { /* Search for an empty slot in the @ubi_devices array */ for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++) diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c index 30d3999dddba..ed62f1efe6eb 100644 --- a/drivers/mtd/ubi/fastmap-wl.c +++ b/drivers/mtd/ubi/fastmap-wl.c @@ -360,7 +360,6 @@ static void ubi_fastmap_close(struct ubi_device *ubi) { int i; - flush_work(&ubi->fm_work); return_unused_pool_pebs(ubi, &ubi->fm_pool); return_unused_pool_pebs(ubi, &ubi->fm_wl_pool); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index eadccf498589..278d12888cab 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1490,39 +1490,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_close; } - /* If the mode uses primary, then the following is handled by - * bond_change_active_slave(). - */ - if (!bond_uses_primary(bond)) { - /* set promiscuity level to new slave */ - if (bond_dev->flags & IFF_PROMISC) { - res = dev_set_promiscuity(slave_dev, 1); - if (res) - goto err_close; - } - - /* set allmulti level to new slave */ - if (bond_dev->flags & IFF_ALLMULTI) { - res = dev_set_allmulti(slave_dev, 1); - if (res) - goto err_close; - } - - netif_addr_lock_bh(bond_dev); - - dev_mc_sync_multiple(slave_dev, bond_dev); - dev_uc_sync_multiple(slave_dev, bond_dev); - - netif_addr_unlock_bh(bond_dev); - } - - if (BOND_MODE(bond) == BOND_MODE_8023AD) { - /* add lacpdu mc addr to mc list */ - u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; - - dev_mc_add(slave_dev, lacpdu_multicast); - } - res = vlan_vids_add_by_dev(slave_dev, bond_dev); if (res) { netdev_err(bond_dev, "Couldn't add bond vlan ids to %s\n", @@ -1679,6 +1646,40 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_upper_unlink; } + /* If the mode uses primary, then the following is handled by + * bond_change_active_slave(). + */ + if (!bond_uses_primary(bond)) { + /* set promiscuity level to new slave */ + if (bond_dev->flags & IFF_PROMISC) { + res = dev_set_promiscuity(slave_dev, 1); + if (res) + goto err_sysfs_del; + } + + /* set allmulti level to new slave */ + if (bond_dev->flags & IFF_ALLMULTI) { + res = dev_set_allmulti(slave_dev, 1); + if (res) { + if (bond_dev->flags & IFF_PROMISC) + dev_set_promiscuity(slave_dev, -1); + goto err_sysfs_del; + } + } + + netif_addr_lock_bh(bond_dev); + dev_mc_sync_multiple(slave_dev, bond_dev); + dev_uc_sync_multiple(slave_dev, bond_dev); + netif_addr_unlock_bh(bond_dev); + + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + /* add lacpdu mc addr to mc list */ + u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; + + dev_mc_add(slave_dev, lacpdu_multicast); + } + } + bond->slave_cnt++; bond_compute_features(bond); bond_set_carrier(bond); @@ -1702,6 +1703,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) return 0; /* Undo stages on error */ +err_sysfs_del: + bond_sysfs_slave_del(new_slave); + err_upper_unlink: bond_upper_dev_unlink(bond_dev, slave_dev); @@ -1709,9 +1713,6 @@ err_unregister: netdev_rx_handler_unregister(slave_dev); err_detach: - if (!bond_uses_primary(bond)) - bond_hw_addr_flush(bond_dev, slave_dev); - vlan_vids_del_by_dev(slave_dev, bond_dev); if (rcu_access_pointer(bond->primary_slave) == new_slave) RCU_INIT_POINTER(bond->primary_slave, NULL); @@ -2555,11 +2556,13 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); + slave->new_link = BOND_LINK_NOCHANGE; + if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, trans_start, 1) && bond_time_in_interval(bond, slave->last_rx, 1)) { - slave->link = BOND_LINK_UP; + slave->new_link = BOND_LINK_UP; slave_state_changed = 1; /* primary_slave has no meaning in round-robin @@ -2586,7 +2589,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) if (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, slave->last_rx, 2)) { - slave->link = BOND_LINK_DOWN; + slave->new_link = BOND_LINK_DOWN; slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) @@ -2617,6 +2620,11 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) if (!rtnl_trylock()) goto re_arm; + bond_for_each_slave(bond, slave, iter) { + if (slave->new_link != BOND_LINK_NOCHANGE) + slave->link = slave->new_link; + } + if (slave_state_changed) { bond_slave_state_change(bond); if (BOND_MODE(bond) == BOND_MODE_XOR) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index d1103d612d8b..949a82458a29 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3943,15 +3943,26 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) /* when transmitting in a vf, start bd must hold the ethertype * for fw to enforce it */ + u16 vlan_tci = 0; #ifndef BNX2X_STOP_ON_ERROR - if (IS_VF(bp)) + if (IS_VF(bp)) { #endif - tx_start_bd->vlan_or_ethertype = - cpu_to_le16(ntohs(eth->h_proto)); + /* Still need to consider inband vlan for enforced */ + if (__vlan_get_tag(skb, &vlan_tci)) { + tx_start_bd->vlan_or_ethertype = + cpu_to_le16(ntohs(eth->h_proto)); + } else { + tx_start_bd->bd_flags.as_bitfield |= + (X_ETH_INBAND_VLAN << + ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); + tx_start_bd->vlan_or_ethertype = + cpu_to_le16(vlan_tci); + } #ifndef BNX2X_STOP_ON_ERROR - else + } else { /* used by FW for packet accounting */ tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod); + } #endif } diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 0f6811860ad5..a36e38676640 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -2845,7 +2845,7 @@ bfa_ioc_get_adapter_optrom_ver(struct bfa_ioc *ioc, char *optrom_ver) static void bfa_ioc_get_adapter_manufacturer(struct bfa_ioc *ioc, char *manufacturer) { - memcpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); + strncpy(manufacturer, BFA_MFG_NAME, BFA_ADAPTER_MFG_NAME_LEN); } static void diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index cf61a5869c6e..de23f23b41de 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -6076,13 +6076,18 @@ int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, if (!t4_fw_matches_chip(adap, fw_hdr)) return -EINVAL; + /* Disable FW_OK flag so that mbox commands with FW_OK flag set + * wont be sent when we are flashing FW. + */ + adap->flags &= ~FW_OK; + ret = t4_fw_halt(adap, mbox, force); if (ret < 0 && !force) - return ret; + goto out; ret = t4_load_fw(adap, fw_data, size); if (ret < 0) - return ret; + goto out; /* * Older versions of the firmware don't understand the new @@ -6093,7 +6098,17 @@ int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, * its header flags to see if it advertises the capability. */ reset = ((be32_to_cpu(fw_hdr->flags) & FW_HDR_FLAGS_RESET_HALT) == 0); - return t4_fw_restart(adap, mbox, reset); + ret = t4_fw_restart(adap, mbox, reset); + + /* Grab potentially new Firmware Device Log parameters so we can see + * how healthy the new Firmware is. It's okay to contact the new + * Firmware for these parameters even though, as far as it's + * concerned, we've never said "HELLO" to it ... + */ + (void)t4_init_devlog_params(adap); +out: + adap->flags |= FW_OK; + return ret; } /** @@ -7696,7 +7711,16 @@ int t4_cim_read_la(struct adapter *adap, u32 *la_buf, unsigned int *wrptr) ret = t4_cim_read(adap, UP_UP_DBG_LA_DATA_A, 1, &la_buf[i]); if (ret) break; - idx = (idx + 1) & UPDBGLARDPTR_M; + + /* Bits 0-3 of UpDbgLaRdPtr can be between 0000 to 1001 to + * identify the 32-bit portion of the full 312-bit data + */ + if (is_t6(adap->params.chip) && (idx & 0xf) >= 9) + idx = (idx & 0xff0) + 0x10; + else + idx++; + /* address can't exceed 0xfff */ + idx &= UPDBGLARDPTR_M; } restart: if (cfg & UPDBGLAEN_F) { diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index fa3786a9d30e..ec8ffd7eae33 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2604,8 +2604,8 @@ void t4vf_sge_stop(struct adapter *adapter) int t4vf_sge_init(struct adapter *adapter) { struct sge_params *sge_params = &adapter->params.sge; - u32 fl0 = sge_params->sge_fl_buffer_size[0]; - u32 fl1 = sge_params->sge_fl_buffer_size[1]; + u32 fl_small_pg = sge_params->sge_fl_buffer_size[0]; + u32 fl_large_pg = sge_params->sge_fl_buffer_size[1]; struct sge *s = &adapter->sge; unsigned int ingpadboundary, ingpackboundary; @@ -2614,9 +2614,20 @@ int t4vf_sge_init(struct adapter *adapter) * the Physical Function Driver. Ideally we should be able to deal * with _any_ configuration. Practice is different ... */ - if (fl0 != PAGE_SIZE || (fl1 != 0 && fl1 <= fl0)) { + + /* We only bother using the Large Page logic if the Large Page Buffer + * is larger than our Page Size Buffer. + */ + if (fl_large_pg <= fl_small_pg) + fl_large_pg = 0; + + /* The Page Size Buffer must be exactly equal to our Page Size and the + * Large Page Size Buffer should be 0 (per above) or a power of 2. + */ + if (fl_small_pg != PAGE_SIZE || + (fl_large_pg & (fl_large_pg - 1)) != 0) { dev_err(adapter->pdev_dev, "bad SGE FL buffer sizes [%d, %d]\n", - fl0, fl1); + fl_small_pg, fl_large_pg); return -EINVAL; } if ((sge_params->sge_control & RXPKTCPLMODE_F) == 0) { @@ -2627,8 +2638,8 @@ int t4vf_sge_init(struct adapter *adapter) /* * Now translate the adapter parameters into our internal forms. */ - if (fl1) - s->fl_pg_order = ilog2(fl1) - PAGE_SHIFT; + if (fl_large_pg) + s->fl_pg_order = ilog2(fl_large_pg) - PAGE_SHIFT; s->stat_len = ((sge_params->sge_control & EGRSTATUSPAGESIZE_F) ? 128 : 64); s->pktshift = PKTSHIFT_G(sge_params->sge_control); diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index 40071dad1c57..9c76f1a2f57b 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -382,7 +382,7 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev) { const struct of_device_id *id = of_match_device(fsl_pq_mdio_match, &pdev->dev); - const struct fsl_pq_mdio_data *data = id->data; + const struct fsl_pq_mdio_data *data; struct device_node *np = pdev->dev.of_node; struct resource res; struct device_node *tbi; @@ -390,6 +390,13 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev) struct mii_bus *new_bus; int err; + if (!id) { + dev_err(&pdev->dev, "Failed to match device\n"); + return -ENODEV; + } + + data = id->data; + dev_dbg(&pdev->dev, "found %s compatible node\n", id->compatible); new_bus = mdiobus_alloc_size(sizeof(*priv)); diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 5d7db6c01c46..f301c03c527b 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -342,6 +342,7 @@ static int emac_reset(struct emac_instance *dev) { struct emac_regs __iomem *p = dev->emacp; int n = 20; + bool __maybe_unused try_internal_clock = false; DBG(dev, "reset" NL); @@ -354,6 +355,7 @@ static int emac_reset(struct emac_instance *dev) } #ifdef CONFIG_PPC_DCR_NATIVE +do_retry: /* * PPC460EX/GT Embedded Processor Advanced User's Manual * section 28.10.1 Mode Register 0 (EMACx_MR0) states: @@ -361,10 +363,19 @@ static int emac_reset(struct emac_instance *dev) * of the EMAC. If none is present, select the internal clock * (SDR0_ETH_CFG[EMACx_PHY_CLK] = 1). * After a soft reset, select the external clock. + * + * The AR8035-A PHY Meraki MR24 does not provide a TX Clk if the + * ethernet cable is not attached. This causes the reset to timeout + * and the PHY detection code in emac_init_phy() is unable to + * communicate and detect the AR8035-A PHY. As a result, the emac + * driver bails out early and the user has no ethernet. + * In order to stay compatible with existing configurations, the + * driver will temporarily switch to the internal clock, after + * the first reset fails. */ if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) { - if (dev->phy_address == 0xffffffff && - dev->phy_map == 0xffffffff) { + if (try_internal_clock || (dev->phy_address == 0xffffffff && + dev->phy_map == 0xffffffff)) { /* No PHY: select internal loop clock before reset */ dcri_clrset(SDR0, SDR0_ETH_CFG, 0, SDR0_ETH_CFG_ECS << dev->cell_index); @@ -382,8 +393,15 @@ static int emac_reset(struct emac_instance *dev) #ifdef CONFIG_PPC_DCR_NATIVE if (emac_has_feature(dev, EMAC_FTR_460EX_PHY_CLK_FIX)) { - if (dev->phy_address == 0xffffffff && - dev->phy_map == 0xffffffff) { + if (!n && !try_internal_clock) { + /* first attempt has timed out. */ + n = 20; + try_internal_clock = true; + goto do_retry; + } + + if (try_internal_clock || (dev->phy_address == 0xffffffff && + dev->phy_map == 0xffffffff)) { /* No PHY: restore external clock source after reset */ dcri_clrset(SDR0, SDR0_ETH_CFG, SDR0_ETH_CFG_ECS << dev->cell_index, 0); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index e356e9187e84..20d8806d2bff 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1182,6 +1182,7 @@ static void e1000e_tx_hwtstamp_work(struct work_struct *work) struct e1000_hw *hw = &adapter->hw; if (er32(TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID) { + struct sk_buff *skb = adapter->tx_hwtstamp_skb; struct skb_shared_hwtstamps shhwtstamps; u64 txstmp; @@ -1190,9 +1191,14 @@ static void e1000e_tx_hwtstamp_work(struct work_struct *work) e1000e_systim_to_hwtstamp(adapter, &shhwtstamps, txstmp); - skb_tstamp_tx(adapter->tx_hwtstamp_skb, &shhwtstamps); - dev_kfree_skb_any(adapter->tx_hwtstamp_skb); + /* Clear the global tx_hwtstamp_skb pointer and force writes + * prior to notifying the stack of a Tx timestamp. + */ adapter->tx_hwtstamp_skb = NULL; + wmb(); /* force write prior to skb_tstamp_tx */ + + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); } else if (time_after(jiffies, adapter->tx_hwtstamp_start + adapter->tx_timeout_factor * HZ)) { dev_kfree_skb_any(adapter->tx_hwtstamp_skb); @@ -6589,12 +6595,17 @@ static int e1000e_pm_thaw(struct device *dev) static int e1000e_pm_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); + int rc; e1000e_flush_lpic(pdev); e1000e_pm_freeze(dev); - return __e1000_shutdown(pdev, false); + rc = __e1000_shutdown(pdev, false); + if (rc) + e1000e_pm_thaw(dev); + + return rc; } static int e1000e_pm_resume(struct device *dev) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 4b62aa1f9ff8..6e5065f0907b 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5079,7 +5079,7 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&hw->restart_work, sky2_restart); pci_set_drvdata(pdev, hw); - pdev->d3_delay = 150; + pdev->d3_delay = 200; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 1d4e2e054647..897d061e4f03 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -35,6 +35,7 @@ #include <linux/etherdevice.h> #include <linux/mlx4/cmd.h> +#include <linux/mlx4/qp.h> #include <linux/export.h> #include "mlx4.h" @@ -985,16 +986,21 @@ int mlx4_flow_attach(struct mlx4_dev *dev, if (IS_ERR(mailbox)) return PTR_ERR(mailbox); + if (!mlx4_qp_lookup(dev, rule->qpn)) { + mlx4_err_rule(dev, "QP doesn't exist\n", rule); + ret = -EINVAL; + goto out; + } + trans_rule_ctrl_to_hw(rule, mailbox->buf); size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); list_for_each_entry(cur, &rule->list, list) { ret = parse_trans_rule(dev, cur, mailbox->buf + size); - if (ret < 0) { - mlx4_free_cmd_mailbox(dev, mailbox); - return ret; - } + if (ret < 0) + goto out; + size += ret; } @@ -1021,6 +1027,7 @@ int mlx4_flow_attach(struct mlx4_dev *dev, } } +out: mlx4_free_cmd_mailbox(dev, mailbox); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index d8359ffba026..62f1a3433a62 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -381,6 +381,19 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) __mlx4_qp_free_icm(dev, qpn); } +struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + struct mlx4_qp *qp; + + spin_lock(&qp_table->lock); + + qp = __mlx4_qp_lookup(dev, qpn); + + spin_unlock(&qp_table->lock); + return qp; +} + int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -468,6 +481,12 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, } if (attr & MLX4_UPDATE_QP_QOS_VPORT) { + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_QOS_VPP)) { + mlx4_warn(dev, "Granular QoS per VF is not enabled\n"); + err = -EOPNOTSUPP; + goto out; + } + qp_mask |= 1ULL << MLX4_UPD_QP_MASK_QOS_VPP; cmd->qp_context.qos_vport = params->qos_vport; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index d1fc7fa87b05..e3080fbd9d00 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -5040,6 +5040,13 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } +static void update_qos_vpp(struct mlx4_update_qp_context *ctx, + struct mlx4_vf_immed_vlan_work *work) +{ + ctx->qp_mask |= cpu_to_be64(1ULL << MLX4_UPD_QP_MASK_QOS_VPP); + ctx->qp_context.qos_vport = work->qos_vport; +} + void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) { struct mlx4_vf_immed_vlan_work *work = @@ -5144,11 +5151,10 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) qp->sched_queue & 0xC7; upd_context->qp_context.pri_path.sched_queue |= ((work->qos & 0x7) << 3); - upd_context->qp_mask |= - cpu_to_be64(1ULL << - MLX4_UPD_QP_MASK_QOS_VPP); - upd_context->qp_context.qos_vport = - work->qos_vport; + + if (dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_QOS_VPP) + update_qos_vpp(upd_context, work); } err = mlx4_cmd(dev, mailbox->dma, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f5c1f4acc57b..7c42be586be8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -513,7 +513,6 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); @@ -523,18 +522,11 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), priv->irq_info[i].mask); - err = irq_set_affinity_hint(irq, priv->irq_info[i].mask); - if (err) { - mlx5_core_warn(mdev, "irq_set_affinity_hint failed,irq 0x%.4x", - irq); - goto err_clear_mask; - } + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, priv->irq_info[i].mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); return 0; - -err_clear_mask: - free_cpumask_var(priv->irq_info[i].mask); - return err; } static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c index b8d5270359cd..e30676515529 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c @@ -247,7 +247,7 @@ nx_fw_cmd_set_mtu(struct netxen_adapter *adapter, int mtu) cmd.req.arg3 = 0; if (recv_ctx->state == NX_HOST_CTX_STATE_ACTIVE) - netxen_issue_cmd(adapter, &cmd); + rcode = netxen_issue_cmd(adapter, &cmd); if (rcode != NX_RCODE_SUCCESS) return -EIO; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index 509b596cf1e8..bd1ec70fb736 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -341,7 +341,7 @@ qlcnic_pcie_sem_lock(struct qlcnic_adapter *adapter, int sem, u32 id_reg) } return -EIO; } - usleep_range(1000, 1500); + udelay(1200); } if (id_reg) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c index be258d90de9e..e3223f2fe2ff 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c @@ -765,7 +765,7 @@ int ql_core_dump(struct ql_adapter *qdev, struct ql_mpi_coredump *mpi_coredump) sizeof(struct mpi_coredump_global_header); mpi_coredump->mpi_global_header.imageSize = sizeof(struct ql_mpi_coredump); - memcpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump", + strncpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump", sizeof(mpi_coredump->mpi_global_header.idString)); /* Get generic NIC reg dump */ @@ -1255,7 +1255,7 @@ static void ql_gen_reg_dump(struct ql_adapter *qdev, sizeof(struct mpi_coredump_global_header); mpi_coredump->mpi_global_header.imageSize = sizeof(struct ql_reg_dump); - memcpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump", + strncpy(mpi_coredump->mpi_global_header.idString, "MPI Coredump", sizeof(mpi_coredump->mpi_global_header.idString)); diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 1ef03939d25f..c90ae4d4be7d 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -296,8 +296,9 @@ qcaspi_receive(struct qcaspi *qca) /* Allocate rx SKB if we don't have one available. */ if (!qca->rx_skb) { - qca->rx_skb = netdev_alloc_skb(net_dev, - net_dev->mtu + VLAN_ETH_HLEN); + qca->rx_skb = netdev_alloc_skb_ip_align(net_dev, + net_dev->mtu + + VLAN_ETH_HLEN); if (!qca->rx_skb) { netdev_dbg(net_dev, "out of RX resources\n"); qca->stats.out_of_mem++; @@ -377,7 +378,7 @@ qcaspi_receive(struct qcaspi *qca) qca->rx_skb, qca->rx_skb->dev); qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY; netif_rx_ni(qca->rx_skb); - qca->rx_skb = netdev_alloc_skb(net_dev, + qca->rx_skb = netdev_alloc_skb_ip_align(net_dev, net_dev->mtu + VLAN_ETH_HLEN); if (!qca->rx_skb) { netdev_dbg(net_dev, "out of RX resources\n"); @@ -759,7 +760,8 @@ qcaspi_netdev_init(struct net_device *dev) if (!qca->rx_buffer) return -ENOBUFS; - qca->rx_skb = netdev_alloc_skb(dev, qca->net_dev->mtu + VLAN_ETH_HLEN); + qca->rx_skb = netdev_alloc_skb_ip_align(dev, qca->net_dev->mtu + + VLAN_ETH_HLEN); if (!qca->rx_skb) { kfree(qca->rx_buffer); netdev_info(qca->net_dev, "Failed to allocate RX sk_buff.\n"); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 3783c40f568b..a82c89af7124 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -8411,12 +8411,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_msi_4; } + pci_set_drvdata(pdev, dev); + rc = register_netdev(dev); if (rc < 0) goto err_out_cnt_5; - pci_set_drvdata(pdev, dev); - netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n", rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr, (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 424d1dee55c9..afaf79b8761f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3222,7 +3222,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) /* MDIO bus init */ ret = sh_mdio_init(mdp, pd); if (ret) { - dev_err(&ndev->dev, "failed to initialise MDIO\n"); + dev_err(&pdev->dev, "failed to initialise MDIO\n"); goto out_release; } diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 435466c17852..c69b0bdd891d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -280,6 +280,10 @@ struct cpsw_ss_regs { /* Bit definitions for the CPSW1_TS_SEQ_LTYPE register */ #define CPSW_V1_SEQ_ID_OFS_SHIFT 16 +#define CPSW_MAX_BLKS_TX 15 +#define CPSW_MAX_BLKS_TX_SHIFT 4 +#define CPSW_MAX_BLKS_RX 5 + struct cpsw_host_regs { u32 max_blks; u32 blk_cnt; @@ -1127,11 +1131,23 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) switch (priv->version) { case CPSW_VERSION_1: slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP); + /* Increase RX FIFO size to 5 for supporting fullduplex + * flow control mode + */ + slave_write(slave, + (CPSW_MAX_BLKS_TX << CPSW_MAX_BLKS_TX_SHIFT) | + CPSW_MAX_BLKS_RX, CPSW1_MAX_BLKS); break; case CPSW_VERSION_2: case CPSW_VERSION_3: case CPSW_VERSION_4: slave_write(slave, TX_PRIORITY_MAPPING, CPSW2_TX_PRI_MAP); + /* Increase RX FIFO size to 5 for supporting fullduplex + * flow control mode + */ + slave_write(slave, + (CPSW_MAX_BLKS_TX << CPSW_MAX_BLKS_TX_SHIFT) | + CPSW_MAX_BLKS_RX, CPSW2_MAX_BLKS); break; } diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index 49fe59b180a8..a75ce9051a7f 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -574,6 +574,8 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) case HDLCDRVCTL_CALIBRATE: if(!capable(CAP_SYS_RAWIO)) return -EPERM; + if (s->par.bitrate <= 0) + return -EINVAL; if (bi.data.calibrate > INT_MAX / s->par.bitrate) return -EINVAL; s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 7d0690433ee0..7d2cf015c5e7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -148,6 +148,12 @@ static inline int phy_aneg_done(struct phy_device *phydev) if (phydev->drv->aneg_done) return phydev->drv->aneg_done(phydev); + /* Avoid genphy_aneg_done() if the Clause 45 PHY does not + * implement Clause 22 registers + */ + if (phydev->is_c45 && !(phydev->c45_ids.devices_in_package & BIT(0))) + return -EINVAL; + return genphy_aneg_done(phydev); } diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index f7e8c79349ad..12a627fcc02c 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -501,7 +501,6 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.mtu = dst_mtu(&rt->dst); if (!po->chan.mtu) po->chan.mtu = PPP_MRU; - ip_rt_put(rt); po->chan.mtu -= PPTP_HEADER_OVERHEAD; po->chan.hdrlen = 2 + sizeof(struct pptp_gre_header); diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 27ed25252aac..cfd81eb1b532 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -509,6 +509,10 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) if(x < 0 || x > comp->rslot_limit) goto bad; + /* Check if the cstate is initialized */ + if (!comp->rstate[x].initialized) + goto bad; + comp->flags &=~ SLF_TOSS; comp->recv_current = x; } else { @@ -673,6 +677,7 @@ slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) if (cs->cs_tcp.doff > 5) memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4); cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2; + cs->initialized = true; /* Put headers back on packet * Neither header checksum is recalculated */ diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index f9343bee1de3..6578127db847 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -705,6 +705,12 @@ static const struct usb_device_id products[] = { USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, { + /* Cinterion AHS3 modem by GEMALTO */ + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0055, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, +}, { /* Telit modules */ USB_VENDOR_AND_INTERFACE_INFO(0x1bc7, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 72cb30828a12..c8e98c8e29fa 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1069,6 +1069,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) u16 n = 0, index, ndplen; u8 ready2send = 0; u32 delayed_ndp_size; + size_t padding_count; /* When our NDP gets written in cdc_ncm_ndp(), then skb_out->len gets updated * accordingly. Otherwise, we should check here. @@ -1225,11 +1226,13 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) * a ZLP after full sized NTBs. */ if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && - skb_out->len > ctx->min_tx_pkt) - memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0, - ctx->tx_max - skb_out->len); - else if (skb_out->len < ctx->tx_max && (skb_out->len % dev->maxpacket) == 0) + skb_out->len > ctx->min_tx_pkt) { + padding_count = ctx->tx_max - skb_out->len; + memset(skb_put(skb_out, padding_count), 0, padding_count); + } else if (skb_out->len < ctx->tx_max && + (skb_out->len % dev->maxpacket) == 0) { *skb_put(skb_out, 1) = 0; /* force short packet */ + } /* set final frame length */ nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ebdee8f01f65..a6d429950cb0 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -618,7 +618,8 @@ static int lan78xx_read_otp(struct lan78xx_net *dev, u32 offset, offset += 0x100; else ret = -EINVAL; - ret = lan78xx_read_raw_otp(dev, offset, length, data); + if (!ret) + ret = lan78xx_read_raw_otp(dev, offset, length, data); } return ret; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8dfc75250583..d01285250204 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -556,7 +556,12 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq, hdr = skb_vnet_hdr(skb); sg_init_table(rq->sg, 2); sg_set_buf(rq->sg, hdr, vi->hdr_len); - skb_to_sgvec(skb, rq->sg + 1, 0, skb->len); + + err = skb_to_sgvec(skb, rq->sg + 1, 0, skb->len); + if (unlikely(err < 0)) { + dev_kfree_skb(skb); + return err; + } err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp); if (err < 0) @@ -858,7 +863,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) struct virtio_net_hdr_mrg_rxbuf *hdr; const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; struct virtnet_info *vi = sq->vq->vdev->priv; - unsigned num_sg; + int num_sg; unsigned hdr_len = vi->hdr_len; bool can_push; @@ -911,11 +916,16 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) if (can_push) { __skb_push(skb, hdr_len); num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len); + if (unlikely(num_sg < 0)) + return num_sg; /* Pull header back to avoid skew in tx bytes calculations. */ __skb_pull(skb, hdr_len); } else { sg_set_buf(sq->sg, hdr, hdr_len); - num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1; + num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len); + if (unlikely(num_sg < 0)) + return num_sg; + num_sg++; } return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC); } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 82bf85ae5d08..419c045d0752 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2789,6 +2789,11 @@ vmxnet3_force_close(struct vmxnet3_adapter *adapter) /* we need to enable NAPI, otherwise dev_close will deadlock */ for (i = 0; i < adapter->num_rx_queues; i++) napi_enable(&adapter->rx_queue[i].napi); + /* + * Need to clear the quiesce bit to ensure that vmxnet3_close + * can quiesce the device properly + */ + clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); dev_close(adapter->netdev); } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ac945f8781ac..d3d59122a357 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -550,13 +550,15 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); - if (!IS_ERR(neigh)) + if (!IS_ERR(neigh)) { ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; + } rcu_read_unlock_bh(); err: - if (unlikely(ret < 0)) - vrf_tx_error(skb->dev, skb); + vrf_tx_error(skb->dev, skb); return ret; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e4ff1e45c02e..c41378214ede 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -962,7 +962,7 @@ static bool vxlan_snoop(struct net_device *dev, return false; /* Don't migrate static entries, drop packets */ - if (f->state & NUD_NOARP) + if (f->state & (NUD_PERMANENT | NUD_NOARP)) return true; if (net_ratelimit()) diff --git a/drivers/net/wireless/ath/ath5k/debug.c b/drivers/net/wireless/ath/ath5k/debug.c index 654a1e33f827..7c5f189cace7 100644 --- a/drivers/net/wireless/ath/ath5k/debug.c +++ b/drivers/net/wireless/ath/ath5k/debug.c @@ -939,7 +939,10 @@ static int open_file_eeprom(struct inode *inode, struct file *file) } for (i = 0; i < eesize; ++i) { - AR5K_EEPROM_READ(i, val); + if (!ath5k_hw_nvram_read(ah, i, &val)) { + ret = -EIO; + goto freebuf; + } buf[i] = val; } diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 0881ba8535f4..c78abfc7bd96 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -247,7 +247,10 @@ static const UCHAR b4_default_startup_parms[] = { 0x04, 0x08, /* Noise gain, limit offset */ 0x28, 0x28, /* det rssi, med busy offsets */ 7, /* det sync thresh */ - 0, 2, 2 /* test mode, min, max */ + 0, 2, 2, /* test mode, min, max */ + 0, /* rx/tx delay */ + 0, 0, 0, 0, 0, 0, /* current BSS id */ + 0 /* hop set */ }; /*===========================================================================*/ @@ -598,7 +601,7 @@ static void init_startup_params(ray_dev_t *local) * a_beacon_period = hops a_beacon_period = KuS *//* 64ms = 010000 */ if (local->fw_ver == 0x55) { - memcpy((UCHAR *) &local->sparm.b4, b4_default_startup_parms, + memcpy(&local->sparm.b4, b4_default_startup_parms, sizeof(struct b4_startup_params)); /* Translate sane kus input values to old build 4/5 format */ /* i = hop time in uS truncated to 3 bytes */ diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c index b7f72f9c7988..b3691712df61 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c @@ -1454,6 +1454,7 @@ static int rtl8187_probe(struct usb_interface *intf, goto err_free_dev; } mutex_init(&priv->io_mutex); + mutex_init(&priv->conf_mutex); SET_IEEE80211_DEV(dev, &intf->dev); usb_set_intfdata(intf, dev); @@ -1627,7 +1628,6 @@ static int rtl8187_probe(struct usb_interface *intf, printk(KERN_ERR "rtl8187: Cannot register device\n"); goto err_free_dmabuf; } - mutex_init(&priv->conf_mutex); skb_queue_head_init(&priv->b_tx_status.queue); wiphy_info(dev->wiphy, "hwaddr %pM, %s V%d + %s, rfkill mask %d\n", diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 9bee3f11898a..869411f55d88 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -1196,8 +1196,7 @@ static void wl1251_op_bss_info_changed(struct ieee80211_hw *hw, WARN_ON(wl->bss_type != BSS_TYPE_STA_BSS); enable = bss_conf->arp_addr_cnt == 1 && bss_conf->assoc; - wl1251_acx_arp_ip_filter(wl, enable, addr); - + ret = wl1251_acx_arp_ip_filter(wl, enable, addr); if (ret < 0) goto out_sleep; } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 0b8d2655985f..fee4c01fbdfd 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -2024,7 +2024,10 @@ static void netback_changed(struct xenbus_device *dev, case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: + break; + case XenbusStateUnknown: + wake_up_all(&module_unload_q); break; case XenbusStateInitWait: @@ -2155,7 +2158,9 @@ static int xennet_remove(struct xenbus_device *dev) xenbus_switch_state(dev, XenbusStateClosing); wait_event(module_unload_q, xenbus_read_driver_state(dev->otherend) == - XenbusStateClosing); + XenbusStateClosing || + xenbus_read_driver_state(dev->otherend) == + XenbusStateUnknown); xenbus_switch_state(dev, XenbusStateClosed); wait_event(module_unload_q, diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 0b3e0bfa7be5..572ca192cb1f 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -587,6 +587,7 @@ static unsigned int get_slot_status(struct acpiphp_slot *slot) { unsigned long long sta = 0; struct acpiphp_func *func; + u32 dvid; list_for_each_entry(func, &slot->funcs, sibling) { if (func->flags & FUNC_HAS_STA) { @@ -597,19 +598,27 @@ static unsigned int get_slot_status(struct acpiphp_slot *slot) if (ACPI_SUCCESS(status) && sta) break; } else { - u32 dvid; - - pci_bus_read_config_dword(slot->bus, - PCI_DEVFN(slot->device, - func->function), - PCI_VENDOR_ID, &dvid); - if (dvid != 0xffffffff) { + if (pci_bus_read_dev_vendor_id(slot->bus, + PCI_DEVFN(slot->device, func->function), + &dvid, 0)) { sta = ACPI_STA_ALL; break; } } } + if (!sta) { + /* + * Check for the slot itself since it may be that the + * ACPI slot is a device below PCIe upstream port so in + * that case it may not even be reachable yet. + */ + if (pci_bus_read_dev_vendor_id(slot->bus, + PCI_DEVFN(slot->device, 0), &dvid, 0)) { + sta = ACPI_STA_ALL; + } + } + return (unsigned int)sta; } diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 84419af16f77..fd12ccc11e26 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -538,6 +538,7 @@ struct powercap_zone *powercap_register_zone( power_zone->id = result; idr_init(&power_zone->idr); + result = -ENOMEM; power_zone->name = kstrdup(name, GFP_KERNEL); if (!power_zone->name) goto err_name_alloc; diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 853976bd3d36..9473715725df 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -217,6 +217,13 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) missing = year; } + /* Can't proceed if alarm is still invalid after replacing + * missing fields. + */ + err = rtc_valid_tm(&alarm->time); + if (err) + goto done; + /* with luck, no rollover is needed */ t_now = rtc_tm_to_time64(&now); t_alm = rtc_tm_to_time64(&alarm->time); @@ -268,9 +275,9 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) dev_warn(&rtc->dev, "alarm rollover not handled\n"); } -done: err = rtc_valid_tm(&alarm->time); +done: if (err) { dev_warn(&rtc->dev, "invalid alarm value: %d-%d-%d %d:%d:%d\n", alarm->time.tm_year + 1900, alarm->time.tm_mon + 1, diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c index 229dd2fe8f45..c6b0c7ed7a30 100644 --- a/drivers/rtc/rtc-opal.c +++ b/drivers/rtc/rtc-opal.c @@ -150,6 +150,16 @@ static int opal_get_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = ((u64)be32_to_cpu(__h_m) << 32); + + /* check if no alarm is set */ + if (y_m_d == 0 && h_m_s_ms == 0) { + pr_debug("No alarm is set\n"); + rc = -ENOENT; + goto exit; + } else { + pr_debug("Alarm set to %x %llx\n", y_m_d, h_m_s_ms); + } + opal_to_tm(y_m_d, h_m_s_ms, &alarm->time); exit: diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c index 950c5d0b6dca..afab89f5be48 100644 --- a/drivers/rtc/rtc-snvs.c +++ b/drivers/rtc/rtc-snvs.c @@ -257,7 +257,7 @@ static int snvs_rtc_probe(struct platform_device *pdev) of_property_read_u32(pdev->dev.of_node, "offset", &data->offset); } - if (!data->regmap) { + if (IS_ERR(data->regmap)) { dev_err(&pdev->dev, "Can't find snvs syscon\n"); return -ENODEV; } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index e7a6f1222642..b76a85d14ef0 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1881,8 +1881,12 @@ static int __dasd_device_is_unusable(struct dasd_device *device, { int mask = ~(DASD_STOPPED_DC_WAIT | DASD_UNRESUMED_PM); - if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) { - /* dasd is being set offline. */ + if (test_bit(DASD_FLAG_OFFLINE, &device->flags) && + !test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { + /* + * dasd is being set offline + * but it is no safe offline where we have to allow I/O + */ return 1; } if (device->stopped) { diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 4bb5262f7aee..742ca57ece8c 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -126,7 +126,7 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, int start, int count, int auto_ack) { - int rc, tmp_count = count, tmp_start = start, nr = q->nr, retried = 0; + int rc, tmp_count = count, tmp_start = start, nr = q->nr; unsigned int ccq = 0; qperf_inc(q, eqbs); @@ -149,14 +149,7 @@ again: qperf_inc(q, eqbs_partial); DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS part:%02x", tmp_count); - /* - * Retry once, if that fails bail out and process the - * extracted buffers before trying again. - */ - if (!retried++) - goto again; - else - return count - tmp_count; + return count - tmp_count; } DBF_ERROR("%4x EQBS ERROR", SCH_NO(q)); @@ -212,7 +205,10 @@ again: return 0; } -/* returns number of examined buffers and their common state in *state */ +/* + * Returns number of examined buffers and their common state in *state. + * Requested number of buffers-to-examine must be > 0. + */ static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, unsigned char *state, unsigned int count, int auto_ack, int merge_pending) @@ -223,17 +219,23 @@ static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr, if (is_qebsm(q)) return qdio_do_eqbs(q, state, bufnr, count, auto_ack); - for (i = 0; i < count; i++) { - if (!__state) { - __state = q->slsb.val[bufnr]; - if (merge_pending && __state == SLSB_P_OUTPUT_PENDING) - __state = SLSB_P_OUTPUT_EMPTY; - } else if (merge_pending) { - if ((q->slsb.val[bufnr] & __state) != __state) - break; - } else if (q->slsb.val[bufnr] != __state) - break; + /* get initial state: */ + __state = q->slsb.val[bufnr]; + if (merge_pending && __state == SLSB_P_OUTPUT_PENDING) + __state = SLSB_P_OUTPUT_EMPTY; + + for (i = 1; i < count; i++) { bufnr = next_buf(bufnr); + + /* merge PENDING into EMPTY: */ + if (merge_pending && + q->slsb.val[bufnr] == SLSB_P_OUTPUT_PENDING && + __state == SLSB_P_OUTPUT_EMPTY) + continue; + + /* stop if next state differs from initial state: */ + if (q->slsb.val[bufnr] != __state) + break; } *state = __state; return i; diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index 499e369eabf0..8bc1625337f6 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -191,6 +191,7 @@ struct bnx2fc_hba { struct bnx2fc_cmd_mgr *cmd_mgr; spinlock_t hba_lock; struct mutex hba_mutex; + struct mutex hba_stats_mutex; unsigned long adapter_state; #define ADAPTER_STATE_UP 0 #define ADAPTER_STATE_GOING_DOWN 1 diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 67405c628864..d0b227ffbd5f 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -641,15 +641,17 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) if (!fw_stats) return NULL; + mutex_lock(&hba->hba_stats_mutex); + bnx2fc_stats = fc_get_host_stats(shost); init_completion(&hba->stat_req_done); if (bnx2fc_send_stat_req(hba)) - return bnx2fc_stats; + goto unlock_stats_mutex; rc = wait_for_completion_timeout(&hba->stat_req_done, (2 * HZ)); if (!rc) { BNX2FC_HBA_DBG(lport, "FW stat req timed out\n"); - return bnx2fc_stats; + goto unlock_stats_mutex; } BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt); bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt; @@ -671,6 +673,9 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) memcpy(&hba->prev_stats, hba->stats_buffer, sizeof(struct fcoe_statistics_params)); + +unlock_stats_mutex: + mutex_unlock(&hba->hba_stats_mutex); return bnx2fc_stats; } @@ -1302,6 +1307,7 @@ static struct bnx2fc_hba *bnx2fc_hba_create(struct cnic_dev *cnic) } spin_lock_init(&hba->hba_lock); mutex_init(&hba->hba_mutex); + mutex_init(&hba->hba_stats_mutex); hba->cnic = cnic; diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 622bdabc8894..dab195f04da7 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -1769,7 +1769,6 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) goto bye; } - mempool_free(mbp, hw->mb_mempool); if (finicsum != cfcsum) { csio_warn(hw, "Config File checksum mismatch: csum=%#x, computed=%#x\n", @@ -1780,6 +1779,10 @@ csio_hw_use_fwconfig(struct csio_hw *hw, int reset, u32 *fw_cfg_param) rv = csio_hw_validate_caps(hw, mbp); if (rv != 0) goto bye; + + mempool_free(mbp, hw->mb_mempool); + mbp = NULL; + /* * Note that we're operating with parameters * not supplied by the driver, rather than from hard-wired diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index efce04df2109..9f0b00c38658 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1695,6 +1695,15 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) */ switch (session->state) { case ISCSI_STATE_FAILED: + /* + * cmds should fail during shutdown, if the session + * state is bad, allowing completion to happen + */ + if (unlikely(system_state != SYSTEM_RUNNING)) { + reason = FAILURE_SESSION_FAILED; + sc->result = DID_NO_CONNECT << 16; + break; + } case ISCSI_STATE_IN_RECOVERY: reason = FAILURE_SESSION_IN_RECOVERY; sc->result = DID_IMM_RETRY << 16; @@ -1980,6 +1989,19 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) if (session->state != ISCSI_STATE_LOGGED_IN) { /* + * During shutdown, if session is prematurely disconnected, + * recovery won't happen and there will be hung cmds. Not + * handling cmds would trigger EH, also bad in this case. + * Instead, handle cmd, allow completion to happen and let + * upper layer to deal with the result. + */ + if (unlikely(system_state != SYSTEM_RUNNING)) { + sc->result = DID_NO_CONNECT << 16; + ISCSI_DBG_EH(session, "sc on shutdown, handled\n"); + rc = BLK_EH_HANDLED; + goto done; + } + /* * We are probably in the middle of iscsi recovery so let * that complete and handle the error. */ @@ -2083,7 +2105,7 @@ done: task->last_timeout = jiffies; spin_unlock(&session->frwd_lock); ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ? - "timer reset" : "nh"); + "timer reset" : "shutdown or nh"); return rc; } diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 022bb6e10d98..12886f96b286 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -282,6 +282,7 @@ static void sas_set_ex_phy(struct domain_device *dev, int phy_id, void *rsp) phy->phy->minimum_linkrate = dr->pmin_linkrate; phy->phy->maximum_linkrate = dr->pmax_linkrate; phy->phy->negotiated_linkrate = phy->linkrate; + phy->phy->enabled = (phy->linkrate != SAS_PHY_DISABLED); skip: if (new_phy) @@ -675,7 +676,7 @@ int sas_smp_get_phy_events(struct sas_phy *phy) res = smp_execute_task(dev, req, RPEL_REQ_SIZE, resp, RPEL_RESP_SIZE); - if (!res) + if (res) goto out; phy->invalid_dword_count = scsi_to_u32(&resp[12]); @@ -684,6 +685,7 @@ int sas_smp_get_phy_events(struct sas_phy *phy) phy->phy_reset_problem_count = scsi_to_u32(&resp[24]); out: + kfree(req); kfree(resp); return res; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index e111c3d8c5d6..b868ef3b2ca3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3886,19 +3886,6 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) return 0; } - /* - * Bug work around for firmware SATL handling. The loop - * is based on atomic operations and ensures consistency - * since we're lockless at this point - */ - do { - if (test_bit(0, &sas_device_priv_data->ata_command_pending)) { - scmd->result = SAM_STAT_BUSY; - scmd->scsi_done(scmd); - return 0; - } - } while (_scsih_set_satl_pending(scmd, true)); - sas_target_priv_data = sas_device_priv_data->sas_target; /* invalid device handle */ @@ -3924,6 +3911,19 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) sas_device_priv_data->block) return SCSI_MLQUEUE_DEVICE_BUSY; + /* + * Bug work around for firmware SATL handling. The loop + * is based on atomic operations and ensures consistency + * since we're lockless at this point + */ + do { + if (test_bit(0, &sas_device_priv_data->ata_command_pending)) { + scmd->result = SAM_STAT_BUSY; + scmd->scsi_done(scmd); + return 0; + } + } while (_scsih_set_satl_pending(scmd, true)); + if (scmd->sc_data_direction == DMA_FROM_DEVICE) mpi_control = MPI2_SCSIIO_CONTROL_READ; else if (scmd->sc_data_direction == DMA_TO_DEVICE) @@ -3945,6 +3945,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) if (!smid) { pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n", ioc->name, __func__); + _scsih_set_satl_pending(scmd, false); goto out; } mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); @@ -3975,6 +3976,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) if (mpi_request->DataLength) { if (ioc->build_sg_scmd(ioc, scmd, smid)) { mpt3sas_base_free_smid(ioc, smid); + _scsih_set_satl_pending(scmd, false); goto out; } } else diff --git a/drivers/soc/qcom/hab/hab.h b/drivers/soc/qcom/hab/hab.h index ce4c94fa75c9..ffb0637055d4 100644 --- a/drivers/soc/qcom/hab/hab.h +++ b/drivers/soc/qcom/hab/hab.h @@ -147,7 +147,8 @@ struct hab_header { (((vcid) & HAB_VCID_ID_MASK) >> HAB_VCID_ID_SHIFT) -#define HAB_HEADER_SET_SESSION_ID(header, sid) ((header).session_id = (sid)) +#define HAB_HEADER_SET_SESSION_ID(header, sid) \ + ((header).session_id = (sid)) #define HAB_HEADER_SET_SIZE(header, size) \ ((header).id_type_size = ((header).id_type_size & \ @@ -281,8 +282,8 @@ struct uhab_context { }; /* - * array to describe the VM and its MMID configuration as what is connected to - * so this is describing a pchan's remote side + * array to describe the VM and its MMID configuration as + * what is connected to so this is describing a pchan's remote side */ struct vmid_mmid_desc { int vmid; /* remote vmid */ @@ -341,8 +342,9 @@ struct virtual_channel { }; /* - * Struct shared between local and remote, contents are composed by exporter, - * the importer only writes to pdata and local (exporter) domID + * Struct shared between local and remote, contents + * are composed by exporter, the importer only writes + * to pdata and local (exporter) domID */ struct export_desc { uint32_t export_id; @@ -410,16 +412,10 @@ int habmem_hyp_revoke(void *expdata, uint32_t count); void *habmem_imp_hyp_open(void); void habmem_imp_hyp_close(void *priv, int kernel); -long habmem_imp_hyp_map(void *priv, void *impdata, uint32_t count, - uint32_t remotedom, - uint64_t *index, - void **pkva, - int kernel, - uint32_t userflags); +int habmem_imp_hyp_map(void *imp_ctx, struct hab_import *param, + struct export_desc *exp, int kernel); -long habmm_imp_hyp_unmap(void *priv, uint64_t index, - uint32_t count, - int kernel); +int habmm_imp_hyp_unmap(void *imp_ctx, struct export_desc *exp); int habmem_imp_hyp_mmap(struct file *flip, struct vm_area_struct *vma); diff --git a/drivers/soc/qcom/hab/hab_mem_linux.c b/drivers/soc/qcom/hab/hab_mem_linux.c index ecc3f52a6662..a779067ee4c4 100644 --- a/drivers/soc/qcom/hab/hab_mem_linux.c +++ b/drivers/soc/qcom/hab/hab_mem_linux.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -29,6 +29,9 @@ struct pages_list { uint32_t userflags; struct file *filp_owner; struct file *filp_mapper; + struct dma_buf *dmabuf; + int32_t export_id; + int32_t vcid; }; struct importer_context { @@ -58,7 +61,7 @@ static int match_file(const void *p, struct file *file, unsigned int fd) } -static int habmem_get_dma_pages(unsigned long address, +static int habmem_get_dma_pages_from_va(unsigned long address, int page_count, struct page **pages) { @@ -142,6 +145,56 @@ err: return rc; } +static int habmem_get_dma_pages_from_fd(int32_t fd, + int page_count, + struct page **pages) +{ + struct dma_buf *dmabuf = NULL; + struct scatterlist *s; + struct sg_table *sg_table = NULL; + struct dma_buf_attachment *attach = NULL; + struct page *page; + int i, j, rc = 0; + + dmabuf = dma_buf_get(fd); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + attach = dma_buf_attach(dmabuf, hab_driver.dev); + if (IS_ERR_OR_NULL(attach)) { + pr_err("dma_buf_attach failed\n"); + goto err; + } + + sg_table = dma_buf_map_attachment(attach, DMA_TO_DEVICE); + + if (IS_ERR_OR_NULL(sg_table)) { + pr_err("dma_buf_map_attachment failed\n"); + goto err; + } + + for_each_sg(sg_table->sgl, s, sg_table->nents, i) { + page = sg_page(s); + pr_debug("sgl length %d\n", s->length); + + for (j = 0; j < (s->length >> PAGE_SHIFT); j++) { + pages[rc] = nth_page(page, j); + rc++; + if (WARN_ON(rc >= page_count)) + break; + } + } + +err: + if (!IS_ERR_OR_NULL(sg_table)) + dma_buf_unmap_attachment(attach, sg_table, DMA_TO_DEVICE); + if (!IS_ERR_OR_NULL(attach)) + dma_buf_detach(dmabuf, attach); + if (!IS_ERR_OR_NULL(dmabuf)) + dma_buf_put(dmabuf); + return rc; +} + /* * exporter - grant & revoke * degenerate sharabled page list based on CPU friendly virtual "address". @@ -165,7 +218,11 @@ int habmem_hyp_grant_user(unsigned long address, down_read(¤t->mm->mmap_sem); if (HABMM_EXP_MEM_TYPE_DMA & flags) { - ret = habmem_get_dma_pages(address, + ret = habmem_get_dma_pages_from_va(address, + page_count, + pages); + } else if (HABMM_EXPIMP_FLAGS_FD & flags) { + ret = habmem_get_dma_pages_from_fd(address, page_count, pages); } else { @@ -260,30 +317,156 @@ void habmem_imp_hyp_close(void *imp_ctx, int kernel) kfree(priv); } -/* - * setup pages, be ready for the following mmap call - * index is output to refer to this imported buffer described by the import data - */ -long habmem_imp_hyp_map(void *imp_ctx, - void *impdata, - uint32_t count, - uint32_t remotedom, - uint64_t *index, - void **pkva, - int kernel, - uint32_t userflags) +static struct sg_table *hab_mem_map_dma_buf( + struct dma_buf_attachment *attachment, + enum dma_data_direction direction) +{ + struct dma_buf *dmabuf = attachment->dmabuf; + struct pages_list *pglist = dmabuf->priv; + struct sg_table *sgt; + struct scatterlist *sg; + int i; + int ret = 0; + struct page **pages = pglist->pages; + + sgt = kzalloc(sizeof(struct sg_table), GFP_KERNEL); + if (!sgt) + return ERR_PTR(-ENOMEM); + + ret = sg_alloc_table(sgt, pglist->npages, GFP_KERNEL); + if (ret) { + kfree(sgt); + return ERR_PTR(-ENOMEM); + } + + for_each_sg(sgt->sgl, sg, pglist->npages, i) { + sg_set_page(sg, pages[i], PAGE_SIZE, 0); + } + + return sgt; +} + + +static void hab_mem_unmap_dma_buf(struct dma_buf_attachment *attachment, + struct sg_table *sgt, + enum dma_data_direction direction) +{ + sg_free_table(sgt); + kfree(sgt); +} + +static int hab_map_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *page; + struct pages_list *pglist; + + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + + /* PHY address */ + unsigned long fault_offset = + (unsigned long)vmf->virtual_address - vma->vm_start + offset; + unsigned long fault_index = fault_offset>>PAGE_SHIFT; + int page_idx; + + if (vma == NULL) + return VM_FAULT_SIGBUS; + + pglist = vma->vm_private_data; + + page_idx = fault_index - pglist->index; + if (page_idx < 0 || page_idx >= pglist->npages) { + pr_err("Out of page array! page_idx %d, pg cnt %ld", + page_idx, pglist->npages); + return VM_FAULT_SIGBUS; + } + + page = pglist->pages[page_idx]; + get_page(page); + vmf->page = page; + return 0; +} + +static void hab_map_open(struct vm_area_struct *vma) +{ +} + +static void hab_map_close(struct vm_area_struct *vma) +{ +} + +static const struct vm_operations_struct habmem_vm_ops = { + .fault = hab_map_fault, + .open = hab_map_open, + .close = hab_map_close, +}; + +static int hab_mem_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) +{ + struct pages_list *pglist = dmabuf->priv; + uint32_t obj_size = pglist->npages << PAGE_SHIFT; + + if (vma == NULL) + return VM_FAULT_SIGBUS; + + /* Check for valid size. */ + if (obj_size < vma->vm_end - vma->vm_start) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_ops = &habmem_vm_ops; + vma->vm_private_data = pglist; + vma->vm_flags |= VM_MIXEDMAP; + + return 0; +} + +static void hab_mem_dma_buf_release(struct dma_buf *dmabuf) +{ +} + +static void *hab_mem_dma_buf_kmap(struct dma_buf *dmabuf, + unsigned long offset) +{ + return NULL; +} + +static void hab_mem_dma_buf_kunmap(struct dma_buf *dmabuf, + unsigned long offset, + void *ptr) +{ +} + +static struct dma_buf_ops dma_buf_ops = { + .map_dma_buf = hab_mem_map_dma_buf, + .unmap_dma_buf = hab_mem_unmap_dma_buf, + .mmap = hab_mem_mmap, + .release = hab_mem_dma_buf_release, + .kmap_atomic = hab_mem_dma_buf_kmap, + .kunmap_atomic = hab_mem_dma_buf_kunmap, + .kmap = hab_mem_dma_buf_kmap, + .kunmap = hab_mem_dma_buf_kunmap, +}; + +static int habmem_imp_hyp_map_fd(void *imp_ctx, + struct export_desc *exp, + uint32_t userflags, + int32_t *pfd) { struct page **pages; - struct compressed_pfns *pfn_table = (struct compressed_pfns *)impdata; + struct compressed_pfns *pfn_table = + (struct compressed_pfns *)exp->payload; struct pages_list *pglist; struct importer_context *priv = imp_ctx; unsigned long pfn; int i, j, k = 0; + pgprot_t prot = PAGE_KERNEL; + int32_t fd; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); if (!pfn_table || !priv) return -EINVAL; - pages = vmalloc(count * sizeof(struct page *)); + pages = vmalloc(exp->payload_count * sizeof(struct page *)); if (!pages) return -ENOMEM; @@ -303,145 +486,230 @@ long habmem_imp_hyp_map(void *imp_ctx, } pglist->pages = pages; - pglist->npages = count; - pglist->kernel = kernel; - pglist->index = page_to_phys(pages[0]) >> PAGE_SHIFT; + pglist->npages = exp->payload_count; + pglist->kernel = 0; + pglist->index = 0; pglist->refcntk = pglist->refcntu = 0; pglist->userflags = userflags; + pglist->export_id = exp->export_id; + pglist->vcid = exp->vcid_remote; + + if (!(userflags & HABMM_IMPORT_FLAGS_CACHED)) + prot = pgprot_writecombine(prot); + + exp_info.ops = &dma_buf_ops; + exp_info.size = exp->payload_count << PAGE_SHIFT; + exp_info.flags = O_RDWR; + exp_info.priv = pglist; + pglist->dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(pglist->dmabuf)) { + vfree(pages); + kfree(pglist); + return PTR_ERR(pglist->dmabuf); + } - *index = pglist->index << PAGE_SHIFT; - - if (kernel) { - pgprot_t prot = PAGE_KERNEL; - - if (!(userflags & HABMM_IMPORT_FLAGS_CACHED)) - prot = pgprot_writecombine(prot); - - pglist->kva = vmap(pglist->pages, pglist->npages, VM_MAP, prot); - if (pglist->kva == NULL) { - vfree(pages); - kfree(pglist); - pr_err("%ld pages vmap failed\n", pglist->npages); - return -ENOMEM; - } else { - pr_debug("%ld pages vmap pass, return %pK\n", - pglist->npages, pglist->kva); - } - - pglist->uva = NULL; - pglist->refcntk++; - *pkva = pglist->kva; - *index = (uint64_t)((uintptr_t)pglist->kva); - } else { - pglist->kva = NULL; + fd = dma_buf_fd(pglist->dmabuf, O_CLOEXEC); + if (fd < 0) { + dma_buf_put(pglist->dmabuf); + vfree(pages); + kfree(pglist); + return -EINVAL; } + pglist->refcntk++; + write_lock(&priv->implist_lock); list_add_tail(&pglist->list, &priv->imp_list); priv->cnt++; write_unlock(&priv->implist_lock); - pr_debug("index returned %llx\n", *index); + + *pfd = fd; return 0; } -/* the input index is PHY address shifted for uhab, and kva for khab */ -long habmm_imp_hyp_unmap(void *imp_ctx, - uint64_t index, - uint32_t count, - int kernel) +static int habmem_imp_hyp_map_kva(void *imp_ctx, + struct export_desc *exp, + uint32_t userflags, + void **pkva) { + struct page **pages; + struct compressed_pfns *pfn_table = + (struct compressed_pfns *)exp->payload; + struct pages_list *pglist; struct importer_context *priv = imp_ctx; - struct pages_list *pglist, *tmp; - int found = 0; - uint64_t pg_index = index >> PAGE_SHIFT; - - write_lock(&priv->implist_lock); - list_for_each_entry_safe(pglist, tmp, &priv->imp_list, list) { - pr_debug("node pglist %pK, kernel %d, pg_index %llx\n", - pglist, pglist->kernel, pg_index); + unsigned long pfn; + int i, j, k = 0; + pgprot_t prot = PAGE_KERNEL; - if (kernel) { - if (pglist->kva == (void *)((uintptr_t)index)) - found = 1; - } else { - if (pglist->index == pg_index) - found = 1; - } + if (!pfn_table || !priv) + return -EINVAL; + pages = vmalloc(exp->payload_count * sizeof(struct page *)); + if (!pages) + return -ENOMEM; + pglist = kzalloc(sizeof(*pglist), GFP_KERNEL); + if (!pglist) { + vfree(pages); + return -ENOMEM; + } - if (found) { - list_del(&pglist->list); - priv->cnt--; - break; + pfn = pfn_table->first_pfn; + for (i = 0; i < pfn_table->nregions; i++) { + for (j = 0; j < pfn_table->region[i].size; j++) { + pages[k] = pfn_to_page(pfn+j); + k++; } + pfn += pfn_table->region[i].size + pfn_table->region[i].space; } - write_unlock(&priv->implist_lock); - if (!found) { - pr_err("failed to find export id on index %llx\n", index); - return -EINVAL; + pglist->pages = pages; + pglist->npages = exp->payload_count; + pglist->kernel = 1; + pglist->refcntk = pglist->refcntu = 0; + pglist->userflags = userflags; + pglist->export_id = exp->export_id; + pglist->vcid = exp->vcid_remote; + + if (!(userflags & HABMM_IMPORT_FLAGS_CACHED)) + prot = pgprot_writecombine(prot); + + pglist->kva = vmap(pglist->pages, pglist->npages, VM_MAP, prot); + if (pglist->kva == NULL) { + vfree(pages); + kfree(pglist); + pr_err("%ld pages vmap failed\n", pglist->npages); + return -ENOMEM; } - pr_debug("detach pglist %pK, index %llx, kernel %d, list cnt %d\n", - pglist, pglist->index, pglist->kernel, priv->cnt); + pr_debug("%ld pages vmap pass, return %p\n", + pglist->npages, pglist->kva); - if (kernel) - if (pglist->kva) - vunmap(pglist->kva); + pglist->refcntk++; - vfree(pglist->pages); - kfree(pglist); + write_lock(&priv->implist_lock); + list_add_tail(&pglist->list, &priv->imp_list); + priv->cnt++; + write_unlock(&priv->implist_lock); + + *pkva = pglist->kva; return 0; } -static int hab_map_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +static int habmem_imp_hyp_map_uva(void *imp_ctx, + struct export_desc *exp, + uint32_t userflags, + uint64_t *index) { - struct page *page; + struct page **pages; + struct compressed_pfns *pfn_table = + (struct compressed_pfns *)exp->payload; struct pages_list *pglist; + struct importer_context *priv = imp_ctx; + unsigned long pfn; + int i, j, k = 0; - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - - /* PHY address */ - unsigned long fault_offset = - (unsigned long)vmf->virtual_address - vma->vm_start + offset; - unsigned long fault_index = fault_offset>>PAGE_SHIFT; - int page_idx; + if (!pfn_table || !priv) + return -EINVAL; - if (vma == NULL) - return VM_FAULT_SIGBUS; + pages = vmalloc(exp->payload_count * sizeof(struct page *)); + if (!pages) + return -ENOMEM; - pglist = vma->vm_private_data; + pglist = kzalloc(sizeof(*pglist), GFP_KERNEL); + if (!pglist) { + vfree(pages); + return -ENOMEM; + } - page_idx = fault_index - pglist->index; - if (page_idx < 0 || page_idx >= pglist->npages) { - pr_err("Out of page array. page_idx %d, pg cnt %ld", - page_idx, pglist->npages); - return VM_FAULT_SIGBUS; + pfn = pfn_table->first_pfn; + for (i = 0; i < pfn_table->nregions; i++) { + for (j = 0; j < pfn_table->region[i].size; j++) { + pages[k] = pfn_to_page(pfn+j); + k++; + } + pfn += pfn_table->region[i].size + pfn_table->region[i].space; } - pr_debug("Fault page index %d\n", page_idx); + pglist->pages = pages; + pglist->npages = exp->payload_count; + pglist->index = page_to_phys(pages[0]) >> PAGE_SHIFT; + pglist->refcntk = pglist->refcntu = 0; + pglist->userflags = userflags; + pglist->export_id = exp->export_id; + pglist->vcid = exp->vcid_remote; + + write_lock(&priv->implist_lock); + list_add_tail(&pglist->list, &priv->imp_list); + priv->cnt++; + write_unlock(&priv->implist_lock); + + *index = pglist->index << PAGE_SHIFT; - page = pglist->pages[page_idx]; - get_page(page); - vmf->page = page; return 0; } -static void hab_map_open(struct vm_area_struct *vma) +int habmem_imp_hyp_map(void *imp_ctx, struct hab_import *param, + struct export_desc *exp, int kernel) { + int ret = 0; + + if (kernel) + ret = habmem_imp_hyp_map_kva(imp_ctx, exp, + param->flags, + (void **)¶m->kva); + else if (param->flags & HABMM_EXPIMP_FLAGS_FD) + ret = habmem_imp_hyp_map_fd(imp_ctx, exp, + param->flags, + (int32_t *)¶m->kva); + else + ret = habmem_imp_hyp_map_uva(imp_ctx, exp, + param->flags, + ¶m->index); + + return ret; } -static void hab_map_close(struct vm_area_struct *vma) +int habmm_imp_hyp_unmap(void *imp_ctx, struct export_desc *exp) { -} + struct importer_context *priv = imp_ctx; + struct pages_list *pglist, *tmp; + int found = 0; -static const struct vm_operations_struct habmem_vm_ops = { + write_lock(&priv->implist_lock); + list_for_each_entry_safe(pglist, tmp, &priv->imp_list, list) { + if (pglist->export_id == exp->export_id && + pglist->vcid == exp->vcid_remote) { + found = 1; + } - .fault = hab_map_fault, - .open = hab_map_open, - .close = hab_map_close, -}; + if (found) { + list_del(&pglist->list); + priv->cnt--; + break; + } + } + write_unlock(&priv->implist_lock); + + if (!found) { + pr_err("failed to find export id %u\n", exp->export_id); + return -EINVAL; + } + + pr_debug("detach pglist %p, kernel %d, list cnt %d\n", + pglist, pglist->kernel, priv->cnt); + + if (pglist->kva) + vunmap(pglist->kva); + + if (pglist->dmabuf) + dma_buf_put(pglist->dmabuf); + + vfree(pglist->pages); + kfree(pglist); + + return 0; +} int habmem_imp_hyp_mmap(struct file *filp, struct vm_area_struct *vma) { diff --git a/drivers/soc/qcom/hab/hab_mimex.c b/drivers/soc/qcom/hab/hab_mimex.c index 67601590908e..00fbeabed4bb 100644 --- a/drivers/soc/qcom/hab/hab_mimex.c +++ b/drivers/soc/qcom/hab/hab_mimex.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -345,25 +345,20 @@ int hab_mem_import(struct uhab_context *ctx, exp->export_id, exp->payload_count, exp->domid_local, *((uint32_t *)exp->payload)); - ret = habmem_imp_hyp_map(ctx->import_ctx, - exp->payload, - exp->payload_count, - exp->domid_local, - &exp->import_index, - &exp->kva, - kernel, - param->flags); + ret = habmem_imp_hyp_map(ctx->import_ctx, param, exp, kernel); + if (ret) { pr_err("Import fail ret:%d pcnt:%d rem:%d 1st_ref:0x%X\n", ret, exp->payload_count, exp->domid_local, *((uint32_t *)exp->payload)); return ret; } - pr_debug("import index %llx, kva %llx, kernel %d\n", - exp->import_index, param->kva, kernel); - param->index = exp->import_index; - param->kva = (uint64_t)exp->kva; + exp->import_index = param->index; + exp->kva = kernel ? (void *)param->kva : NULL; + + pr_debug("import index %llx, kva or fd %llx, kernel %d\n", + exp->import_index, param->kva, kernel); return ret; } @@ -396,13 +391,10 @@ int hab_mem_unimport(struct uhab_context *ctx, if (!found) ret = -EINVAL; else { - ret = habmm_imp_hyp_unmap(ctx->import_ctx, - exp->import_index, - exp->payload_count, - kernel); + ret = habmm_imp_hyp_unmap(ctx->import_ctx, exp); if (ret) { - pr_err("unmap fail id:%d pcnt:%d kernel:%d\n", - exp->export_id, exp->payload_count, kernel); + pr_err("unmap fail id:%d pcnt:%d vcid:%d\n", + exp->export_id, exp->payload_count, exp->vcid_remote); } param->kva = (uint64_t)exp->kva; kfree(exp); diff --git a/drivers/soc/qcom/hab/hab_vchan.c b/drivers/soc/qcom/hab/hab_vchan.c index 140d75656353..2db4db8f321b 100644 --- a/drivers/soc/qcom/hab/hab_vchan.c +++ b/drivers/soc/qcom/hab/hab_vchan.c @@ -110,10 +110,7 @@ hab_vchan_free(struct kref *ref) } spin_unlock_bh(&ctx->imp_lock); if (found) { - habmm_imp_hyp_unmap(ctx->import_ctx, - exp->import_index, - exp->payload_count, - ctx->kernel); + habmm_imp_hyp_unmap(ctx->import_ctx, exp); ctx->import_total--; kfree(exp); } diff --git a/drivers/soc/qcom/rpm_stats.c b/drivers/soc/qcom/rpm_stats.c index b54af9eae8ec..ed7493d063ae 100644 --- a/drivers/soc/qcom/rpm_stats.c +++ b/drivers/soc/qcom/rpm_stats.c @@ -430,7 +430,7 @@ static ssize_t rpmstats_show(struct kobject *kobj, prvdata); } - ret = snprintf(buf, prvdata->len, prvdata->buf); + ret = snprintf(buf, prvdata->len, "%s", prvdata->buf); iounmap(prvdata->reg_base); ioremap_fail: kfree(prvdata); diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 26629b856f91..6c4445863705 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -90,6 +90,15 @@ config ONESHOT_SYNC_USER help Provide a userspace API for creating oneshot sync objects. +config ANDROID_VSOC + tristate "Android Virtual SoC support" + default n + depends on PCI_MSI + ---help--- + This option adds support for the Virtual SoC driver needed to boot + a 'cuttlefish' Android image inside QEmu. The driver interacts with + a QEmu ivshmem device. If built as a module, it will be called vsoc. + source "drivers/staging/android/ion/Kconfig" source "drivers/staging/android/fiq_debugger/Kconfig" diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index b0b47ae4c0ea..8ef816152020 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER) += lowmemorykiller.o obj-$(CONFIG_SYNC) += sync.o sync_debug.o obj-$(CONFIG_SW_SYNC) += sw_sync.o obj-$(CONFIG_ONESHOT_SYNC) += oneshot_sync.o +obj-$(CONFIG_ANDROID_VSOC) += vsoc.o diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO index 8f3ac37bfe12..0c32c00fa700 100644 --- a/drivers/staging/android/TODO +++ b/drivers/staging/android/TODO @@ -25,5 +25,15 @@ ion/ exposes existing cma regions and doesn't reserve unecessarily memory when booting a system which doesn't use ion. +vsoc.c, uapi/vsoc_shm.h + - The current driver uses the same wait queue for all of the futexes in a + region. This will cause false wakeups in regions with a large number of + waiting threads. We should eventually use multiple queues and select the + queue based on the region. + - Add debugfs support for examining the permissions of regions. + - Use ioremap_wc instead of ioremap_nocache. + - Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl. This functionality has been + superseded by the futex and is there for legacy reasons. + Please send patches to Greg Kroah-Hartman <greg@kroah.com> and Cc: Arve HjønnevÃ¥g <arve@android.com> and Riley Andrews <riandrews@android.com> diff --git a/drivers/staging/android/uapi/vsoc_shm.h b/drivers/staging/android/uapi/vsoc_shm.h new file mode 100644 index 000000000000..741b1387c25b --- /dev/null +++ b/drivers/staging/android/uapi/vsoc_shm.h @@ -0,0 +1,303 @@ +/* + * Copyright (C) 2017 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _UAPI_LINUX_VSOC_SHM_H +#define _UAPI_LINUX_VSOC_SHM_H + +#include <linux/types.h> + +/** + * A permission is a token that permits a receiver to read and/or write an area + * of memory within a Vsoc region. + * + * An fd_scoped permission grants both read and write access, and can be + * attached to a file description (see open(2)). + * Ownership of the area can then be shared by passing a file descriptor + * among processes. + * + * begin_offset and end_offset define the area of memory that is controlled by + * the permission. owner_offset points to a word, also in shared memory, that + * controls ownership of the area. + * + * ownership of the region expires when the associated file description is + * released. + * + * At most one permission can be attached to each file description. + * + * This is useful when implementing HALs like gralloc that scope and pass + * ownership of shared resources via file descriptors. + * + * The caller is responsibe for doing any fencing. + * + * The calling process will normally identify a currently free area of + * memory. It will construct a proposed fd_scoped_permission_arg structure: + * + * begin_offset and end_offset describe the area being claimed + * + * owner_offset points to the location in shared memory that indicates the + * owner of the area. + * + * owned_value is the value that will be stored in owner_offset iff the + * permission can be granted. It must be different than VSOC_REGION_FREE. + * + * Two fd_scoped_permission structures are compatible if they vary only by + * their owned_value fields. + * + * The driver ensures that, for any group of simultaneous callers proposing + * compatible fd_scoped_permissions, it will accept exactly one of the + * propopsals. The other callers will get a failure with errno of EAGAIN. + * + * A process receiving a file descriptor can identify the region being + * granted using the VSOC_GET_FD_SCOPED_PERMISSION ioctl. + */ +struct fd_scoped_permission { + __u32 begin_offset; + __u32 end_offset; + __u32 owner_offset; + __u32 owned_value; +}; + +/* + * This value represents a free area of memory. The driver expects to see this + * value at owner_offset when creating a permission otherwise it will not do it, + * and will write this value back once the permission is no longer needed. + */ +#define VSOC_REGION_FREE ((__u32)0) + +/** + * ioctl argument for VSOC_CREATE_FD_SCOPE_PERMISSION + */ +struct fd_scoped_permission_arg { + struct fd_scoped_permission perm; + __s32 managed_region_fd; +}; + +#define VSOC_NODE_FREE ((__u32)0) + +/* + * Describes a signal table in shared memory. Each non-zero entry in the + * table indicates that the receiver should signal the futex at the given + * offset. Offsets are relative to the region, not the shared memory window. + * + * interrupt_signalled_offset is used to reliably signal interrupts across the + * vmm boundary. There are two roles: transmitter and receiver. For example, + * in the host_to_guest_signal_table the host is the transmitter and the + * guest is the receiver. The protocol is as follows: + * + * 1. The transmitter should convert the offset of the futex to an offset + * in the signal table [0, (1 << num_nodes_lg2)) + * The transmitter can choose any appropriate hashing algorithm, including + * hash = futex_offset & ((1 << num_nodes_lg2) - 1) + * + * 3. The transmitter should atomically compare and swap futex_offset with 0 + * at hash. There are 3 possible outcomes + * a. The swap fails because the futex_offset is already in the table. + * The transmitter should stop. + * b. Some other offset is in the table. This is a hash collision. The + * transmitter should move to another table slot and try again. One + * possible algorithm: + * hash = (hash + 1) & ((1 << num_nodes_lg2) - 1) + * c. The swap worked. Continue below. + * + * 3. The transmitter atomically swaps 1 with the value at the + * interrupt_signalled_offset. There are two outcomes: + * a. The prior value was 1. In this case an interrupt has already been + * posted. The transmitter is done. + * b. The prior value was 0, indicating that the receiver may be sleeping. + * The transmitter will issue an interrupt. + * + * 4. On waking the receiver immediately exchanges a 0 with the + * interrupt_signalled_offset. If it receives a 0 then this a spurious + * interrupt. That may occasionally happen in the current protocol, but + * should be rare. + * + * 5. The receiver scans the signal table by atomicaly exchanging 0 at each + * location. If a non-zero offset is returned from the exchange the + * receiver wakes all sleepers at the given offset: + * futex((int*)(region_base + old_value), FUTEX_WAKE, MAX_INT); + * + * 6. The receiver thread then does a conditional wait, waking immediately + * if the value at interrupt_signalled_offset is non-zero. This catches cases + * here additional signals were posted while the table was being scanned. + * On the guest the wait is handled via the VSOC_WAIT_FOR_INCOMING_INTERRUPT + * ioctl. + */ +struct vsoc_signal_table_layout { + /* log_2(Number of signal table entries) */ + __u32 num_nodes_lg2; + /* + * Offset to the first signal table entry relative to the start of the + * region + */ + __u32 futex_uaddr_table_offset; + /* + * Offset to an atomic_t / atomic uint32_t. A non-zero value indicates + * that one or more offsets are currently posted in the table. + * semi-unique access to an entry in the table + */ + __u32 interrupt_signalled_offset; +}; + +#define VSOC_REGION_WHOLE ((__s32)0) +#define VSOC_DEVICE_NAME_SZ 16 + +/** + * Each HAL would (usually) talk to a single device region + * Mulitple entities care about these regions: + * - The ivshmem_server will populate the regions in shared memory + * - The guest kernel will read the region, create minor device nodes, and + * allow interested parties to register for FUTEX_WAKE events in the region + * - HALs will access via the minor device nodes published by the guest kernel + * - Host side processes will access the region via the ivshmem_server: + * 1. Pass name to ivshmem_server at a UNIX socket + * 2. ivshmemserver will reply with 2 fds: + * - host->guest doorbell fd + * - guest->host doorbell fd + * - fd for the shared memory region + * - region offset + * 3. Start a futex receiver thread on the doorbell fd pointed at the + * signal_nodes + */ +struct vsoc_device_region { + __u16 current_version; + __u16 min_compatible_version; + __u32 region_begin_offset; + __u32 region_end_offset; + __u32 offset_of_region_data; + struct vsoc_signal_table_layout guest_to_host_signal_table; + struct vsoc_signal_table_layout host_to_guest_signal_table; + /* Name of the device. Must always be terminated with a '\0', so + * the longest supported device name is 15 characters. + */ + char device_name[VSOC_DEVICE_NAME_SZ]; + /* There are two ways that permissions to access regions are handled: + * - When subdivided_by is VSOC_REGION_WHOLE, any process that can + * open the device node for the region gains complete access to it. + * - When subdivided is set processes that open the region cannot + * access it. Access to a sub-region must be established by invoking + * the VSOC_CREATE_FD_SCOPE_PERMISSION ioctl on the region + * referenced in subdivided_by, providing a fileinstance + * (represented by a fd) opened on this region. + */ + __u32 managed_by; +}; + +/* + * The vsoc layout descriptor. + * The first 4K should be reserved for the shm header and region descriptors. + * The regions should be page aligned. + */ + +struct vsoc_shm_layout_descriptor { + __u16 major_version; + __u16 minor_version; + + /* size of the shm. This may be redundant but nice to have */ + __u32 size; + + /* number of shared memory regions */ + __u32 region_count; + + /* The offset to the start of region descriptors */ + __u32 vsoc_region_desc_offset; +}; + +/* + * This specifies the current version that should be stored in + * vsoc_shm_layout_descriptor.major_version and + * vsoc_shm_layout_descriptor.minor_version. + * It should be updated only if the vsoc_device_region and + * vsoc_shm_layout_descriptor structures have changed. + * Versioning within each region is transferred + * via the min_compatible_version and current_version fields in + * vsoc_device_region. The driver does not consult these fields: they are left + * for the HALs and host processes and will change independently of the layout + * version. + */ +#define CURRENT_VSOC_LAYOUT_MAJOR_VERSION 2 +#define CURRENT_VSOC_LAYOUT_MINOR_VERSION 0 + +#define VSOC_CREATE_FD_SCOPED_PERMISSION \ + _IOW(0xF5, 0, struct fd_scoped_permission) +#define VSOC_GET_FD_SCOPED_PERMISSION _IOR(0xF5, 1, struct fd_scoped_permission) + +/* + * This is used to signal the host to scan the guest_to_host_signal_table + * for new futexes to wake. This sends an interrupt if one is not already + * in flight. + */ +#define VSOC_MAYBE_SEND_INTERRUPT_TO_HOST _IO(0xF5, 2) + +/* + * When this returns the guest will scan host_to_guest_signal_table to + * check for new futexes to wake. + */ +/* TODO(ghartman): Consider moving this to the bottom half */ +#define VSOC_WAIT_FOR_INCOMING_INTERRUPT _IO(0xF5, 3) + +/* + * Guest HALs will use this to retrieve the region description after + * opening their device node. + */ +#define VSOC_DESCRIBE_REGION _IOR(0xF5, 4, struct vsoc_device_region) + +/* + * Wake any threads that may be waiting for a host interrupt on this region. + * This is mostly used during shutdown. + */ +#define VSOC_SELF_INTERRUPT _IO(0xF5, 5) + +/* + * This is used to signal the host to scan the guest_to_host_signal_table + * for new futexes to wake. This sends an interrupt unconditionally. + */ +#define VSOC_SEND_INTERRUPT_TO_HOST _IO(0xF5, 6) + +enum wait_types { + VSOC_WAIT_UNDEFINED = 0, + VSOC_WAIT_IF_EQUAL = 1, + VSOC_WAIT_IF_EQUAL_TIMEOUT = 2 +}; + +/* + * Wait for a condition to be true + * + * Note, this is sized and aligned so the 32 bit and 64 bit layouts are + * identical. + */ +struct vsoc_cond_wait { + /* Input: Offset of the 32 bit word to check */ + __u32 offset; + /* Input: Value that will be compared with the offset */ + __u32 value; + /* Monotonic time to wake at in seconds */ + __u64 wake_time_sec; + /* Input: Monotonic time to wait in nanoseconds */ + __u32 wake_time_nsec; + /* Input: Type of wait */ + __u32 wait_type; + /* Output: Number of times the thread woke before returning. */ + __u32 wakes; + /* Ensure that we're 8-byte aligned and 8 byte length for 32/64 bit + * compatibility. + */ + __u32 reserved_1; +}; + +#define VSOC_COND_WAIT _IOWR(0xF5, 7, struct vsoc_cond_wait) + +/* Wake any local threads waiting at the offset given in arg */ +#define VSOC_COND_WAKE _IO(0xF5, 8) + +#endif /* _UAPI_LINUX_VSOC_SHM_H */ diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c new file mode 100644 index 000000000000..587c66d709b9 --- /dev/null +++ b/drivers/staging/android/vsoc.c @@ -0,0 +1,1169 @@ +/* + * drivers/android/staging/vsoc.c + * + * Android Virtual System on a Chip (VSoC) driver + * + * Copyright (C) 2017 Google, Inc. + * + * Author: ghartman@google.com + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory + * Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca> + * + * Based on cirrusfb.c and 8139cp.c: + * Copyright 1999-2001 Jeff Garzik + * Copyright 2001-2004 Jeff Garzik + */ + +#include <linux/dma-mapping.h> +#include <linux/freezer.h> +#include <linux/futex.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/cdev.h> +#include <linux/file.h> +#include "uapi/vsoc_shm.h" + +#define VSOC_DEV_NAME "vsoc" + +/* + * Description of the ivshmem-doorbell PCI device used by QEmu. These + * constants follow docs/specs/ivshmem-spec.txt, which can be found in + * the QEmu repository. This was last reconciled with the version that + * came out with 2.8 + */ + +/* + * These constants are determined KVM Inter-VM shared memory device + * register offsets + */ +enum { + INTR_MASK = 0x00, /* Interrupt Mask */ + INTR_STATUS = 0x04, /* Interrupt Status */ + IV_POSITION = 0x08, /* VM ID */ + DOORBELL = 0x0c, /* Doorbell */ +}; + +static const int REGISTER_BAR; /* Equal to 0 */ +static const int MAX_REGISTER_BAR_LEN = 0x100; +/* + * The MSI-x BAR is not used directly. + * + * static const int MSI_X_BAR = 1; + */ +static const int SHARED_MEMORY_BAR = 2; + +struct vsoc_region_data { + char name[VSOC_DEVICE_NAME_SZ + 1]; + wait_queue_head_t interrupt_wait_queue; + /* TODO(b/73664181): Use multiple futex wait queues */ + wait_queue_head_t futex_wait_queue; + /* Flag indicating that an interrupt has been signalled by the host. */ + atomic_t *incoming_signalled; + /* Flag indicating the guest has signalled the host. */ + atomic_t *outgoing_signalled; + int irq_requested; + int device_created; +}; + +struct vsoc_device { + /* Kernel virtual address of REGISTER_BAR. */ + void __iomem *regs; + /* Physical address of SHARED_MEMORY_BAR. */ + phys_addr_t shm_phys_start; + /* Kernel virtual address of SHARED_MEMORY_BAR. */ + void *kernel_mapped_shm; + /* Size of the entire shared memory window in bytes. */ + size_t shm_size; + /* + * Pointer to the virtual address of the shared memory layout structure. + * This is probably identical to kernel_mapped_shm, but saving this + * here saves a lot of annoying casts. + */ + struct vsoc_shm_layout_descriptor *layout; + /* + * Points to a table of region descriptors in the kernel's virtual + * address space. Calculated from + * vsoc_shm_layout_descriptor.vsoc_region_desc_offset + */ + struct vsoc_device_region *regions; + /* Head of a list of permissions that have been granted. */ + struct list_head permissions; + struct pci_dev *dev; + /* Per-region (and therefore per-interrupt) information. */ + struct vsoc_region_data *regions_data; + /* + * Table of msi-x entries. This has to be separated from struct + * vsoc_region_data because the kernel deals with them as an array. + */ + struct msix_entry *msix_entries; + /* + * Flags that indicate what we've initialzied. These are used to do an + * orderly cleanup of the device. + */ + char enabled_device; + char requested_regions; + char cdev_added; + char class_added; + char msix_enabled; + /* Mutex that protectes the permission list */ + struct mutex mtx; + /* Major number assigned by the kernel */ + int major; + + struct cdev cdev; + struct class *class; +}; + +static struct vsoc_device vsoc_dev; + +/* + * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions. + */ + +struct fd_scoped_permission_node { + struct fd_scoped_permission permission; + struct list_head list; +}; + +struct vsoc_private_data { + struct fd_scoped_permission_node *fd_scoped_permission_node; +}; + +static long vsoc_ioctl(struct file *, unsigned int, unsigned long); +static int vsoc_mmap(struct file *, struct vm_area_struct *); +static int vsoc_open(struct inode *, struct file *); +static int vsoc_release(struct inode *, struct file *); +static ssize_t vsoc_read(struct file *, char *, size_t, loff_t *); +static ssize_t vsoc_write(struct file *, const char *, size_t, loff_t *); +static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin); +static int do_create_fd_scoped_permission( + struct vsoc_device_region *region_p, + struct fd_scoped_permission_node *np, + struct fd_scoped_permission_arg *__user arg); +static void do_destroy_fd_scoped_permission( + struct vsoc_device_region *owner_region_p, + struct fd_scoped_permission *perm); +static long do_vsoc_describe_region(struct file *, + struct vsoc_device_region __user *); +static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off); + +/** + * Validate arguments on entry points to the driver. + */ +inline int vsoc_validate_inode(struct inode *inode) +{ + if (iminor(inode) >= vsoc_dev.layout->region_count) { + dev_err(&vsoc_dev.dev->dev, + "describe_region: invalid region %d\n", iminor(inode)); + return -ENODEV; + } + return 0; +} + +inline int vsoc_validate_filep(struct file *filp) +{ + int ret = vsoc_validate_inode(file_inode(filp)); + + if (ret) + return ret; + if (!filp->private_data) { + dev_err(&vsoc_dev.dev->dev, + "No private data on fd, region %d\n", + iminor(file_inode(filp))); + return -EBADFD; + } + return 0; +} + +/* Converts from shared memory offset to virtual address */ +static inline void *shm_off_to_virtual_addr(__u32 offset) +{ + return vsoc_dev.kernel_mapped_shm + offset; +} + +/* Converts from shared memory offset to physical address */ +static inline phys_addr_t shm_off_to_phys_addr(__u32 offset) +{ + return vsoc_dev.shm_phys_start + offset; +} + +/** + * Convenience functions to obtain the region from the inode or file. + * Dangerous to call before validating the inode/file. + */ +static inline struct vsoc_device_region *vsoc_region_from_inode( + struct inode *inode) +{ + return &vsoc_dev.regions[iminor(inode)]; +} + +static inline struct vsoc_device_region *vsoc_region_from_filep( + struct file *inode) +{ + return vsoc_region_from_inode(file_inode(inode)); +} + +static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r) +{ + return r->region_end_offset - r->region_begin_offset; +} + +static const struct file_operations vsoc_ops = { + .owner = THIS_MODULE, + .open = vsoc_open, + .mmap = vsoc_mmap, + .read = vsoc_read, + .unlocked_ioctl = vsoc_ioctl, + .compat_ioctl = vsoc_ioctl, + .write = vsoc_write, + .llseek = vsoc_lseek, + .release = vsoc_release, +}; + +static struct pci_device_id vsoc_id_table[] = { + {0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + {0}, +}; + +MODULE_DEVICE_TABLE(pci, vsoc_id_table); + +static void vsoc_remove_device(struct pci_dev *pdev); +static int vsoc_probe_device(struct pci_dev *pdev, + const struct pci_device_id *ent); + +static struct pci_driver vsoc_pci_driver = { + .name = "vsoc", + .id_table = vsoc_id_table, + .probe = vsoc_probe_device, + .remove = vsoc_remove_device, +}; + +static int do_create_fd_scoped_permission( + struct vsoc_device_region *region_p, + struct fd_scoped_permission_node *np, + struct fd_scoped_permission_arg *__user arg) +{ + struct file *managed_filp; + s32 managed_fd; + atomic_t *owner_ptr = NULL; + struct vsoc_device_region *managed_region_p; + + if (copy_from_user(&np->permission, &arg->perm, sizeof(*np)) || + copy_from_user(&managed_fd, + &arg->managed_region_fd, sizeof(managed_fd))) { + return -EFAULT; + } + managed_filp = fdget(managed_fd).file; + /* Check that it's a valid fd, */ + if (!managed_filp || vsoc_validate_filep(managed_filp)) + return -EPERM; + /* EEXIST if the given fd already has a permission. */ + if (((struct vsoc_private_data *)managed_filp->private_data)-> + fd_scoped_permission_node) + return -EEXIST; + managed_region_p = vsoc_region_from_filep(managed_filp); + /* Check that the provided region is managed by this one */ + if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p) + return -EPERM; + /* The area must be well formed and have non-zero size */ + if (np->permission.begin_offset >= np->permission.end_offset) + return -EINVAL; + /* The area must fit in the memory window */ + if (np->permission.end_offset > + vsoc_device_region_size(managed_region_p)) + return -ERANGE; + /* The area must be in the region data section */ + if (np->permission.begin_offset < + managed_region_p->offset_of_region_data) + return -ERANGE; + /* The area must be page aligned */ + if (!PAGE_ALIGNED(np->permission.begin_offset) || + !PAGE_ALIGNED(np->permission.end_offset)) + return -EINVAL; + /* Owner offset must be naturally aligned in the window */ + if (np->permission.owner_offset & + (sizeof(np->permission.owner_offset) - 1)) + return -EINVAL; + /* The owner flag must reside in the owner memory */ + if (np->permission.owner_offset + sizeof(np->permission.owner_offset) > + vsoc_device_region_size(region_p)) + return -ERANGE; + /* The owner flag must reside in the data section */ + if (np->permission.owner_offset < region_p->offset_of_region_data) + return -EINVAL; + /* The owner value must change to claim the memory */ + if (np->permission.owned_value == VSOC_REGION_FREE) + return -EINVAL; + owner_ptr = + (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset + + np->permission.owner_offset); + /* We've already verified that this is in the shared memory window, so + * it should be safe to write to this address. + */ + if (atomic_cmpxchg(owner_ptr, + VSOC_REGION_FREE, + np->permission.owned_value) != VSOC_REGION_FREE) { + return -EBUSY; + } + ((struct vsoc_private_data *)managed_filp->private_data)-> + fd_scoped_permission_node = np; + /* The file offset needs to be adjusted if the calling + * process did any read/write operations on the fd + * before creating the permission. + */ + if (managed_filp->f_pos) { + if (managed_filp->f_pos > np->permission.end_offset) { + /* If the offset is beyond the permission end, set it + * to the end. + */ + managed_filp->f_pos = np->permission.end_offset; + } else { + /* If the offset is within the permission interval + * keep it there otherwise reset it to zero. + */ + if (managed_filp->f_pos < np->permission.begin_offset) { + managed_filp->f_pos = 0; + } else { + managed_filp->f_pos -= + np->permission.begin_offset; + } + } + } + return 0; +} + +static void do_destroy_fd_scoped_permission_node( + struct vsoc_device_region *owner_region_p, + struct fd_scoped_permission_node *node) +{ + if (node) { + do_destroy_fd_scoped_permission(owner_region_p, + &node->permission); + mutex_lock(&vsoc_dev.mtx); + list_del(&node->list); + mutex_unlock(&vsoc_dev.mtx); + kfree(node); + } +} + +static void do_destroy_fd_scoped_permission( + struct vsoc_device_region *owner_region_p, + struct fd_scoped_permission *perm) +{ + atomic_t *owner_ptr = NULL; + int prev = 0; + + if (!perm) + return; + owner_ptr = (atomic_t *)shm_off_to_virtual_addr( + owner_region_p->region_begin_offset + perm->owner_offset); + prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE); + if (prev != perm->owned_value) + dev_err(&vsoc_dev.dev->dev, + "%x-%x: owner (%s) %x: expected to be %x was %x", + perm->begin_offset, perm->end_offset, + owner_region_p->device_name, perm->owner_offset, + perm->owned_value, prev); +} + +static long do_vsoc_describe_region(struct file *filp, + struct vsoc_device_region __user *dest) +{ + struct vsoc_device_region *region_p; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + region_p = vsoc_region_from_filep(filp); + if (copy_to_user(dest, region_p, sizeof(*region_p))) + return -EFAULT; + return 0; +} + +/** + * Implements the inner logic of cond_wait. Copies to and from userspace are + * done in the helper function below. + */ +static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg) +{ + DEFINE_WAIT(wait); + u32 region_number = iminor(file_inode(filp)); + struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; + struct hrtimer_sleeper timeout, *to = NULL; + int ret = 0; + struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); + atomic_t *address = NULL; + struct timespec ts; + + /* Ensure that the offset is aligned */ + if (arg->offset & (sizeof(uint32_t) - 1)) + return -EADDRNOTAVAIL; + /* Ensure that the offset is within shared memory */ + if (((uint64_t)arg->offset) + region_p->region_begin_offset + + sizeof(uint32_t) > region_p->region_end_offset) + return -E2BIG; + address = shm_off_to_virtual_addr(region_p->region_begin_offset + + arg->offset); + + /* Ensure that the type of wait is valid */ + switch (arg->wait_type) { + case VSOC_WAIT_IF_EQUAL: + break; + case VSOC_WAIT_IF_EQUAL_TIMEOUT: + to = &timeout; + break; + default: + return -EINVAL; + } + + if (to) { + /* Copy the user-supplied timesec into the kernel structure. + * We do things this way to flatten differences between 32 bit + * and 64 bit timespecs. + */ + ts.tv_sec = arg->wake_time_sec; + ts.tv_nsec = arg->wake_time_nsec; + + if (!timespec_valid(&ts)) + return -EINVAL; + hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); + hrtimer_set_expires_range_ns(&to->timer, timespec_to_ktime(ts), + current->timer_slack_ns); + + hrtimer_init_sleeper(to, current); + } + + while (1) { + prepare_to_wait(&data->futex_wait_queue, &wait, + TASK_INTERRUPTIBLE); + /* + * Check the sentinel value after prepare_to_wait. If the value + * changes after this check the writer will call signal, + * changing the task state from INTERRUPTIBLE to RUNNING. That + * will ensure that schedule() will eventually schedule this + * task. + */ + if (atomic_read(address) != arg->value) { + ret = 0; + break; + } + if (to) { + hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); + if (likely(to->task)) + freezable_schedule(); + hrtimer_cancel(&to->timer); + if (!to->task) { + ret = -ETIMEDOUT; + break; + } + } else { + freezable_schedule(); + } + /* Count the number of times that we woke up. This is useful + * for unit testing. + */ + ++arg->wakes; + if (signal_pending(current)) { + ret = -EINTR; + break; + } + } + finish_wait(&data->futex_wait_queue, &wait); + if (to) + destroy_hrtimer_on_stack(&to->timer); + return ret; +} + +/** + * Handles the details of copying from/to userspace to ensure that the copies + * happen on all of the return paths of cond_wait. + */ +static int do_vsoc_cond_wait(struct file *filp, + struct vsoc_cond_wait __user *untrusted_in) +{ + struct vsoc_cond_wait arg; + int rval = 0; + + if (copy_from_user(&arg, untrusted_in, sizeof(arg))) + return -EFAULT; + /* wakes is an out parameter. Initialize it to something sensible. */ + arg.wakes = 0; + rval = handle_vsoc_cond_wait(filp, &arg); + if (copy_to_user(untrusted_in, &arg, sizeof(arg))) + return -EFAULT; + return rval; +} + +static int do_vsoc_cond_wake(struct file *filp, uint32_t offset) +{ + struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); + u32 region_number = iminor(file_inode(filp)); + struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; + /* Ensure that the offset is aligned */ + if (offset & (sizeof(uint32_t) - 1)) + return -EADDRNOTAVAIL; + /* Ensure that the offset is within shared memory */ + if (((uint64_t)offset) + region_p->region_begin_offset + + sizeof(uint32_t) > region_p->region_end_offset) + return -E2BIG; + /* + * TODO(b/73664181): Use multiple futex wait queues. + * We need to wake every sleeper when the condition changes. Typically + * only a single thread will be waiting on the condition, but there + * are exceptions. The worst case is about 10 threads. + */ + wake_up_interruptible_all(&data->futex_wait_queue); + return 0; +} + +static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + int rv = 0; + struct vsoc_device_region *region_p; + u32 reg_num; + struct vsoc_region_data *reg_data; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + region_p = vsoc_region_from_filep(filp); + reg_num = iminor(file_inode(filp)); + reg_data = vsoc_dev.regions_data + reg_num; + switch (cmd) { + case VSOC_CREATE_FD_SCOPED_PERMISSION: + { + struct fd_scoped_permission_node *node = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + /* We can't allocate memory for the permission */ + if (!node) + return -ENOMEM; + INIT_LIST_HEAD(&node->list); + rv = do_create_fd_scoped_permission( + region_p, + node, + (struct fd_scoped_permission_arg __user *)arg); + if (!rv) { + mutex_lock(&vsoc_dev.mtx); + list_add(&node->list, &vsoc_dev.permissions); + mutex_unlock(&vsoc_dev.mtx); + } else { + kfree(node); + return rv; + } + } + break; + + case VSOC_GET_FD_SCOPED_PERMISSION: + { + struct fd_scoped_permission_node *node = + ((struct vsoc_private_data *)filp->private_data)-> + fd_scoped_permission_node; + if (!node) + return -ENOENT; + if (copy_to_user + ((struct fd_scoped_permission __user *)arg, + &node->permission, sizeof(node->permission))) + return -EFAULT; + } + break; + + case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST: + if (!atomic_xchg( + reg_data->outgoing_signalled, + 1)) { + writel(reg_num, vsoc_dev.regs + DOORBELL); + return 0; + } else { + return -EBUSY; + } + break; + + case VSOC_SEND_INTERRUPT_TO_HOST: + writel(reg_num, vsoc_dev.regs + DOORBELL); + return 0; + + case VSOC_WAIT_FOR_INCOMING_INTERRUPT: + wait_event_interruptible( + reg_data->interrupt_wait_queue, + (atomic_read(reg_data->incoming_signalled) != 0)); + break; + + case VSOC_DESCRIBE_REGION: + return do_vsoc_describe_region( + filp, + (struct vsoc_device_region __user *)arg); + + case VSOC_SELF_INTERRUPT: + atomic_set(reg_data->incoming_signalled, 1); + wake_up_interruptible(®_data->interrupt_wait_queue); + break; + + case VSOC_COND_WAIT: + return do_vsoc_cond_wait(filp, + (struct vsoc_cond_wait __user *)arg); + case VSOC_COND_WAKE: + return do_vsoc_cond_wake(filp, arg); + + default: + return -EINVAL; + } + return 0; +} + +static ssize_t vsoc_read(struct file *filp, char *buffer, size_t len, + loff_t *poffset) +{ + __u32 area_off; + void *area_p; + ssize_t area_len; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, &area_off); + area_p = shm_off_to_virtual_addr(area_off); + area_p += *poffset; + area_len -= *poffset; + if (area_len <= 0) + return 0; + if (area_len < len) + len = area_len; + if (copy_to_user(buffer, area_p, len)) + return -EFAULT; + *poffset += len; + return len; +} + +static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin) +{ + ssize_t area_len = 0; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, NULL); + switch (origin) { + case SEEK_SET: + break; + + case SEEK_CUR: + if (offset > 0 && offset + filp->f_pos < 0) + return -EOVERFLOW; + offset += filp->f_pos; + break; + + case SEEK_END: + if (offset > 0 && offset + area_len < 0) + return -EOVERFLOW; + offset += area_len; + break; + + case SEEK_DATA: + if (offset >= area_len) + return -EINVAL; + if (offset < 0) + offset = 0; + break; + + case SEEK_HOLE: + /* Next hole is always the end of the region, unless offset is + * beyond that + */ + if (offset < area_len) + offset = area_len; + break; + + default: + return -EINVAL; + } + + if (offset < 0 || offset > area_len) + return -EINVAL; + filp->f_pos = offset; + + return offset; +} + +static ssize_t vsoc_write(struct file *filp, const char *buffer, + size_t len, loff_t *poffset) +{ + __u32 area_off; + void *area_p; + ssize_t area_len; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, &area_off); + area_p = shm_off_to_virtual_addr(area_off); + area_p += *poffset; + area_len -= *poffset; + if (area_len <= 0) + return 0; + if (area_len < len) + len = area_len; + if (copy_from_user(area_p, buffer, len)) + return -EFAULT; + *poffset += len; + return len; +} + +static irqreturn_t vsoc_interrupt(int irq, void *region_data_v) +{ + struct vsoc_region_data *region_data = + (struct vsoc_region_data *)region_data_v; + int reg_num = region_data - vsoc_dev.regions_data; + + if (unlikely(!region_data)) + return IRQ_NONE; + + if (unlikely(reg_num < 0 || + reg_num >= vsoc_dev.layout->region_count)) { + dev_err(&vsoc_dev.dev->dev, + "invalid irq @%p reg_num=0x%04x\n", + region_data, reg_num); + return IRQ_NONE; + } + if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) { + dev_err(&vsoc_dev.dev->dev, + "irq not aligned @%p reg_num=0x%04x\n", + region_data, reg_num); + return IRQ_NONE; + } + wake_up_interruptible(®ion_data->interrupt_wait_queue); + return IRQ_HANDLED; +} + +static int vsoc_probe_device(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int result; + int i; + resource_size_t reg_size; + dev_t devt; + + vsoc_dev.dev = pdev; + result = pci_enable_device(pdev); + if (result) { + dev_err(&pdev->dev, + "pci_enable_device failed %s: error %d\n", + pci_name(pdev), result); + return result; + } + vsoc_dev.enabled_device = 1; + result = pci_request_regions(pdev, "vsoc"); + if (result < 0) { + dev_err(&pdev->dev, "pci_request_regions failed\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.requested_regions = 1; + /* Set up the control registers in BAR 0 */ + reg_size = pci_resource_len(pdev, REGISTER_BAR); + if (reg_size > MAX_REGISTER_BAR_LEN) + vsoc_dev.regs = + pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN); + else + vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size); + + if (!vsoc_dev.regs) { + dev_err(&pdev->dev, + "cannot ioremap registers of size %zu\n", + (size_t)reg_size); + vsoc_remove_device(pdev); + return -EBUSY; + } + + /* Map the shared memory in BAR 2 */ + vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR); + vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR); + + dev_info(&pdev->dev, "shared memory @ DMA %p size=0x%zx\n", + (void *)vsoc_dev.shm_phys_start, vsoc_dev.shm_size); + /* TODO(ghartman): ioremap_wc should work here */ + vsoc_dev.kernel_mapped_shm = ioremap_nocache( + vsoc_dev.shm_phys_start, vsoc_dev.shm_size); + if (!vsoc_dev.kernel_mapped_shm) { + dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + + vsoc_dev.layout = + (struct vsoc_shm_layout_descriptor *)vsoc_dev.kernel_mapped_shm; + dev_info(&pdev->dev, "major_version: %d\n", + vsoc_dev.layout->major_version); + dev_info(&pdev->dev, "minor_version: %d\n", + vsoc_dev.layout->minor_version); + dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size); + dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count); + if (vsoc_dev.layout->major_version != + CURRENT_VSOC_LAYOUT_MAJOR_VERSION) { + dev_err(&vsoc_dev.dev->dev, + "driver supports only major_version %d\n", + CURRENT_VSOC_LAYOUT_MAJOR_VERSION); + vsoc_remove_device(pdev); + return -EBUSY; + } + result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count, + VSOC_DEV_NAME); + if (result) { + dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.major = MAJOR(devt); + cdev_init(&vsoc_dev.cdev, &vsoc_ops); + vsoc_dev.cdev.owner = THIS_MODULE; + result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count); + if (result) { + dev_err(&vsoc_dev.dev->dev, "cdev_add error\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.cdev_added = 1; + vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME); + if (IS_ERR(vsoc_dev.class)) { + dev_err(&vsoc_dev.dev->dev, "class_create failed\n"); + vsoc_remove_device(pdev); + return PTR_ERR(vsoc_dev.class); + } + vsoc_dev.class_added = 1; + vsoc_dev.regions = (struct vsoc_device_region *) + (vsoc_dev.kernel_mapped_shm + + vsoc_dev.layout->vsoc_region_desc_offset); + vsoc_dev.msix_entries = kcalloc( + vsoc_dev.layout->region_count, + sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL); + if (!vsoc_dev.msix_entries) { + dev_err(&vsoc_dev.dev->dev, + "unable to allocate msix_entries\n"); + vsoc_remove_device(pdev); + return -ENOSPC; + } + vsoc_dev.regions_data = kcalloc( + vsoc_dev.layout->region_count, + sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL); + if (!vsoc_dev.regions_data) { + dev_err(&vsoc_dev.dev->dev, + "unable to allocate regions' data\n"); + vsoc_remove_device(pdev); + return -ENOSPC; + } + for (i = 0; i < vsoc_dev.layout->region_count; ++i) + vsoc_dev.msix_entries[i].entry = i; + + result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries, + vsoc_dev.layout->region_count); + if (result) { + dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result); + vsoc_remove_device(pdev); + return -ENOSPC; + } + /* Check that all regions are well formed */ + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { + const struct vsoc_device_region *region = vsoc_dev.regions + i; + + if (!PAGE_ALIGNED(region->region_begin_offset) || + !PAGE_ALIGNED(region->region_end_offset)) { + dev_err(&vsoc_dev.dev->dev, + "region %d not aligned (%x:%x)", i, + region->region_begin_offset, + region->region_end_offset); + vsoc_remove_device(pdev); + return -EFAULT; + } + if (region->region_begin_offset >= region->region_end_offset || + region->region_end_offset > vsoc_dev.shm_size) { + dev_err(&vsoc_dev.dev->dev, + "region %d offsets are wrong: %x %x %zx", + i, region->region_begin_offset, + region->region_end_offset, vsoc_dev.shm_size); + vsoc_remove_device(pdev); + return -EFAULT; + } + if (region->managed_by >= vsoc_dev.layout->region_count) { + dev_err(&vsoc_dev.dev->dev, + "region %d has invalid owner: %u", + i, region->managed_by); + vsoc_remove_device(pdev); + return -EFAULT; + } + } + vsoc_dev.msix_enabled = 1; + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { + const struct vsoc_device_region *region = vsoc_dev.regions + i; + size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1; + const struct vsoc_signal_table_layout *h_to_g_signal_table = + ®ion->host_to_guest_signal_table; + const struct vsoc_signal_table_layout *g_to_h_signal_table = + ®ion->guest_to_host_signal_table; + + vsoc_dev.regions_data[i].name[name_sz] = '\0'; + memcpy(vsoc_dev.regions_data[i].name, region->device_name, + name_sz); + dev_info(&pdev->dev, "region %d name=%s\n", + i, vsoc_dev.regions_data[i].name); + init_waitqueue_head( + &vsoc_dev.regions_data[i].interrupt_wait_queue); + init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue); + vsoc_dev.regions_data[i].incoming_signalled = + vsoc_dev.kernel_mapped_shm + + region->region_begin_offset + + h_to_g_signal_table->interrupt_signalled_offset; + vsoc_dev.regions_data[i].outgoing_signalled = + vsoc_dev.kernel_mapped_shm + + region->region_begin_offset + + g_to_h_signal_table->interrupt_signalled_offset; + + result = request_irq( + vsoc_dev.msix_entries[i].vector, + vsoc_interrupt, 0, + vsoc_dev.regions_data[i].name, + vsoc_dev.regions_data + i); + if (result) { + dev_info(&pdev->dev, + "request_irq failed irq=%d vector=%d\n", + i, vsoc_dev.msix_entries[i].vector); + vsoc_remove_device(pdev); + return -ENOSPC; + } + vsoc_dev.regions_data[i].irq_requested = 1; + if (!device_create(vsoc_dev.class, NULL, + MKDEV(vsoc_dev.major, i), + NULL, vsoc_dev.regions_data[i].name)) { + dev_err(&vsoc_dev.dev->dev, "device_create failed\n"); + vsoc_remove_device(pdev); + return -EBUSY; + } + vsoc_dev.regions_data[i].device_created = 1; + } + return 0; +} + +/* + * This should undo all of the allocations in the probe function in reverse + * order. + * + * Notes: + * + * The device may have been partially initialized, so double check + * that the allocations happened. + * + * This function may be called multiple times, so mark resources as freed + * as they are deallocated. + */ +static void vsoc_remove_device(struct pci_dev *pdev) +{ + int i; + /* + * pdev is the first thing to be set on probe and the last thing + * to be cleared here. If it's NULL then there is no cleanup. + */ + if (!pdev || !vsoc_dev.dev) + return; + dev_info(&pdev->dev, "remove_device\n"); + if (vsoc_dev.regions_data) { + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { + if (vsoc_dev.regions_data[i].device_created) { + device_destroy(vsoc_dev.class, + MKDEV(vsoc_dev.major, i)); + vsoc_dev.regions_data[i].device_created = 0; + } + if (vsoc_dev.regions_data[i].irq_requested) + free_irq(vsoc_dev.msix_entries[i].vector, NULL); + vsoc_dev.regions_data[i].irq_requested = 0; + } + kfree(vsoc_dev.regions_data); + vsoc_dev.regions_data = 0; + } + if (vsoc_dev.msix_enabled) { + pci_disable_msix(pdev); + vsoc_dev.msix_enabled = 0; + } + kfree(vsoc_dev.msix_entries); + vsoc_dev.msix_entries = 0; + vsoc_dev.regions = 0; + if (vsoc_dev.class_added) { + class_destroy(vsoc_dev.class); + vsoc_dev.class_added = 0; + } + if (vsoc_dev.cdev_added) { + cdev_del(&vsoc_dev.cdev); + vsoc_dev.cdev_added = 0; + } + if (vsoc_dev.major && vsoc_dev.layout) { + unregister_chrdev_region(MKDEV(vsoc_dev.major, 0), + vsoc_dev.layout->region_count); + vsoc_dev.major = 0; + } + vsoc_dev.layout = 0; + if (vsoc_dev.kernel_mapped_shm) { + pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm); + vsoc_dev.kernel_mapped_shm = 0; + } + if (vsoc_dev.regs) { + pci_iounmap(pdev, vsoc_dev.regs); + vsoc_dev.regs = 0; + } + if (vsoc_dev.requested_regions) { + pci_release_regions(pdev); + vsoc_dev.requested_regions = 0; + } + if (vsoc_dev.enabled_device) { + pci_disable_device(pdev); + vsoc_dev.enabled_device = 0; + } + /* Do this last: it indicates that the device is not initialized. */ + vsoc_dev.dev = NULL; +} + +static void __exit vsoc_cleanup_module(void) +{ + vsoc_remove_device(vsoc_dev.dev); + pci_unregister_driver(&vsoc_pci_driver); +} + +static int __init vsoc_init_module(void) +{ + int err = -ENOMEM; + + INIT_LIST_HEAD(&vsoc_dev.permissions); + mutex_init(&vsoc_dev.mtx); + + err = pci_register_driver(&vsoc_pci_driver); + if (err < 0) + return err; + return 0; +} + +static int vsoc_open(struct inode *inode, struct file *filp) +{ + /* Can't use vsoc_validate_filep because filp is still incomplete */ + int ret = vsoc_validate_inode(inode); + + if (ret) + return ret; + filp->private_data = + kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + return 0; +} + +static int vsoc_release(struct inode *inode, struct file *filp) +{ + struct vsoc_private_data *private_data = NULL; + struct fd_scoped_permission_node *node = NULL; + struct vsoc_device_region *owner_region_p = NULL; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + private_data = (struct vsoc_private_data *)filp->private_data; + if (!private_data) + return 0; + + node = private_data->fd_scoped_permission_node; + if (node) { + owner_region_p = vsoc_region_from_inode(inode); + if (owner_region_p->managed_by != VSOC_REGION_WHOLE) { + owner_region_p = + &vsoc_dev.regions[owner_region_p->managed_by]; + } + do_destroy_fd_scoped_permission_node(owner_region_p, node); + private_data->fd_scoped_permission_node = NULL; + } + kfree(private_data); + filp->private_data = NULL; + + return 0; +} + +/* + * Returns the device relative offset and length of the area specified by the + * fd scoped permission. If there is no fd scoped permission set, a default + * permission covering the entire region is assumed, unless the region is owned + * by another one, in which case the default is a permission with zero size. + */ +static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset) +{ + __u32 off = 0; + ssize_t length = 0; + struct vsoc_device_region *region_p; + struct fd_scoped_permission *perm; + + region_p = vsoc_region_from_filep(filp); + off = region_p->region_begin_offset; + perm = &((struct vsoc_private_data *)filp->private_data)-> + fd_scoped_permission_node->permission; + if (perm) { + off += perm->begin_offset; + length = perm->end_offset - perm->begin_offset; + } else if (region_p->managed_by == VSOC_REGION_WHOLE) { + /* No permission set and the regions is not owned by another, + * default to full region access. + */ + length = vsoc_device_region_size(region_p); + } else { + /* return zero length, access is denied. */ + length = 0; + } + if (area_offset) + *area_offset = off; + return length; +} + +static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma) +{ + unsigned long len = vma->vm_end - vma->vm_start; + __u32 area_off; + phys_addr_t mem_off; + ssize_t area_len; + int retval = vsoc_validate_filep(filp); + + if (retval) + return retval; + area_len = vsoc_get_area(filp, &area_off); + /* Add the requested offset */ + area_off += (vma->vm_pgoff << PAGE_SHIFT); + area_len -= (vma->vm_pgoff << PAGE_SHIFT); + if (area_len < len) + return -EINVAL; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + mem_off = shm_off_to_phys_addr(area_off); + if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT, + len, vma->vm_page_prot)) + return -EAGAIN; + return 0; +} + +module_init(vsoc_init_module); +module_exit(vsoc_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Greg Hartman <ghartman@google.com>"); +MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device"); +MODULE_VERSION("1.0"); diff --git a/drivers/staging/wlan-ng/prism2mgmt.c b/drivers/staging/wlan-ng/prism2mgmt.c index 013a6240f193..c1ad0aea23b9 100644 --- a/drivers/staging/wlan-ng/prism2mgmt.c +++ b/drivers/staging/wlan-ng/prism2mgmt.c @@ -169,7 +169,7 @@ int prism2mgmt_scan(wlandevice_t *wlandev, void *msgp) hw->ident_sta_fw.variant) > HFA384x_FIRMWARE_VERSION(1, 5, 0)) { if (msg->scantype.data != P80211ENUM_scantype_active) - word = cpu_to_le16(msg->maxchanneltime.data); + word = msg->maxchanneltime.data; else word = 0; diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index c5547bd711db..6a8300108148 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -589,6 +589,9 @@ static int imx_thermal_probe(struct platform_device *pdev) regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN); regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP); + data->irq_enabled = true; + data->mode = THERMAL_DEVICE_ENABLED; + ret = devm_request_threaded_irq(&pdev->dev, data->irq, imx_thermal_alarm_irq, imx_thermal_alarm_irq_thread, 0, "imx_thermal", data); @@ -600,9 +603,6 @@ static int imx_thermal_probe(struct platform_device *pdev) return ret; } - data->irq_enabled = true; - data->mode = THERMAL_DEVICE_ENABLED; - return 0; } diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 1246aa6fcab0..737635f0bec0 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -523,6 +523,7 @@ static void allow_maximum_power(struct thermal_zone_device *tz) struct thermal_instance *instance; struct power_allocator_params *params = tz->governor_data; + mutex_lock(&tz->lock); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { if ((instance->trip != params->trip_max_desired_temperature) || (!cdev_is_power_actor(instance->cdev))) @@ -532,6 +533,7 @@ static void allow_maximum_power(struct thermal_zone_device *tz) instance->cdev->updated = false; thermal_cdev_update(instance->cdev); } + mutex_unlock(&tz->lock); } /** diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 20a41f7de76f..6713fd1958e7 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -627,6 +627,7 @@ static const struct dev_pm_ops nhi_pm_ops = { * we just disable hotplug, the * pci-tunnels stay alive. */ + .thaw_noirq = nhi_resume_noirq, .restore_noirq = nhi_resume_noirq, }; diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c index dc5c0e6cdb84..4686e93aaf94 100644 --- a/drivers/tty/goldfish.c +++ b/drivers/tty/goldfish.c @@ -87,32 +87,35 @@ static void do_rw_io(struct goldfish_tty *qtty, } static void goldfish_tty_rw(struct goldfish_tty *qtty, - unsigned long addr, + const void *address_ptr, unsigned int count, int is_write) { dma_addr_t dma_handle; enum dma_data_direction dma_dir; + uintptr_t address; + address = (uintptr_t)address_ptr; dma_dir = (is_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + if (qtty->version > 0) { /* * Goldfish TTY for Ranchu platform uses * physical addresses and DMA for read/write operations */ - unsigned long addr_end = addr + count; + uintptr_t address_end = address + count; - while (addr < addr_end) { - unsigned long pg_end = (addr & PAGE_MASK) + PAGE_SIZE; - unsigned long next = - pg_end < addr_end ? pg_end : addr_end; - unsigned long avail = next - addr; + while (address < address_end) { + uintptr_t page_end = (address & PAGE_MASK) + PAGE_SIZE; + uintptr_t next = page_end < address_end ? + page_end : address_end; + uintptr_t avail = next - address; /* * Map the buffer's virtual address to the DMA address * so the buffer can be accessed by the device. */ - dma_handle = dma_map_single(qtty->dev, (void *)addr, + dma_handle = dma_map_single(qtty->dev, (void *)address, avail, dma_dir); if (dma_mapping_error(qtty->dev, dma_handle)) { @@ -127,31 +130,30 @@ static void goldfish_tty_rw(struct goldfish_tty *qtty, */ dma_unmap_single(qtty->dev, dma_handle, avail, dma_dir); - addr += avail; + address += avail; } } else { /* * Old style Goldfish TTY used on the Goldfish platform * uses virtual addresses. */ - do_rw_io(qtty, addr, count, is_write); + do_rw_io(qtty, address, count, is_write); } + } static void goldfish_tty_do_write(int line, const char *buf, unsigned int count) { struct goldfish_tty *qtty = &goldfish_ttys[line]; - unsigned long address = (unsigned long)(void *)buf; - goldfish_tty_rw(qtty, address, count, 1); + goldfish_tty_rw(qtty, buf, count, 1); } static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id) { struct goldfish_tty *qtty = dev_id; void __iomem *base = qtty->base; - unsigned long address; unsigned char *buf; u32 count; @@ -160,9 +162,7 @@ static irqreturn_t goldfish_tty_interrupt(int irq, void *dev_id) return IRQ_NONE; count = tty_prepare_flip_string(&qtty->port, &buf, count); - - address = (unsigned long)(void *)buf; - goldfish_tty_rw(qtty, address, count, 0); + goldfish_tty_rw(qtty, buf, count, 0); tty_schedule_flip(&qtty->port); return IRQ_HANDLED; diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 9aff37186246..78bd121ecede 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1467,6 +1467,10 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) * in which case an opening port goes back to closed and a closing port * is simply put into closed state (any further frames from the other * end will get a DM response) + * + * Some control dlci can stay in ADM mode with other dlci working just + * fine. In that case we can just keep the control dlci open after the + * DLCI_OPENING retries time out. */ static void gsm_dlci_t1(unsigned long data) @@ -1480,8 +1484,15 @@ static void gsm_dlci_t1(unsigned long data) if (dlci->retries) { gsm_command(dlci->gsm, dlci->addr, SABM|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); - } else + } else if (!dlci->addr && gsm->control == (DM | PF)) { + if (debug & 8) + pr_info("DLCI %d opening in ADM mode.\n", + dlci->addr); + gsm_dlci_open(dlci); + } else { gsm_dlci_close(dlci); + } + break; case DLCI_CLOSING: dlci->retries--; @@ -1499,8 +1510,8 @@ static void gsm_dlci_t1(unsigned long data) * @dlci: DLCI to open * * Commence opening a DLCI from the Linux side. We issue SABM messages - * to the modem which should then reply with a UA, at which point we - * will move into open state. Opening is done asynchronously with retry + * to the modem which should then reply with a UA or ADM, at which point + * we will move into open state. Opening is done asynchronously with retry * running off timers and the responses. */ diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 41dda25da049..190e5dc15738 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2238,6 +2238,12 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, } if (tty_hung_up_p(file)) break; + /* + * Abort readers for ttys which never actually + * get hung up. See __tty_hangup(). + */ + if (test_bit(TTY_HUPPING, &tty->flags)) + break; if (!timeout) break; if (file->f_flags & O_NONBLOCK) { diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index e8dd296fb25b..c4383573cf66 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -608,6 +608,10 @@ static int omap_8250_startup(struct uart_port *port) up->lsr_saved_flags = 0; up->msr_saved_flags = 0; + /* Disable DMA for console UART */ + if (uart_console(port)) + up->dma = NULL; + if (up->dma) { ret = serial8250_request_dma(up); if (ret) { diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c index fcf803ffad19..cdd2f942317c 100644 --- a/drivers/tty/serial/sccnxp.c +++ b/drivers/tty/serial/sccnxp.c @@ -884,14 +884,19 @@ static int sccnxp_probe(struct platform_device *pdev) clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(clk)) { - if (PTR_ERR(clk) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; + ret = PTR_ERR(clk); + if (ret == -EPROBE_DEFER) goto err_out; - } + uartclk = 0; + } else { + clk_prepare_enable(clk); + uartclk = clk_get_rate(clk); + } + + if (!uartclk) { dev_notice(&pdev->dev, "Using default clock frequency\n"); uartclk = s->chip->freq_std; - } else - uartclk = clk_get_rate(clk); + } /* Check input frequency */ if ((uartclk < s->chip->freq_min) || (uartclk > s->chip->freq_max)) { diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index fc7711c75b01..8dd822feb972 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1457,7 +1457,16 @@ static void sci_free_dma(struct uart_port *port) if (s->chan_rx) sci_rx_dma_release(s, false); } -#else + +static void sci_flush_buffer(struct uart_port *port) +{ + /* + * In uart_flush_buffer(), the xmit circular buffer has just been + * cleared, so we have to reset tx_dma_len accordingly. + */ + to_sci_port(port)->tx_dma_len = 0; +} +#else /* !CONFIG_SERIAL_SH_SCI_DMA */ static inline void sci_request_dma(struct uart_port *port) { } @@ -1465,7 +1474,9 @@ static inline void sci_request_dma(struct uart_port *port) static inline void sci_free_dma(struct uart_port *port) { } -#endif + +#define sci_flush_buffer NULL +#endif /* !CONFIG_SERIAL_SH_SCI_DMA */ static irqreturn_t sci_rx_interrupt(int irq, void *ptr) { @@ -2205,6 +2216,7 @@ static struct uart_ops sci_uart_ops = { .break_ctl = sci_break_ctl, .startup = sci_startup, .shutdown = sci_shutdown, + .flush_buffer = sci_flush_buffer, .set_termios = sci_set_termios, .pm = sci_pm, .type = sci_type, diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a638c1738547..89fd20382ce4 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -702,6 +702,14 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) return; } + /* + * Some console devices aren't actually hung up for technical and + * historical reasons, which can lead to indefinite interruptible + * sleep in n_tty_read(). The following explicitly tells + * n_tty_read() to abort readers. + */ + set_bit(TTY_HUPPING, &tty->flags); + /* inuse_filps is protected by the single tty lock, this really needs to change if we want to flush the workqueue with the lock held */ @@ -757,6 +765,7 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) * can't yet guarantee all that. */ set_bit(TTY_HUPPED, &tty->flags); + clear_bit(TTY_HUPPING, &tty->flags); tty_unlock(tty); if (f) diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 939c6ad71068..57ee43512992 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -851,7 +851,7 @@ static inline void ci_role_destroy(struct ci_hdrc *ci) { ci_hdrc_gadget_destroy(ci); ci_hdrc_host_destroy(ci); - if (ci->is_otg) + if (ci->is_otg && ci->roles[CI_ROLE_GADGET]) ci_hdrc_otg_destroy(ci); } @@ -951,27 +951,35 @@ static int ci_hdrc_probe(struct platform_device *pdev) /* initialize role(s) before the interrupt is requested */ if (dr_mode == USB_DR_MODE_OTG || dr_mode == USB_DR_MODE_HOST) { ret = ci_hdrc_host_init(ci); - if (ret) - dev_info(dev, "doesn't support host\n"); + if (ret) { + if (ret == -ENXIO) + dev_info(dev, "doesn't support host\n"); + else + goto deinit_phy; + } } if (dr_mode == USB_DR_MODE_OTG || dr_mode == USB_DR_MODE_PERIPHERAL) { ret = ci_hdrc_gadget_init(ci); - if (ret) - dev_info(dev, "doesn't support gadget\n"); + if (ret) { + if (ret == -ENXIO) + dev_info(dev, "doesn't support gadget\n"); + else + goto deinit_host; + } } if (!ci->roles[CI_ROLE_HOST] && !ci->roles[CI_ROLE_GADGET]) { dev_err(dev, "no supported roles\n"); ret = -ENODEV; - goto deinit_phy; + goto deinit_gadget; } if (ci->is_otg && ci->roles[CI_ROLE_GADGET]) { ret = ci_hdrc_otg_init(ci); if (ret) { dev_err(dev, "init otg fails, ret = %d\n", ret); - goto stop; + goto deinit_gadget; } } @@ -1036,7 +1044,12 @@ static int ci_hdrc_probe(struct platform_device *pdev) ci_extcon_unregister(ci); stop: - ci_role_destroy(ci); + if (ci->is_otg && ci->roles[CI_ROLE_GADGET]) + ci_hdrc_otg_destroy(ci); +deinit_gadget: + ci_hdrc_gadget_destroy(ci); +deinit_host: + ci_hdrc_host_destroy(ci); deinit_phy: ci_usb_phy_exit(ci); diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c index 72d1109f13eb..619e5446cbe8 100644 --- a/drivers/usb/core/generic.c +++ b/drivers/usb/core/generic.c @@ -242,8 +242,13 @@ static int generic_suspend(struct usb_device *udev, pm_message_t msg) if (!udev->parent) rc = hcd_bus_suspend(udev, msg); - /* Non-root devices don't need to do anything for FREEZE or PRETHAW */ - else if (msg.event == PM_EVENT_FREEZE || msg.event == PM_EVENT_PRETHAW) + /* + * Non-root USB2 devices don't need to do anything for FREEZE + * or PRETHAW. USB3 devices don't support global suspend and + * needs to be selectively suspended. + */ + else if ((msg.event == PM_EVENT_FREEZE || msg.event == PM_EVENT_PRETHAW) + && (udev->speed < USB_SPEED_SUPER)) rc = 0; else rc = usb_port_suspend(udev, msg); diff --git a/drivers/usb/dwc3/dwc3-keystone.c b/drivers/usb/dwc3/dwc3-keystone.c index 2be268d2423d..03a926ebf34b 100644 --- a/drivers/usb/dwc3/dwc3-keystone.c +++ b/drivers/usb/dwc3/dwc3-keystone.c @@ -112,6 +112,10 @@ static int kdwc3_probe(struct platform_device *pdev) dev->dma_mask = &kdwc3_dma_mask; kdwc->clk = devm_clk_get(kdwc->dev, "usb"); + if (IS_ERR(kdwc->clk)) { + dev_err(kdwc->dev, "unable to get usb clock\n"); + return PTR_ERR(kdwc->clk); + } error = clk_prepare_enable(kdwc->clk); if (error < 0) { diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index d2c0c1a8d979..68230adf2449 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -167,7 +167,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, ret = platform_device_add_resources(dwc3, res, ARRAY_SIZE(res)); if (ret) { dev_err(dev, "couldn't add resources to dwc3 device\n"); - return ret; + goto err; } pci_set_drvdata(pci, dwc3); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 1ffde9c5408c..9edc01692142 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -68,18 +68,27 @@ __ffs_data_got_descs(struct ffs_data *ffs, char *data, size_t len); static int __must_check __ffs_data_got_strings(struct ffs_data *ffs, char *data, size_t len); +static LIST_HEAD(inst_list); + /* ffs instance status */ -static DEFINE_MUTEX(ffs_ep_lock); -static bool ffs_inst_exist; -static struct f_fs_opts *g_opts; +#define INST_NAME_SIZE 16 -/* Free instance structures */ -static void ffs_inst_clean(struct f_fs_opts *opts); -static void ffs_inst_clean_delay(void); -static int ffs_inst_exist_check(void); +struct ffs_inst_status { + char inst_name[INST_NAME_SIZE]; + struct list_head list; + struct mutex ffs_lock; + bool inst_exist; + struct f_fs_opts *opts; + struct ffs_data *ffs_data; +}; -/* Global ffs_data pointer */ -static struct ffs_data *g_ffs_data; +/* Free instance structures */ +static void ffs_inst_clean(struct f_fs_opts *opts, + const char *inst_name); +static void ffs_inst_clean_delay(const char *inst_name); +static int ffs_inst_exist_check(const char *inst_name); +static struct ffs_inst_status *name_to_inst_status( + const char *inst_name, bool create_inst); /* The function structure ***************************************************/ @@ -300,7 +309,7 @@ static ssize_t ffs_ep0_write(struct file *file, const char __user *buf, ffs_log("enter:len %zu state %d setup_state %d flags %lu", len, ffs->state, ffs->setup_state, ffs->flags); - ret = ffs_inst_exist_check(); + ret = ffs_inst_exist_check(ffs->dev_name); if (ret < 0) return ret; @@ -490,7 +499,7 @@ static ssize_t ffs_ep0_read(struct file *file, char __user *buf, ffs_log("enter:len %zu state %d setup_state %d flags %lu", len, ffs->state, ffs->setup_state, ffs->flags); - ret = ffs_inst_exist_check(); + ret = ffs_inst_exist_check(ffs->dev_name); if (ret < 0) return ret; @@ -601,7 +610,7 @@ static int ffs_ep0_open(struct inode *inode, struct file *file) ffs_log("state %d setup_state %d flags %lu opened %d", ffs->state, ffs->setup_state, ffs->flags, atomic_read(&ffs->opened)); - ret = ffs_inst_exist_check(); + ret = ffs_inst_exist_check(ffs->dev_name); if (ret < 0) return ret; @@ -643,7 +652,7 @@ static long ffs_ep0_ioctl(struct file *file, unsigned code, unsigned long value) ffs_log("state %d setup_state %d flags %lu opened %d", ffs->state, ffs->setup_state, ffs->flags, atomic_read(&ffs->opened)); - ret = ffs_inst_exist_check(); + ret = ffs_inst_exist_check(ffs->dev_name); if (ret < 0) return ret; @@ -668,7 +677,7 @@ static unsigned int ffs_ep0_poll(struct file *file, poll_table *wait) ffs_log("enter:state %d setup_state %d flags %lu opened %d", ffs->state, ffs->setup_state, ffs->flags, atomic_read(&ffs->opened)); - ret = ffs_inst_exist_check(); + ret = ffs_inst_exist_check(ffs->dev_name); if (ret < 0) return ret; @@ -799,6 +808,10 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) ffs_log("enter: epfile name %s epfile err %d (%s)", epfile->name, atomic_read(&epfile->error), io_data->read ? "READ" : "WRITE"); + ret = ffs_inst_exist_check(epfile->ffs->dev_name); + if (ret < 0) + return ret; + smp_mb__before_atomic(); retry: if (atomic_read(&epfile->error)) @@ -1085,7 +1098,7 @@ ffs_epfile_open(struct inode *inode, struct file *file) ffs_log("enter:state %d setup_state %d flag %lu", epfile->ffs->state, epfile->ffs->setup_state, epfile->ffs->flags); - ret = ffs_inst_exist_check(); + ret = ffs_inst_exist_check(epfile->ffs->dev_name); if (ret < 0) return ret; @@ -1143,16 +1156,11 @@ static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from) { struct ffs_io_data io_data, *p = &io_data; ssize_t res; - int ret; ENTER(); ffs_log("enter"); - ret = ffs_inst_exist_check(); - if (ret < 0) - return ret; - if (!is_sync_kiocb(kiocb)) { p = kmalloc(sizeof(io_data), GFP_KERNEL); if (unlikely(!p)) @@ -1189,16 +1197,11 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to) { struct ffs_io_data io_data, *p = &io_data; ssize_t res; - int ret; ENTER(); ffs_log("enter"); - ret = ffs_inst_exist_check(); - if (ret < 0) - return ret; - if (!is_sync_kiocb(kiocb)) { p = kmalloc(sizeof(io_data), GFP_KERNEL); if (unlikely(!p)) @@ -1275,7 +1278,7 @@ static long ffs_epfile_ioctl(struct file *file, unsigned code, ffs_log("enter:state %d setup_state %d flag %lu", epfile->ffs->state, epfile->ffs->setup_state, epfile->ffs->flags); - ret = ffs_inst_exist_check(); + ret = ffs_inst_exist_check(epfile->ffs->dev_name); if (ret < 0) return ret; @@ -1583,6 +1586,7 @@ ffs_fs_mount(struct file_system_type *t, int flags, int ret; void *ffs_dev; struct ffs_data *ffs; + struct ffs_inst_status *inst_status; ENTER(); @@ -1612,6 +1616,18 @@ ffs_fs_mount(struct file_system_type *t, int flags, ffs->private_data = ffs_dev; data.ffs_data = ffs; + inst_status = name_to_inst_status(ffs->dev_name, false); + if (IS_ERR(inst_status)) { + ffs_log("failed to find instance (%s)\n", + ffs->dev_name); + return ERR_PTR(-EINVAL); + } + + /* Store ffs to global status structure */ + ffs_dev_lock(); + inst_status->ffs_data = ffs; + ffs_dev_unlock(); + rv = mount_nodev(t, flags, &data, ffs_sb_fill); if (IS_ERR(rv) && data.ffs_data) { ffs_release_dev(data.ffs_data); @@ -1711,6 +1727,9 @@ static void ffs_data_opened(struct ffs_data *ffs) static void ffs_data_put(struct ffs_data *ffs) { + struct ffs_inst_status *inst_status; + const char *dev_name; + ENTER(); ffs_log("enter"); @@ -1718,16 +1737,20 @@ static void ffs_data_put(struct ffs_data *ffs) smp_mb__before_atomic(); if (unlikely(atomic_dec_and_test(&ffs->ref))) { pr_info("%s(): freeing\n", __func__); - /* Clear g_ffs_data */ - ffs_dev_lock(); - g_ffs_data = NULL; - ffs_dev_unlock(); + /* Clear ffs from global structure */ + inst_status = name_to_inst_status(ffs->dev_name, false); + if (!IS_ERR(inst_status)) { + ffs_dev_lock(); + inst_status->ffs_data = NULL; + ffs_dev_unlock(); + } ffs_data_clear(ffs); BUG_ON(waitqueue_active(&ffs->ev.waitq) || waitqueue_active(&ffs->ep0req_completion.wait)); - kfree(ffs->dev_name); + dev_name = ffs->dev_name; kfree(ffs); - ffs_inst_clean_delay(); + ffs_inst_clean_delay(dev_name); + kfree(dev_name); } ffs_log("exit"); @@ -1792,11 +1815,6 @@ static struct ffs_data *ffs_data_new(void) /* XXX REVISIT need to update it in some places, or do we? */ ffs->ev.can_stall = 1; - /* Store ffs to g_ffs_data */ - ffs_dev_lock(); - g_ffs_data = ffs; - ffs_dev_unlock(); - ffs_log("exit"); return ffs; @@ -3684,79 +3702,146 @@ static struct config_item_type ffs_func_type = { /* Function registration interface ******************************************/ -static int ffs_inst_exist_check(void) +static struct ffs_inst_status *name_to_inst_status( + const char *inst_name, bool create_inst) +{ + struct ffs_inst_status *inst_status; + + list_for_each_entry(inst_status, &inst_list, list) { + if (!strncasecmp(inst_status->inst_name, + inst_name, strlen(inst_name))) + return inst_status; + } + + if (!create_inst) + return ERR_PTR(-ENODEV); + + inst_status = kzalloc(sizeof(struct ffs_inst_status), + GFP_KERNEL); + if (!inst_status) + return ERR_PTR(-ENOMEM); + + mutex_init(&inst_status->ffs_lock); + snprintf(inst_status->inst_name, INST_NAME_SIZE, inst_name); + list_add_tail(&inst_status->list, &inst_list); + + return inst_status; +} + +static int ffs_inst_exist_check(const char *inst_name) { - mutex_lock(&ffs_ep_lock); + struct ffs_inst_status *inst_status; - if (unlikely(ffs_inst_exist == false)) { - mutex_unlock(&ffs_ep_lock); + inst_status = name_to_inst_status(inst_name, false); + if (IS_ERR(inst_status)) { pr_err_ratelimited( - "%s: f_fs instance freed already.\n", - __func__); + "%s: failed to find instance (%s)\n", + __func__, inst_name); + return -ENODEV; + } + + mutex_lock(&inst_status->ffs_lock); + + if (unlikely(inst_status->inst_exist == false)) { + mutex_unlock(&inst_status->ffs_lock); + pr_err_ratelimited( + "%s: f_fs instance (%s) has been freed already.\n", + __func__, inst_name); return -ENODEV; } - mutex_unlock(&ffs_ep_lock); + mutex_unlock(&inst_status->ffs_lock); return 0; } -static void ffs_inst_clean(struct f_fs_opts *opts) +static void ffs_inst_clean(struct f_fs_opts *opts, + const char *inst_name) { - g_opts = NULL; + struct ffs_inst_status *inst_status; + + inst_status = name_to_inst_status(inst_name, false); + if (IS_ERR(inst_status)) { + pr_err_ratelimited( + "%s: failed to find instance (%s)\n", + __func__, inst_name); + return; + } + + inst_status->opts = NULL; + ffs_dev_lock(); _ffs_free_dev(opts->dev); ffs_dev_unlock(); kfree(opts); } -static void ffs_inst_clean_delay(void) +static void ffs_inst_clean_delay(const char *inst_name) { - mutex_lock(&ffs_ep_lock); + struct ffs_inst_status *inst_status; - if (unlikely(ffs_inst_exist == false)) { - if (g_opts) { - ffs_inst_clean(g_opts); + inst_status = name_to_inst_status(inst_name, false); + if (IS_ERR(inst_status)) { + pr_err_ratelimited( + "%s: failed to find (%s) instance\n", + __func__, inst_name); + return; + } + + mutex_lock(&inst_status->ffs_lock); + + if (unlikely(inst_status->inst_exist == false)) { + if (inst_status->opts) { + ffs_inst_clean(inst_status->opts, inst_name); pr_err_ratelimited("%s: Delayed free memory\n", __func__); } - mutex_unlock(&ffs_ep_lock); + mutex_unlock(&inst_status->ffs_lock); return; } - mutex_unlock(&ffs_ep_lock); + mutex_unlock(&inst_status->ffs_lock); } static void ffs_free_inst(struct usb_function_instance *f) { struct f_fs_opts *opts; + struct ffs_inst_status *inst_status; opts = to_f_fs_opts(f); - mutex_lock(&ffs_ep_lock); + inst_status = name_to_inst_status(opts->dev->name, false); + if (IS_ERR(inst_status)) { + ffs_log("failed to find (%s) instance\n", + opts->dev->name); + return; + } + + mutex_lock(&inst_status->ffs_lock); if (opts->dev->ffs_data && atomic_read(&opts->dev->ffs_data->opened)) { - ffs_inst_exist = false; - mutex_unlock(&ffs_ep_lock); - ffs_log("%s: Dev is open, free mem when dev close\n", - __func__); + inst_status->inst_exist = false; + mutex_unlock(&inst_status->ffs_lock); + ffs_log("Dev is open, free mem when dev (%s) close\n", + opts->dev->name); return; } - ffs_inst_clean(opts); - ffs_inst_exist = false; - g_opts = NULL; - mutex_unlock(&ffs_ep_lock); + ffs_inst_clean(opts, opts->dev->name); + inst_status->inst_exist = false; + mutex_unlock(&inst_status->ffs_lock); } #define MAX_INST_NAME_LEN 40 static int ffs_set_inst_name(struct usb_function_instance *fi, const char *name) { - struct f_fs_opts *opts; + struct f_fs_opts *opts, *opts_prev; + struct ffs_data *ffs_data_tmp; char *ptr; const char *tmp; int name_len, ret; + struct ffs_inst_status *inst_status; name_len = strlen(name) + 1; if (name_len > MAX_INST_NAME_LEN) @@ -3766,13 +3851,22 @@ static int ffs_set_inst_name(struct usb_function_instance *fi, const char *name) if (!ptr) return -ENOMEM; - mutex_lock(&ffs_ep_lock); - if (g_opts) { - mutex_unlock(&ffs_ep_lock); - ffs_log("%s: prev inst do not freed yet\n", __func__); + inst_status = name_to_inst_status(ptr, true); + if (IS_ERR(inst_status)) { + ffs_log("failed to create status struct for (%s) instance\n", + ptr); + return -EINVAL; + } + + mutex_lock(&inst_status->ffs_lock); + opts_prev = inst_status->opts; + if (opts_prev) { + mutex_unlock(&inst_status->ffs_lock); + ffs_log("instance (%s): prev inst do not freed yet\n", + inst_status->inst_name); return -EBUSY; } - mutex_unlock(&ffs_ep_lock); + mutex_unlock(&inst_status->ffs_lock); opts = to_f_fs_opts(fi); tmp = NULL; @@ -3794,8 +3888,9 @@ static int ffs_set_inst_name(struct usb_function_instance *fi, const char *name) * ffs_private_data also need to update new allocated opts->dev * address. */ - if (g_ffs_data) - opts->dev->ffs_data = g_ffs_data; + ffs_data_tmp = inst_status->ffs_data; + if (ffs_data_tmp) + opts->dev->ffs_data = ffs_data_tmp; if (opts->dev->ffs_data) opts->dev->ffs_data->private_data = opts->dev; @@ -3804,10 +3899,10 @@ static int ffs_set_inst_name(struct usb_function_instance *fi, const char *name) kfree(tmp); - mutex_lock(&ffs_ep_lock); - ffs_inst_exist = true; - g_opts = opts; - mutex_unlock(&ffs_ep_lock); + mutex_lock(&inst_status->ffs_lock); + inst_status->inst_exist = true; + inst_status->opts = opts; + mutex_unlock(&inst_status->ffs_lock); return 0; } @@ -4212,6 +4307,20 @@ module_init(ffs_init); static void __exit ffs_exit(void) { + struct ffs_inst_status *inst_status, *inst_status_tmp = NULL; + + list_for_each_entry(inst_status, &inst_list, list) { + if (inst_status_tmp) { + list_del(&inst_status_tmp->list); + kfree(inst_status_tmp); + } + inst_status_tmp = inst_status; + } + if (inst_status_tmp) { + list_del(&inst_status_tmp->list); + kfree(inst_status_tmp); + } + if (ffs_ipc_log) { ipc_log_context_destroy(ffs_ipc_log); ffs_ipc_log = NULL; diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 408c8eca2bbe..61fb325e4267 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -437,7 +437,6 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, - .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = DEV_PM_OPS, diff --git a/drivers/usb/musb/musb_gadget_ep0.c b/drivers/usb/musb/musb_gadget_ep0.c index 10d30afe4a3c..a0d1417362cd 100644 --- a/drivers/usb/musb/musb_gadget_ep0.c +++ b/drivers/usb/musb/musb_gadget_ep0.c @@ -114,15 +114,19 @@ static int service_tx_status_request( } is_in = epnum & USB_DIR_IN; - if (is_in) { - epnum &= 0x0f; + epnum &= 0x0f; + if (epnum >= MUSB_C_NUM_EPS) { + handled = -EINVAL; + break; + } + + if (is_in) ep = &musb->endpoints[epnum].ep_in; - } else { + else ep = &musb->endpoints[epnum].ep_out; - } regs = musb->endpoints[epnum].regs; - if (epnum >= MUSB_C_NUM_EPS || !ep->desc) { + if (!ep->desc) { handled = -EINVAL; break; } diff --git a/drivers/usb/storage/ene_ub6250.c b/drivers/usb/storage/ene_ub6250.c index 091e8ec7a6c0..962bb6376b0c 100644 --- a/drivers/usb/storage/ene_ub6250.c +++ b/drivers/usb/storage/ene_ub6250.c @@ -1953,6 +1953,8 @@ static int ene_load_bincode(struct us_data *us, unsigned char flag) bcb->CDB[0] = 0xEF; result = ene_send_scsi_cmd(us, FDIR_WRITE, buf, 0); + if (us->srb != NULL) + scsi_set_resid(us->srb, 0); info->BIN_FLAG = flag; kfree(buf); @@ -2306,21 +2308,22 @@ static int ms_scsi_irp(struct us_data *us, struct scsi_cmnd *srb) static int ene_transport(struct scsi_cmnd *srb, struct us_data *us) { - int result = 0; + int result = USB_STOR_XFER_GOOD; struct ene_ub6250_info *info = (struct ene_ub6250_info *)(us->extra); /*US_DEBUG(usb_stor_show_command(us, srb)); */ scsi_set_resid(srb, 0); - if (unlikely(!(info->SD_Status.Ready || info->MS_Status.Ready))) { + if (unlikely(!(info->SD_Status.Ready || info->MS_Status.Ready))) result = ene_init(us); - } else { + if (result == USB_STOR_XFER_GOOD) { + result = USB_STOR_TRANSPORT_ERROR; if (info->SD_Status.Ready) result = sd_scsi_irp(us, srb); if (info->MS_Status.Ready) result = ms_scsi_irp(us, srb); } - return 0; + return result; } static struct scsi_host_template ene_ub6250_host_template; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index fe2b470d7ec6..c55c632a3b24 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -752,6 +752,62 @@ static int __init init_pci_cap_pcix_perm(struct perm_bits *perm) return 0; } +static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + __le16 *ctrl = (__le16 *)(vdev->vconfig + pos - + offset + PCI_EXP_DEVCTL); + int readrq = le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ; + + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) + return count; + + /* + * The FLR bit is virtualized, if set and the device supports PCIe + * FLR, issue a reset_function. Regardless, clear the bit, the spec + * requires it to be always read as zero. NB, reset_function might + * not use a PCIe FLR, we don't have that level of granularity. + */ + if (*ctrl & cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR)) { + u32 cap; + int ret; + + *ctrl &= ~cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR); + + ret = pci_user_read_config_dword(vdev->pdev, + pos - offset + PCI_EXP_DEVCAP, + &cap); + + if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) + pci_try_reset_function(vdev->pdev); + } + + /* + * MPS is virtualized to the user, writes do not change the physical + * register since determining a proper MPS value requires a system wide + * device view. The MRRS is largely independent of MPS, but since the + * user does not have that system-wide view, they might set a safe, but + * inefficiently low value. Here we allow writes through to hardware, + * but we set the floor to the physical device MPS setting, so that + * we can at least use full TLPs, as defined by the MPS value. + * + * NB, if any devices actually depend on an artificially low MRRS + * setting, this will need to be revisited, perhaps with a quirk + * though pcie_set_readrq(). + */ + if (readrq != (le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ)) { + readrq = 128 << + ((le16_to_cpu(*ctrl) & PCI_EXP_DEVCTL_READRQ) >> 12); + readrq = max(readrq, pcie_get_mps(vdev->pdev)); + + pcie_set_readrq(vdev->pdev, readrq); + } + + return count; +} + /* Permissions for PCI Express capability */ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) { @@ -759,26 +815,67 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2)) return -ENOMEM; + perm->writefn = vfio_exp_config_write; + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); /* - * Allow writes to device control fields (includes FLR!) - * but not to devctl_phantom which could confuse IOMMU - * or to the ARI bit in devctl2 which is set at probe time + * Allow writes to device control fields, except devctl_phantom, + * which could confuse IOMMU, MPS, which can break communication + * with other physical devices, and the ARI bit in devctl2, which + * is set at probe time. FLR and MRRS get virtualized via our + * writefn. */ - p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM); + p_setw(perm, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD | + PCI_EXP_DEVCTL_READRQ, ~PCI_EXP_DEVCTL_PHANTOM); p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); return 0; } +static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + u8 *ctrl = vdev->vconfig + pos - offset + PCI_AF_CTRL; + + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) + return count; + + /* + * The FLR bit is virtualized, if set and the device supports AF + * FLR, issue a reset_function. Regardless, clear the bit, the spec + * requires it to be always read as zero. NB, reset_function might + * not use an AF FLR, we don't have that level of granularity. + */ + if (*ctrl & PCI_AF_CTRL_FLR) { + u8 cap; + int ret; + + *ctrl &= ~PCI_AF_CTRL_FLR; + + ret = pci_user_read_config_byte(vdev->pdev, + pos - offset + PCI_AF_CAP, + &cap); + + if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) + pci_try_reset_function(vdev->pdev); + } + + return count; +} + /* Permissions for Advanced Function capability */ static int __init init_pci_cap_af_perm(struct perm_bits *perm) { if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF])) return -ENOMEM; + perm->writefn = vfio_af_config_write; + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); - p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR); + p_setb(perm, PCI_AF_CTRL, PCI_AF_CTRL_FLR, PCI_AF_CTRL_FLR); return 0; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ad2146a9ab2d..675819a1af37 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -173,8 +173,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file) if (mask) vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); if (mask & POLLERR) { - if (poll->wqh) - remove_wait_queue(poll->wqh, &poll->wait); + vhost_poll_stop(poll); ret = -EINVAL; } diff --git a/drivers/video/fbdev/vfb.c b/drivers/video/fbdev/vfb.c index 728cb6b23c42..7d8dfc7f1269 100644 --- a/drivers/video/fbdev/vfb.c +++ b/drivers/video/fbdev/vfb.c @@ -298,8 +298,23 @@ static int vfb_check_var(struct fb_var_screeninfo *var, */ static int vfb_set_par(struct fb_info *info) { + switch (info->var.bits_per_pixel) { + case 1: + info->fix.visual = FB_VISUAL_MONO01; + break; + case 8: + info->fix.visual = FB_VISUAL_PSEUDOCOLOR; + break; + case 16: + case 24: + case 32: + info->fix.visual = FB_VISUAL_TRUECOLOR; + break; + } + info->fix.line_length = get_line_length(info->var.xres_virtual, info->var.bits_per_pixel); + return 0; } @@ -540,6 +555,8 @@ static int vfb_probe(struct platform_device *dev) goto err2; platform_set_drvdata(dev, info); + vfb_set_par(info); + fb_info(info, "Virtual frame buffer device, using %ldK of video memory\n", videomemorysize >> 10); return 0; diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 016bd9355190..aa93df5833dc 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -450,7 +450,7 @@ static bool watchdog_is_running(void) is_running = (superio_inb(watchdog.sioaddr, SIO_REG_ENABLE) & BIT(0)) && (superio_inb(watchdog.sioaddr, F71808FG_REG_WDT_CONF) - & F71808FG_FLAG_WD_EN); + & BIT(F71808FG_FLAG_WD_EN)); superio_exit(watchdog.sioaddr); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 7a54c6a867c8..500098cdb960 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -746,7 +746,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m autofs4_del_active(dentry); - inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555); + inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode); if (!inode) return -ENOMEM; d_add(dentry, inode); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e767f347f2b1..88bee6703cc0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2534,7 +2534,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) if (!uptodate) { ClearPageUptodate(page); SetPageError(page); - ret = ret < 0 ? ret : -EIO; + ret = err < 0 ? err : -EIO; mapping_set_error(page->mapping, ret); } return 0; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 744be3c146f5..0141aba9eca6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -589,7 +589,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - down_read(&cinode->lock_sem); + down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); if (cinode->can_cache_brlcks) { /* can cache locks - no need to relock */ up_read(&cinode->lock_sem); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 33b1bc21a120..807e989f436a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -999,15 +999,19 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, goto tcon_exit; } - if (rsp->ShareType & SMB2_SHARE_TYPE_DISK) + switch (rsp->ShareType) { + case SMB2_SHARE_TYPE_DISK: cifs_dbg(FYI, "connection to disk share\n"); - else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) { + break; + case SMB2_SHARE_TYPE_PIPE: tcon->ipc = true; cifs_dbg(FYI, "connection to pipe share\n"); - } else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) { - tcon->print = true; + break; + case SMB2_SHARE_TYPE_PRINT: + tcon->ipc = true; cifs_dbg(FYI, "connection to printer\n"); - } else { + break; + default: cifs_dbg(VFS, "unknown share type %d\n", rsp->ShareType); rc = -EOPNOTSUPP; goto tcon_error_exit; diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index a91ed46fe503..c7cf565c434e 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -25,15 +25,8 @@ #include <linux/namei.h> #include "fscrypt_private.h" -/* - * Call fscrypt_decrypt_page on every single page, reusing the encryption - * context. - */ -static void completion_pages(struct work_struct *work) +static void __fscrypt_decrypt_bio(struct bio *bio, bool done) { - struct fscrypt_ctx *ctx = - container_of(work, struct fscrypt_ctx, r.work); - struct bio *bio = ctx->r.bio; struct bio_vec *bv; int i; @@ -45,22 +38,38 @@ static void completion_pages(struct work_struct *work) if (ret) { WARN_ON_ONCE(1); SetPageError(page); - } else { + } else if (done) { SetPageUptodate(page); } - unlock_page(page); + if (done) + unlock_page(page); } +} + +void fscrypt_decrypt_bio(struct bio *bio) +{ + __fscrypt_decrypt_bio(bio, false); +} +EXPORT_SYMBOL(fscrypt_decrypt_bio); + +static void completion_pages(struct work_struct *work) +{ + struct fscrypt_ctx *ctx = + container_of(work, struct fscrypt_ctx, r.work); + struct bio *bio = ctx->r.bio; + + __fscrypt_decrypt_bio(bio, true); fscrypt_release_ctx(ctx); bio_put(bio); } -void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio) +void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio) { INIT_WORK(&ctx->r.work, completion_pages); ctx->r.bio = bio; - queue_work(fscrypt_read_workqueue, &ctx->r.work); + fscrypt_enqueue_decrypt_work(&ctx->r.work); } -EXPORT_SYMBOL(fscrypt_decrypt_bio_pages); +EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio); void fscrypt_pullback_bio_page(struct page **page, bool restore) { diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 732a786cce9d..0758d32ad01b 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -27,6 +27,7 @@ #include <linux/dcache.h> #include <linux/namei.h> #include <crypto/aes.h> +#include <crypto/skcipher.h> #include "fscrypt_private.h" static unsigned int num_prealloc_crypto_pages = 32; @@ -44,12 +45,18 @@ static mempool_t *fscrypt_bounce_page_pool = NULL; static LIST_HEAD(fscrypt_free_ctxs); static DEFINE_SPINLOCK(fscrypt_ctx_lock); -struct workqueue_struct *fscrypt_read_workqueue; +static struct workqueue_struct *fscrypt_read_workqueue; static DEFINE_MUTEX(fscrypt_init_mutex); static struct kmem_cache *fscrypt_ctx_cachep; struct kmem_cache *fscrypt_info_cachep; +void fscrypt_enqueue_decrypt_work(struct work_struct *work) +{ + queue_work(fscrypt_read_workqueue, work); +} +EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); + /** * fscrypt_release_ctx() - Releases an encryption context * @ctx: The encryption context to release. diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 6eb434363ff2..b18fa323d1d9 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -12,42 +12,46 @@ #include <linux/scatterlist.h> #include <linux/ratelimit.h> +#include <crypto/skcipher.h> #include "fscrypt_private.h" +static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + /** * fname_encrypt() - encrypt a filename * - * The caller must have allocated sufficient memory for the @oname string. + * The output buffer must be at least as large as the input buffer. + * Any extra space is filled with NUL padding before encryption. * * Return: 0 on success, -errno on failure */ -static int fname_encrypt(struct inode *inode, - const struct qstr *iname, struct fscrypt_str *oname) +int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - struct fscrypt_info *ci = inode->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; + struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm; int res = 0; char iv[FS_CRYPTO_BLOCK_SIZE]; struct scatterlist sg; - int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - unsigned int lim; - unsigned int cryptlen; - - lim = inode->i_sb->s_cop->max_namelen(inode); - if (iname->len <= 0 || iname->len > lim) - return -EIO; /* * Copy the filename to the output buffer for encrypting in-place and * pad it with the needed number of NUL bytes. */ - cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); - cryptlen = round_up(cryptlen, padding); - cryptlen = min(cryptlen, lim); - memcpy(oname->name, iname->name, iname->len); - memset(oname->name + iname->len, 0, cryptlen - iname->len); + if (WARN_ON(olen < iname->len)) + return -ENOBUFS; + memcpy(out, iname->name, iname->len); + memset(out + iname->len, 0, olen - iname->len); /* Initialize the IV */ memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); @@ -62,8 +66,8 @@ static int fname_encrypt(struct inode *inode, skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, &wait); - sg_init_one(&sg, oname->name, cryptlen); - skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); + sg_init_one(&sg, out, olen); + skcipher_request_set_crypt(req, &sg, &sg, olen, iv); /* Do the encryption */ res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); @@ -74,7 +78,6 @@ static int fname_encrypt(struct inode *inode, return res; } - oname->len = cryptlen; return 0; } @@ -187,50 +190,52 @@ static int digest_decode(const char *src, int len, char *dst) return cp - dst; } -u32 fscrypt_fname_encrypted_size(const struct inode *inode, u32 ilen) +bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, + u32 max_len, u32 *encrypted_len_ret) { - int padding = 32; - struct fscrypt_info *ci = inode->i_crypt_info; - - if (ci) - padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - ilen = max(ilen, (u32)FS_CRYPTO_BLOCK_SIZE); - return round_up(ilen, padding); + int padding = 4 << (inode->i_crypt_info->ci_flags & + FS_POLICY_FLAGS_PAD_MASK); + u32 encrypted_len; + + if (orig_len > max_len) + return false; + encrypted_len = max(orig_len, (u32)FS_CRYPTO_BLOCK_SIZE); + encrypted_len = round_up(encrypted_len, padding); + *encrypted_len_ret = min(encrypted_len, max_len); + return true; } -EXPORT_SYMBOL(fscrypt_fname_encrypted_size); /** - * fscrypt_fname_crypto_alloc_obuff() - + * fscrypt_fname_alloc_buffer - allocate a buffer for presented filenames + * + * Allocate a buffer that is large enough to hold any decrypted or encoded + * filename (null-terminated), for the given maximum encrypted filename length. * - * Allocates an output buffer that is sufficient for the crypto operation - * specified by the context and the direction. + * Return: 0 on success, -errno on failure */ int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 ilen, struct fscrypt_str *crypto_str) + u32 max_encrypted_len, + struct fscrypt_str *crypto_str) { - u32 olen = fscrypt_fname_encrypted_size(inode, ilen); const u32 max_encoded_len = max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); + u32 max_presented_len; - crypto_str->len = olen; - olen = max(olen, max_encoded_len); + max_presented_len = max(max_encoded_len, max_encrypted_len); - /* - * Allocated buffer can hold one more character to null-terminate the - * string - */ - crypto_str->name = kmalloc(olen + 1, GFP_NOFS); - if (!(crypto_str->name)) + crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS); + if (!crypto_str->name) return -ENOMEM; + crypto_str->len = max_presented_len; return 0; } EXPORT_SYMBOL(fscrypt_fname_alloc_buffer); /** - * fscrypt_fname_crypto_free_buffer() - + * fscrypt_fname_free_buffer - free the buffer for presented filenames * - * Frees the buffer allocated for crypto operation. + * Free the buffer allocated by fscrypt_fname_alloc_buffer(). */ void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) { @@ -297,35 +302,6 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); /** - * fscrypt_fname_usr_to_disk() - converts a filename from user space to disk - * space - * - * The caller must have allocated sufficient memory for the @oname string. - * - * Return: 0 on success, -errno on failure - */ -int fscrypt_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - if (fscrypt_is_dot_dotdot(iname)) { - oname->name[0] = '.'; - oname->name[iname->len - 1] = '.'; - oname->len = iname->len; - return 0; - } - if (inode->i_crypt_info) - return fname_encrypt(inode, iname, oname); - /* - * Without a proper key, a user is not allowed to modify the filenames - * in a directory. Consequently, a user space name cannot be mapped to - * a disk-space name - */ - return -ENOKEY; -} -EXPORT_SYMBOL(fscrypt_fname_usr_to_disk); - -/** * fscrypt_setup_filename() - prepare to search a possibly encrypted directory * @dir: the directory that will be searched * @iname: the user-provided filename being searched for @@ -368,11 +344,17 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return ret; if (dir->i_crypt_info) { - ret = fscrypt_fname_alloc_buffer(dir, iname->len, - &fname->crypto_buf); - if (ret) - return ret; - ret = fname_encrypt(dir, iname, &fname->crypto_buf); + if (!fscrypt_fname_encrypted_size(dir, iname->len, + dir->i_sb->s_cop->max_namelen(dir), + &fname->crypto_buf.len)) + return -ENAMETOOLONG; + fname->crypto_buf.name = kmalloc(fname->crypto_buf.len, + GFP_NOFS); + if (!fname->crypto_buf.name) + return -ENOMEM; + + ret = fname_encrypt(dir, iname, fname->crypto_buf.name, + fname->crypto_buf.len); if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; @@ -424,7 +406,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, return 0; errout: - fscrypt_fname_free_buffer(&fname->crypto_buf); + kfree(fname->crypto_buf.name); return ret; } EXPORT_SYMBOL(fscrypt_setup_filename); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index c3ad415cd14f..426aa1b27f17 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -49,6 +49,15 @@ struct fscrypt_context { #define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1 +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct fscrypt_symlink_data { + __le16 len; + char encrypted_path[1]; +} __packed; + /* * A pointer to this structure is stored in the file system's in-core * representation of an inode. @@ -81,9 +90,23 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, bio->bi_rw = op | op_flags; } +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) +{ + if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && + filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) + return true; + + if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && + filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) + return true; + + return false; +} + /* crypto.c */ +extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); -extern struct workqueue_struct *fscrypt_read_workqueue; extern int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw, u64 lblk_num, struct page *src_page, @@ -93,6 +116,13 @@ extern int fscrypt_do_page_crypto(const struct inode *inode, extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags); +/* fname.c */ +extern int fname_encrypt(struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen); +extern bool fscrypt_fname_encrypted_size(const struct inode *inode, + u32 orig_len, u32 max_len, + u32 *encrypted_len_ret); + /* keyinfo.c */ extern void __exit fscrypt_essiv_cleanup(void); diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 9f5fb2eb9cf7..bc010e4609ef 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -110,3 +110,159 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) return 0; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); + +int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + int err; + + /* + * To calculate the size of the encrypted symlink target we need to know + * the amount of NUL padding, which is determined by the flags set in + * the encryption policy which will be inherited from the directory. + * The easiest way to get access to this is to just load the directory's + * fscrypt_info, since we'll need it to create the dir_entry anyway. + * + * Note: in test_dummy_encryption mode, @dir may be unencrypted. + */ + err = fscrypt_get_encryption_info(dir); + if (err) + return err; + if (!fscrypt_has_encryption_key(dir)) + return -ENOKEY; + + /* + * Calculate the size of the encrypted symlink and verify it won't + * exceed max_len. Note that for historical reasons, encrypted symlink + * targets are prefixed with the ciphertext length, despite this + * actually being redundant with i_size. This decreases by 2 bytes the + * longest symlink target we can accept. + * + * We could recover 1 byte by not counting a null terminator, but + * counting it (even though it is meaningless for ciphertext) is simpler + * for now since filesystems will assume it is there and subtract it. + */ + if (!fscrypt_fname_encrypted_size(dir, len, + max_len - sizeof(struct fscrypt_symlink_data), + &disk_link->len)) + return -ENAMETOOLONG; + disk_link->len += sizeof(struct fscrypt_symlink_data); + + disk_link->name = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_prepare_symlink); + +int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, struct fscrypt_str *disk_link) +{ + int err; + struct qstr iname = QSTR_INIT(target, len); + struct fscrypt_symlink_data *sd; + unsigned int ciphertext_len; + + err = fscrypt_require_key(inode); + if (err) + return err; + + if (disk_link->name) { + /* filesystem-provided buffer */ + sd = (struct fscrypt_symlink_data *)disk_link->name; + } else { + sd = kmalloc(disk_link->len, GFP_NOFS); + if (!sd) + return -ENOMEM; + } + ciphertext_len = disk_link->len - sizeof(*sd); + sd->len = cpu_to_le16(ciphertext_len); + + err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); + if (err) { + if (!disk_link->name) + kfree(sd); + return err; + } + /* + * Null-terminating the ciphertext doesn't make sense, but we still + * count the null terminator in the length, so we might as well + * initialize it just in case the filesystem writes it out. + */ + sd->encrypted_path[ciphertext_len] = '\0'; + + if (!disk_link->name) + disk_link->name = (unsigned char *)sd; + return 0; +} +EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink); + +/** + * fscrypt_get_symlink - get the target of an encrypted symlink + * @inode: the symlink inode + * @caddr: the on-disk contents of the symlink + * @max_size: size of @caddr buffer + * @done: if successful, will be set up to free the returned target + * + * If the symlink's encryption key is available, we decrypt its target. + * Otherwise, we encode its target for presentation. + * + * This may sleep, so the filesystem must have dropped out of RCU mode already. + * + * Return: the presentable symlink target or an ERR_PTR() + */ +void *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size) +{ + const struct fscrypt_symlink_data *sd; + struct fscrypt_str cstr, pstr; + int err; + + /* This is for encrypted symlinks only */ + if (WARN_ON(!IS_ENCRYPTED(inode))) + return ERR_PTR(-EINVAL); + + /* + * Try to set up the symlink's encryption key, but we can continue + * regardless of whether the key is available or not. + */ + err = fscrypt_get_encryption_info(inode); + if (err) + return ERR_PTR(err); + + /* + * For historical reasons, encrypted symlink targets are prefixed with + * the ciphertext length, even though this is redundant with i_size. + */ + + if (max_size < sizeof(*sd)) + return ERR_PTR(-EUCLEAN); + sd = caddr; + cstr.name = (unsigned char *)sd->encrypted_path; + cstr.len = le16_to_cpu(sd->len); + + if (cstr.len == 0) + return ERR_PTR(-EUCLEAN); + + if (cstr.len + sizeof(*sd) - 1 > max_size) + return ERR_PTR(-EUCLEAN); + + err = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); + if (err) + return ERR_PTR(err); + + err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); + if (err) + goto err_kfree; + + err = -EUCLEAN; + if (pstr.name[0] == '\0') + goto err_kfree; + + pstr.name[pstr.len] = '\0'; + return pstr.name; + +err_kfree: + kfree(pstr.name); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(fscrypt_get_symlink); diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 444c65ed6db8..7c00331da5df 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -13,6 +13,7 @@ #include <linux/ratelimit.h> #include <crypto/aes.h> #include <crypto/sha.h> +#include <crypto/skcipher.h> #include "fscrypt_private.h" static struct crypto_shash *essiv_hash_tfm; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f97110461c19..78c51ce913db 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -242,8 +242,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, */ ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), sb->s_blocksize * 8, bh->b_data); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); - ext4_group_desc_csum_set(sb, block_group, gdp); return 0; } @@ -447,6 +445,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); + set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); if (err) { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a8b1749d79a8..debf0707789d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -460,7 +460,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, int i, num; unsigned long nr_pages; - num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, (pgoff_t)num); if (nr_pages == 0) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 5388207d2832..e10c12f59c58 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -63,44 +63,6 @@ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); } -/* Initializes an uninitialized inode bitmap */ -static int ext4_init_inode_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t block_group, - struct ext4_group_desc *gdp) -{ - struct ext4_group_info *grp; - struct ext4_sb_info *sbi = EXT4_SB(sb); - J_ASSERT_BH(bh, buffer_locked(bh)); - - /* If checksum is bad mark all blocks and inodes use to prevent - * allocation, essentially implementing a per-group read-only flag. */ - if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - grp = ext4_get_group_info(sb, block_group); - if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) - percpu_counter_sub(&sbi->s_freeclusters_counter, - grp->bb_free); - set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); - if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { - int count; - count = ext4_free_inodes_count(sb, gdp); - percpu_counter_sub(&sbi->s_freeinodes_counter, - count); - } - set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return -EFSBADCRC; - } - - memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); - ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, - bh->b_data); - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, gdp); - - return 0; -} - void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) { if (uptodate) { @@ -184,17 +146,14 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - err = ext4_init_inode_bitmap(sb, bh, block_group, desc); + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); + ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), + sb->s_blocksize * 8, bh->b_data); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) { - ext4_error(sb, "Failed to init inode bitmap for group " - "%u: %d", block_group, err); - goto out; - } return bh; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 280d67fe33a7..820d692bc931 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -377,7 +377,7 @@ out: static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, unsigned int len) { - int ret, size; + int ret, size, no_expand; struct ext4_inode_info *ei = EXT4_I(inode); if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) @@ -387,15 +387,14 @@ static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, if (size < len) return -ENOSPC; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (ei->i_inline_off) ret = ext4_update_inline_data(handle, inode, len); else ret = ext4_create_inline_data(handle, inode, len); - up_write(&EXT4_I(inode)->xattr_sem); - + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -537,7 +536,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping, struct inode *inode, unsigned flags) { - int ret, needed_blocks; + int ret, needed_blocks, no_expand; handle_t *handle = NULL; int retries = 0, sem_held = 0; struct page *page = NULL; @@ -577,7 +576,7 @@ retry: goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); sem_held = 1; /* If some one has already done this for us, just exit. */ if (!ext4_has_inline_data(inode)) { @@ -613,7 +612,7 @@ retry: page_cache_release(page); page = NULL; ext4_orphan_add(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); sem_held = 0; ext4_journal_stop(handle); handle = NULL; @@ -639,7 +638,7 @@ out: page_cache_release(page); } if (sem_held) - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); if (handle) ext4_journal_stop(handle); brelse(iloc.bh); @@ -732,7 +731,7 @@ convert: int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -750,7 +749,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, goto out; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); BUG_ON(!ext4_has_inline_data(inode)); kaddr = kmap_atomic(page); @@ -760,7 +759,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, /* clear page dirty so that writepages wouldn't work for us. */ ClearPageDirty(page); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); out: return copied; @@ -771,7 +770,7 @@ ext4_journalled_write_inline_data(struct inode *inode, unsigned len, struct page *page) { - int ret; + int ret, no_expand; void *kaddr; struct ext4_iloc iloc; @@ -781,11 +780,11 @@ ext4_journalled_write_inline_data(struct inode *inode, return NULL; } - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); kaddr = kmap_atomic(page); ext4_write_inline_data(inode, &iloc, kaddr, 0, len); kunmap_atomic(kaddr); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return iloc.bh; } @@ -1268,7 +1267,7 @@ out: int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, struct inode *dir, struct inode *inode) { - int ret, inline_size; + int ret, inline_size, no_expand; void *inline_start; struct ext4_iloc iloc; @@ -1276,7 +1275,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, if (ret) return ret; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) goto out; @@ -1322,7 +1321,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, out: ext4_mark_inode_dirty(handle, dir); - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); brelse(iloc.bh); return ret; } @@ -1682,7 +1681,7 @@ int ext4_delete_inline_entry(handle_t *handle, struct buffer_head *bh, int *has_inline_data) { - int err, inline_size; + int err, inline_size, no_expand; struct ext4_iloc iloc; void *inline_start; @@ -1690,7 +1689,7 @@ int ext4_delete_inline_entry(handle_t *handle, if (err) return err; - down_write(&EXT4_I(dir)->xattr_sem); + ext4_write_lock_xattr(dir, &no_expand); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; goto out; @@ -1725,7 +1724,7 @@ int ext4_delete_inline_entry(handle_t *handle, ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); out: - up_write(&EXT4_I(dir)->xattr_sem); + ext4_write_unlock_xattr(dir, &no_expand); brelse(iloc.bh); if (err != -ENOENT) ext4_std_error(dir->i_sb, err); @@ -1824,11 +1823,11 @@ out: int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) { - int ret; + int ret, no_expand; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); ret = ext4_destroy_inline_data_nolock(handle, inode); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return ret; } @@ -1913,7 +1912,7 @@ out: void ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; - int inline_size, value_len, needed_blocks; + int inline_size, value_len, needed_blocks, no_expand; size_t i_size; void *value = NULL; struct ext4_xattr_ibody_find is = { @@ -1930,7 +1929,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) if (IS_ERR(handle)) return; - down_write(&EXT4_I(inode)->xattr_sem); + ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { *has_inline = 0; ext4_journal_stop(handle); @@ -1988,7 +1987,7 @@ out_error: up_write(&EXT4_I(inode)->i_data_sem); out: brelse(is.iloc.bh); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); kfree(value); if (inode->i_nlink) ext4_orphan_del(handle, inode); @@ -2004,7 +2003,7 @@ out: int ext4_convert_inline_data(struct inode *inode) { - int error, needed_blocks; + int error, needed_blocks, no_expand; handle_t *handle; struct ext4_iloc iloc; @@ -2026,15 +2025,10 @@ int ext4_convert_inline_data(struct inode *inode) goto out_free; } - down_write(&EXT4_I(inode)->xattr_sem); - if (!ext4_has_inline_data(inode)) { - up_write(&EXT4_I(inode)->xattr_sem); - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); - up_write(&EXT4_I(inode)->xattr_sem); -out: + ext4_write_lock_xattr(inode, &no_expand); + if (ext4_has_inline_data(inode)) + error = ext4_convert_inline_data_nolock(handle, inode, &iloc); + ext4_write_unlock_xattr(inode, &no_expand); ext4_journal_stop(handle); out_free: brelse(iloc.bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 78701445348f..760c5f40a555 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1545,6 +1545,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); if (invalidate) { + if (page_mapped(page)) + clear_page_dirty_for_io(page); block_invalidatepage(page, 0, PAGE_CACHE_SIZE); ClearPageUptodate(page); } @@ -3297,29 +3299,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * case, we allocate an io_end structure to hook to the iocb. */ iocb->private = NULL; - ext4_inode_aio_set(inode, NULL); - if (!is_sync_kiocb(iocb)) { - io_end = ext4_init_io_end(inode, GFP_NOFS); - if (!io_end) { - ret = -ENOMEM; - goto retake_lock; - } - /* - * Grab reference for DIO. Will be dropped in ext4_end_io_dio() - */ - iocb->private = ext4_get_io_end(io_end); - /* - * we save the io structure for current async direct - * IO, so that later ext4_map_blocks() could flag the - * io structure whether there is a unwritten extents - * needs to be converted when IO is completed. - */ - ext4_inode_aio_set(inode, io_end); - } - if (overwrite) { get_block_func = ext4_get_block_write_nolock; } else { + ext4_inode_aio_set(inode, NULL); + if (!is_sync_kiocb(iocb)) { + io_end = ext4_init_io_end(inode, GFP_NOFS); + if (!io_end) { + ret = -ENOMEM; + goto retake_lock; + } + /* + * Grab reference for DIO. Will be dropped in + * ext4_end_io_dio() + */ + iocb->private = ext4_get_io_end(io_end); + /* + * we save the io structure for current async direct + * IO, so that later ext4_map_blocks() could flag the + * io structure whether there is a unwritten extents + * needs to be converted when IO is completed. + */ + ext4_inode_aio_set(inode, io_end); + } get_block_func = ext4_get_block_write; dio_flags = DIO_LOCKING; } @@ -4317,6 +4319,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) goto bad_inode; raw_inode = ext4_raw_inode(&iloc); + if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) { + EXT4_ERROR_INODE(inode, "root inode unallocated"); + ret = -EFSCORRUPTED; + goto bad_inode; + } + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 27ff3706d632..03b874f3fefd 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3875,7 +3875,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - ext4_error(sb, "Error loading buddy information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + err, group); put_bh(bitmap_bh); return 0; } @@ -4032,10 +4033,11 @@ repeat: BUG_ON(pa->pa_type != MB_INODE_PA); group = ext4_get_group_number(sb, pa->pa_pstart); - err = ext4_mb_load_buddy(sb, group, &e4b); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); if (err) { - ext4_error(sb, "Error loading buddy information for %u", - group); + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } @@ -4291,11 +4293,14 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, spin_unlock(&lg->lg_prealloc_lock); list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) { + int err; group = ext4_get_group_number(sb, pa->pa_pstart); - if (ext4_mb_load_buddy(sb, group, &e4b)) { - ext4_error(sb, "Error loading buddy information for %u", - group); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); + if (err) { + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } ext4_lock_group(sb, group); @@ -5121,8 +5126,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ret = ext4_mb_load_buddy(sb, group, &e4b); if (ret) { - ext4_error(sb, "Error in loading buddy " - "information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + ret, group); return ret; } bitmap = e4b.bd_bitmap; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bc79e2ca4adb..8cff133ff5f3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2130,6 +2130,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u overlaps " "superblock", i); + if (!(sb->s_flags & MS_RDONLY)) + return 0; } if (block_bitmap < first_block || block_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -2142,6 +2144,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u overlaps " "superblock", i); + if (!(sb->s_flags & MS_RDONLY)) + return 0; } if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -2154,6 +2158,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u overlaps " "superblock", i); + if (!(sb->s_flags & MS_RDONLY)) + return 0; } if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c356b49540cb..b16bfb52edb2 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1143,16 +1143,14 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, struct ext4_xattr_block_find bs = { .s = { .not_found = -ENODATA, }, }; - unsigned long no_expand; + int no_expand; int error; if (!name) return -EINVAL; if (strlen(name) > 255) return -ERANGE; - down_write(&EXT4_I(inode)->xattr_sem); - no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_write_lock_xattr(inode, &no_expand); error = ext4_reserve_inode_write(handle, inode, &is.iloc); if (error) @@ -1213,7 +1211,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = ext4_current_time(inode); if (!value) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + no_expand = 0; error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1227,9 +1225,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, cleanup: brelse(is.iloc.bh); brelse(bs.bh); - if (no_expand == 0) - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return error; } @@ -1313,12 +1309,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int error = 0, tried_min_extra_isize = 0; int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ + int no_expand; + + if (ext4_write_trylock_xattr(inode, &no_expand) == 0) + return 0; - down_write(&EXT4_I(inode)->xattr_sem); - /* - * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty - */ - ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) @@ -1512,8 +1507,7 @@ retry: } brelse(bh); out: - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); - up_write(&EXT4_I(inode)->xattr_sem); + ext4_write_unlock_xattr(inode, &no_expand); return 0; cleanup: @@ -1525,10 +1519,10 @@ cleanup: kfree(bs); brelse(bh); /* - * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode - * size expansion failed. + * Inode size expansion failed; don't try again */ - up_write(&EXT4_I(inode)->xattr_sem); + no_expand = 1; + ext4_write_unlock_xattr(inode, &no_expand); return error; } diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 10b0f7323ed6..cdc413476241 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -101,6 +101,38 @@ extern const struct xattr_handler ext4_xattr_security_handler; #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" +/* + * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes. + * The first is to signal that there the inline xattrs and data are + * taking up so much space that we might as well not keep trying to + * expand it. The second is that xattr_sem is taken for writing, so + * we shouldn't try to recurse into the inode expansion. For this + * second case, we need to make sure that we take save and restore the + * NO_EXPAND state flag appropriately. + */ +static inline void ext4_write_lock_xattr(struct inode *inode, int *save) +{ + down_write(&EXT4_I(inode)->xattr_sem); + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); +} + +static inline int ext4_write_trylock_xattr(struct inode *inode, int *save) +{ + if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0) + return 0; + *save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + return 1; +} + +static inline void ext4_write_unlock_xattr(struct inode *inode, int *save) +{ + if (*save == 0) + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + up_write(&EXT4_I(inode)->xattr_sem); +} + extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3c343e922f6e..760d1ad22722 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -68,6 +68,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, + .is_meta = is_meta, }; if (unlikely(!is_meta)) @@ -163,6 +164,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, REQ_RAHEAD, .encrypted_page = NULL, .in_list = false, + .is_meta = (type != META_POR), }; struct blk_plug plug; @@ -384,7 +386,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); if (!PageDirty(page)) { - f2fs_set_page_dirty_nobuffers(page); + __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); SetPagePrivate(page); f2fs_trace_pid(page); @@ -573,13 +575,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) struct node_info ni; int err = acquire_orphan_inode(sbi); - if (err) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); - return err; - } + if (err) + goto err_out; __add_ino_entry(sbi, ino, 0, ORPHAN_INO); @@ -593,6 +590,11 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) return PTR_ERR(inode); } + err = dquot_initialize(inode); + if (err) + goto err_out; + + dquot_initialize(inode); clear_nlink(inode); /* truncate all the data during iput */ @@ -602,14 +604,18 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) /* ENOMEM was fully retried in f2fs_evict_inode. */ if (ni.blk_addr != NULL_ADDR) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x) by kernel, retry mount.", - __func__, ino); - return -EIO; + err = -EIO; + goto err_out; } __remove_ino_entry(sbi, ino, ORPHAN_INO); return 0; + +err_out: + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); + return err; } int recover_orphan_inodes(struct f2fs_sb_info *sbi) @@ -1140,6 +1146,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason & CP_TRIMMED) __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG); if (cpc->reason & CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); @@ -1166,6 +1174,39 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) spin_unlock_irqrestore(&sbi->cp_lock, flags); } +static void commit_checkpoint(struct f2fs_sb_info *sbi, + void *src, block_t blk_addr) +{ + struct writeback_control wbc = { + .for_reclaim = 0, + }; + + /* + * pagevec_lookup_tag and lock_page again will take + * some extra time. Therefore, update_meta_pages and + * sync_meta_pages are combined in this function. + */ + struct page *page = grab_meta_page(sbi, blk_addr); + int err; + + memcpy(page_address(page), src, PAGE_SIZE); + set_page_dirty(page); + + f2fs_wait_on_page_writeback(page, META, true); + f2fs_bug_on(sbi, PageWriteback(page)); + if (unlikely(!clear_page_dirty_for_io(page))) + f2fs_bug_on(sbi, 1); + + /* writeout cp pack 2 page */ + err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); + f2fs_bug_on(sbi, err); + + f2fs_put_page(page, 0); + + /* submit checkpoint (with barrier if NOBARRIER is not set) */ + f2fs_submit_merged_write(sbi, META_FLUSH); +} + static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1268,16 +1309,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) } } - /* need to wait for end_io results */ - wait_on_all_pages_writeback(sbi); - if (unlikely(f2fs_cp_error(sbi))) - return -EIO; - - /* flush all device cache */ - err = f2fs_flush_device_cache(sbi); - if (err) - return err; - /* write out checkpoint buffer at block 0 */ update_meta_page(sbi, ckpt, start_blk++); @@ -1305,26 +1336,26 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += NR_CURSEG_NODE_TYPE; } - /* writeout checkpoint block */ - update_meta_page(sbi, ckpt, start_blk); + /* update user_block_counts */ + sbi->last_valid_block_count = sbi->total_valid_block_count; + percpu_counter_set(&sbi->alloc_valid_block_count, 0); + + /* Here, we have one bio having CP pack except cp pack 2 page */ + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); - /* wait for previous submitted node/meta pages writeback */ + /* wait for previous submitted meta pages writeback */ wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) return -EIO; - filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX); - filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX); - - /* update user_block_counts */ - sbi->last_valid_block_count = sbi->total_valid_block_count; - percpu_counter_set(&sbi->alloc_valid_block_count, 0); - - /* Here, we only have one bio having CP pack */ - sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO); + /* flush all device cache */ + err = f2fs_flush_device_cache(sbi); + if (err) + return err; - /* wait for previous submitted meta pages writeback */ + /* barrier and flush checkpoint cp pack 2 page if it can */ + commit_checkpoint(sbi, ckpt, start_blk); wait_on_all_pages_writeback(sbi); release_ino_entry(sbi, false); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d5299265feea..a670702cf3ff 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -19,8 +19,6 @@ #include <linux/bio.h> #include <linux/prefetch.h> #include <linux/uio.h> -#include <linux/mm.h> -#include <linux/memcontrol.h> #include <linux/cleancache.h> #include "f2fs.h" @@ -30,6 +28,11 @@ #include <trace/events/f2fs.h> #include <trace/events/android_fs.h> +#define NUM_PREALLOC_POST_READ_CTXS 128 + +static struct kmem_cache *bio_post_read_ctx_cache; +static mempool_t *bio_post_read_ctx_pool; + static bool __is_cp_guaranteed(struct page *page) { struct address_space *mapping = page->mapping; @@ -50,11 +53,77 @@ static bool __is_cp_guaranteed(struct page *page) return false; } -static void f2fs_read_end_io(struct bio *bio) +/* postprocessing steps for read bios */ +enum bio_post_read_step { + STEP_INITIAL = 0, + STEP_DECRYPT, +}; + +struct bio_post_read_ctx { + struct bio *bio; + struct work_struct work; + unsigned int cur_step; + unsigned int enabled_steps; +}; + +static void __read_end_io(struct bio *bio) { - struct bio_vec *bvec; + struct page *page; + struct bio_vec *bv; int i; + bio_for_each_segment_all(bv, bio, i) { + page = bv->bv_page; + + /* PG_error was set if any post_read step failed */ + if (bio->bi_error || PageError(page)) { + ClearPageUptodate(page); + SetPageError(page); + } else { + SetPageUptodate(page); + } + unlock_page(page); + } + if (bio->bi_private) + mempool_free(bio->bi_private, bio_post_read_ctx_pool); + bio_put(bio); +} + +static void bio_post_read_processing(struct bio_post_read_ctx *ctx); + +static void decrypt_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + + fscrypt_decrypt_bio(ctx->bio); + + bio_post_read_processing(ctx); +} + +static void bio_post_read_processing(struct bio_post_read_ctx *ctx) +{ + switch (++ctx->cur_step) { + case STEP_DECRYPT: + if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { + INIT_WORK(&ctx->work, decrypt_work); + fscrypt_enqueue_decrypt_work(&ctx->work); + return; + } + ctx->cur_step++; + /* fall-through */ + default: + __read_end_io(ctx->bio); + } +} + +static bool f2fs_bio_post_read_required(struct bio *bio) +{ + return bio->bi_private && !bio->bi_error; +} + +static void f2fs_read_end_io(struct bio *bio) +{ #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { f2fs_show_injection_info(FAULT_IO); @@ -62,28 +131,15 @@ static void f2fs_read_end_io(struct bio *bio) } #endif - if (f2fs_bio_encrypted(bio)) { - if (bio->bi_error) { - fscrypt_release_ctx(bio->bi_private); - } else { - fscrypt_decrypt_bio_pages(bio->bi_private, bio); - return; - } - } - - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; + if (f2fs_bio_post_read_required(bio)) { + struct bio_post_read_ctx *ctx = bio->bi_private; - if (!bio->bi_error) { - if (!PageUptodate(page)) - SetPageUptodate(page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - unlock_page(page); + ctx->cur_step = STEP_INITIAL; + bio_post_read_processing(ctx); + return; } - bio_put(bio); + + __read_end_io(bio); } static void f2fs_write_end_io(struct bio *bio) @@ -174,15 +230,22 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, */ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, struct writeback_control *wbc, - int npages, bool is_read) + int npages, bool is_read, + enum page_type type, enum temp_type temp) { struct bio *bio; bio = f2fs_bio_alloc(sbi, npages, true); f2fs_target_device(sbi, blk_addr, bio); - bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; - bio->bi_private = is_read ? NULL : sbi; + if (is_read) { + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = NULL; + } else { + bio->bi_end_io = f2fs_write_end_io; + bio->bi_private = sbi; + bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp); + } if (wbc) wbc_init_bio(wbc, bio); @@ -195,13 +258,12 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (!is_read_io(bio_op(bio))) { unsigned int start; - if (f2fs_sb_mounted_blkzoned(sbi->sb) && - current->plug && (type == DATA || type == NODE)) - blk_finish_plug(current->plug); - if (type != DATA && type != NODE) goto submit_io; + if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug) + blk_finish_plug(current->plug); + start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; start %= F2FS_IO_SIZE(sbi); @@ -376,12 +438,13 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; + verify_block_addr(fio, fio->new_blkaddr); trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); /* Allocate a new bio */ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, - 1, is_read_io(fio->op)); + 1, is_read_io(fio->op), fio->type, fio->temp); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -421,8 +484,8 @@ next: } if (fio->old_blkaddr != NEW_ADDR) - verify_block_addr(sbi, fio->old_blkaddr); - verify_block_addr(sbi, fio->new_blkaddr); + verify_block_addr(fio, fio->old_blkaddr); + verify_block_addr(fio, fio->new_blkaddr); bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; @@ -444,7 +507,8 @@ alloc_new: goto out_fail; } io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc, - BIO_MAX_PAGES, false); + BIO_MAX_PAGES, false, + fio->type, fio->temp); io->fio = *fio; } @@ -472,29 +536,33 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, unsigned nr_pages) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct fscrypt_ctx *ctx = NULL; struct bio *bio; - - if (f2fs_encrypted_file(inode)) { - ctx = fscrypt_get_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - return ERR_CAST(ctx); - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_block_writeback(sbi, blkaddr); - } + struct bio_post_read_ctx *ctx; + unsigned int post_read_steps = 0; bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); + if (!bio) return ERR_PTR(-ENOMEM); - } f2fs_target_device(sbi, blkaddr, bio); bio->bi_end_io = f2fs_read_end_io; - bio->bi_private = ctx; bio_set_op_attrs(bio, REQ_OP_READ, 0); + if (f2fs_encrypted_file(inode)) + post_read_steps |= 1 << STEP_DECRYPT; + if (post_read_steps) { + ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); + if (!ctx) { + bio_put(bio); + return ERR_PTR(-ENOMEM); + } + ctx->bio = bio; + ctx->enabled_steps = post_read_steps; + bio->bi_private = ctx; + + /* wait the page to be moved by cleaning */ + f2fs_wait_on_block_writeback(sbi, blkaddr); + } + return bio; } @@ -831,13 +899,6 @@ alloc: return 0; } -static inline bool __force_buffered_io(struct inode *inode, int rw) -{ - return (f2fs_encrypted_file(inode) || - (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || - F2FS_I_SB(inode)->s_ndevs); -} - int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); @@ -868,9 +929,8 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_seg_type = NO_CHECK_TYPE; if (direct_io) { - /* map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint); */ - map.m_seg_type = rw_hint_to_seg_type(WRITE_LIFE_NOT_SET); - flag = __force_buffered_io(inode, WRITE) ? + map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint); + flag = f2fs_force_buffered_io(inode, WRITE) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -1114,6 +1174,31 @@ out: return err; } +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) +{ + struct f2fs_map_blocks map; + block_t last_lblk; + int err; + + if (pos + len > i_size_read(inode)) + return false; + + map.m_lblk = F2FS_BYTES_TO_BLK(pos); + map.m_next_pgofs = NULL; + map.m_next_extent = NULL; + map.m_seg_type = NO_CHECK_TYPE; + last_lblk = F2FS_BLK_ALIGN(pos + len); + + while (map.m_lblk < last_lblk) { + map.m_len = last_lblk - map.m_lblk; + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); + if (err || map.m_len == 0) + return false; + map.m_lblk += map.m_len; + } + return true; +} + static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, pgoff_t *next_pgofs, int seg_type) @@ -1151,8 +1236,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, return __get_data_block(inode, iblock, bh_result, create, F2FS_GET_BLOCK_DEFAULT, NULL, rw_hint_to_seg_type( - WRITE_LIFE_NOT_SET)); - /* inode->i_write_hint)); */ + inode->i_write_hint)); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -1499,7 +1583,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio) if (!f2fs_encrypted_file(inode)) return 0; - /* wait for GCed encrypted page writeback */ + /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); retry_encrypt: @@ -1649,6 +1733,7 @@ got_it: goto out_writepage; set_page_writeback(page); + ClearPageError(page); f2fs_put_dnode(&dn); if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); @@ -1671,6 +1756,7 @@ got_it: goto out_writepage; set_page_writeback(page); + ClearPageError(page); /* LFS mode write path */ write_data_page(&dn, fio); @@ -2211,8 +2297,8 @@ repeat: f2fs_wait_on_page_writeback(page, DATA, false); - /* wait for GCed encrypted page writeback */ - if (f2fs_encrypted_file(inode)) + /* wait for GCed page writeback via META_MAPPING */ + if (f2fs_post_read_required(inode)) f2fs_wait_on_block_writeback(sbi, blkaddr); if (len == PAGE_SIZE || PageUptodate(page)) @@ -2304,15 +2390,18 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t count = iov_iter_count(iter); int rw = iov_iter_rw(iter); int err; + enum rw_hint hint = iocb->ki_hint; + int whint_mode = F2FS_OPTION(sbi).whint_mode; err = check_direct_IO(inode, iter, offset); if (err) return err; - if (__force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, rw)) return 0; if (trace_android_fs_dataread_start_enabled() && @@ -2339,11 +2428,24 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, } trace_f2fs_direct_IO_enter(inode, offset, count, rw); - down_read(&F2FS_I(inode)->dio_rwsem[rw]); + if (rw == WRITE && whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = WRITE_LIFE_NOT_SET; + + if (!down_read_trylock(&F2FS_I(inode)->dio_rwsem[rw])) { + if (iocb->ki_flags & IOCB_NOWAIT) { + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + down_read(&F2FS_I(inode)->dio_rwsem[rw]); + } + err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); up_read(&F2FS_I(inode)->dio_rwsem[rw]); if (rw == WRITE) { + if (whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = hint; if (err > 0) { f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, err); @@ -2352,7 +2454,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, f2fs_write_failed(mapping, offset + count); } } - +out: if (trace_android_fs_dataread_start_enabled() && (iov_iter_rw(iter) == READ)) trace_android_fs_dataread_end(inode, offset, count); @@ -2409,37 +2511,6 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 1; } -/* - * This was copied from __set_page_dirty_buffers which gives higher performance - * in very high speed storages. (e.g., pmem) - */ -void f2fs_set_page_dirty_nobuffers(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct mem_cgroup *memcg; - unsigned long flags; - - if (unlikely(!mapping)) - return; - - spin_lock(&mapping->private_lock); - memcg = mem_cgroup_begin_page_stat(page); - SetPageDirty(page); - spin_unlock(&mapping->private_lock); - - spin_lock_irqsave(&mapping->tree_lock, flags); - WARN_ON_ONCE(!PageUptodate(page)); - account_page_dirtied(page, mapping, memcg); - radix_tree_tag_set(&mapping->page_tree, - page_index(page), PAGECACHE_TAG_DIRTY); - spin_unlock_irqrestore(&mapping->tree_lock, flags); - - mem_cgroup_end_page_stat(memcg); - - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return; -} - static int f2fs_set_data_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; @@ -2463,7 +2534,7 @@ static int f2fs_set_data_page_dirty(struct page *page) } if (!PageDirty(page)) { - f2fs_set_page_dirty_nobuffers(page); + __set_page_dirty_nobuffers(page); update_dirty_page(inode, page); return 1; } @@ -2556,3 +2627,27 @@ const struct address_space_operations f2fs_dblock_aops = { .migratepage = f2fs_migrate_page, #endif }; + +int __init f2fs_init_post_read_processing(void) +{ + bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0); + if (!bio_post_read_ctx_cache) + goto fail; + bio_post_read_ctx_pool = + mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS, + bio_post_read_ctx_cache); + if (!bio_post_read_ctx_pool) + goto fail_free_cache; + return 0; + +fail_free_cache: + kmem_cache_destroy(bio_post_read_ctx_cache); +fail: + return -ENOMEM; +} + +void __exit f2fs_destroy_post_read_processing(void) +{ + mempool_destroy(bio_post_read_ctx_pool); + kmem_cache_destroy(bio_post_read_ctx_cache); +} diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 560b707050ca..41d32171bd52 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -361,6 +361,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, struct page *dpage) { struct page *page; + int dummy_encrypt = DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(dir)); int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { @@ -387,7 +388,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) { + if ((f2fs_encrypted_inode(dir) || dummy_encrypt) && + f2fs_may_encrypt(inode)) { err = fscrypt_inherit_context(dir, inode, page, false); if (err) goto put_error; @@ -396,8 +398,6 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, page = get_node_page(F2FS_I_SB(dir), inode->i_ino); if (IS_ERR(page)) return page; - - set_cold_node(inode, page); } if (new_name) { @@ -704,7 +704,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); - add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(F2FS_I_SB(dir)).fsync_mode == FSYNC_MODE_STRICT) + add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index ff2352a0ed15..d5a861bf2b42 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -460,7 +460,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, struct rb_node *insert_parent) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct rb_node **p = &et->root.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -706,6 +706,9 @@ void f2fs_drop_extent_tree(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree; + if (!f2fs_may_extent_tree(inode)) + return; + set_inode_flag(inode, FI_NO_EXTENT); write_lock(&et->lock); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 34595ca42f68..d0bfcfed35e2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -99,9 +99,10 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 -#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option) +#define F2FS_OPTION(sbi) ((sbi)->mount_opt) +#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (F2FS_OPTION(sbi).opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (F2FS_OPTION(sbi).opt & F2FS_MOUNT_##option) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -114,7 +115,26 @@ typedef u32 block_t; /* typedef u32 nid_t; struct f2fs_mount_info { - unsigned int opt; + unsigned int opt; + int write_io_size_bits; /* Write IO size bits */ + block_t root_reserved_blocks; /* root reserved blocks */ + kuid_t s_resuid; /* reserved blocks for uid */ + kgid_t s_resgid; /* reserved blocks for gid */ + int active_logs; /* # of active logs */ + int inline_xattr_size; /* inline xattr size */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + struct f2fs_fault_info fault_info; /* For fault injection */ +#endif +#ifdef CONFIG_QUOTA + /* Names of quota files with journalled quota */ + char *s_qf_names[MAXQUOTAS]; + int s_jquota_fmt; /* Format of quota to use */ +#endif + /* For which write hints are passed down to block layer */ + int whint_mode; + int alloc_mode; /* segment allocation policy */ + int fsync_mode; /* fsync policy */ + bool test_dummy_encryption; /* test dummy encryption */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -126,6 +146,8 @@ struct f2fs_mount_info { #define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 #define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_FEATURE_INODE_CRTIME 0x0100 +#define F2FS_FEATURE_LOST_FOUND 0x0200 +#define F2FS_FEATURE_VERITY 0x0400 /* reserved */ #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -509,7 +531,7 @@ static inline void make_dentry_ptr_block(struct inode *inode, d->inode = inode; d->max = NR_DENTRY_IN_BLOCK; d->nr_bitmap = SIZE_OF_DENTRY_BITMAP; - d->bitmap = &t->dentry_bitmap; + d->bitmap = t->dentry_bitmap; d->dentry = t->dentry; d->filename = t->filename; } @@ -635,6 +657,8 @@ enum { #define FADVISE_ENCRYPT_BIT 0x04 #define FADVISE_ENC_NAME_BIT 0x08 #define FADVISE_KEEP_SIZE_BIT 0x10 +#define FADVISE_HOT_BIT 0x20 +#define FADVISE_VERITY_BIT 0x40 /* reserved */ #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) @@ -649,6 +673,9 @@ enum { #define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT) #define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT) #define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT) +#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT) +#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT) +#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT) #define DEF_DIR_LEVEL 0 @@ -696,6 +723,7 @@ struct f2fs_inode_info { kprojid_t i_projid; /* id for project quota */ int i_inline_xattr_size; /* inline xattr size */ struct timespec i_crtime; /* inode creation time */ + struct timespec i_disk_time[4]; /* inode disk times */ }; static inline void get_extent_info(struct extent_info *ext, @@ -802,7 +830,7 @@ struct f2fs_nm_info { unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */ spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ - unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; + unsigned char **free_nid_bitmap; unsigned char *nat_block_bitmap; unsigned short *free_nid_count; /* free nid count of NAT block */ @@ -1035,6 +1063,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ + bool is_meta; /* indicate borrow meta inode mapping or not */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ }; @@ -1096,10 +1125,34 @@ enum { MAX_TIME, }; +enum { + WHINT_MODE_OFF, /* not pass down write hints */ + WHINT_MODE_USER, /* try to pass down hints given by users */ + WHINT_MODE_FS, /* pass down hints with F2FS policy */ +}; + +enum { + ALLOC_MODE_DEFAULT, /* stay default */ + ALLOC_MODE_REUSE, /* reuse segments as much as possible */ +}; + +enum fsync_mode { + FSYNC_MODE_POSIX, /* fsync follows posix semantics */ + FSYNC_MODE_STRICT, /* fsync behaves in line with ext4 */ +}; + +#ifdef CONFIG_F2FS_FS_ENCRYPTION +#define DUMMY_ENCRYPTION_ENABLED(sbi) \ + (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) +#else +#define DUMMY_ENCRYPTION_ENABLED(sbi) (0) +#endif + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ + struct rw_semaphore sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ @@ -1119,7 +1172,6 @@ struct f2fs_sb_info { struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ struct mutex wio_mutex[NR_PAGE_TYPE - 1][NR_TEMP_TYPE]; /* bio ordering for NODE/DATA */ - int write_io_size_bits; /* Write IO size bits */ mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ @@ -1169,9 +1221,7 @@ struct f2fs_sb_info { unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ - int active_logs; /* # of active logs */ int dir_level; /* directory level */ - int inline_xattr_size; /* inline xattr size */ unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ int readdir_ra; /* readahead inode in readdir */ @@ -1181,9 +1231,6 @@ struct f2fs_sb_info { block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ - block_t root_reserved_blocks; /* root reserved blocks */ - kuid_t s_resuid; /* reserved blocks for uid */ - kgid_t s_resgid; /* reserved blocks for gid */ unsigned int nquota_files; /* # of quota sysfile */ @@ -1268,17 +1315,6 @@ struct f2fs_sb_info { /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; - - /* For fault injection */ -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info fault_info; -#endif - -#ifdef CONFIG_QUOTA - /* Names of quota files with journalled quota */ - char *s_qf_names[MAXQUOTAS]; - int s_jquota_fmt; /* Format of quota to use */ -#endif }; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -1288,7 +1324,7 @@ struct f2fs_sb_info { __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (!ffi->inject_rate) return false; @@ -1637,7 +1673,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) } static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, - struct inode *inode) + struct inode *inode, bool cap) { if (!inode) return true; @@ -1645,12 +1681,12 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, return false; if (IS_NOQUOTA(inode)) return true; - if (capable(CAP_SYS_RESOURCE)) + if (uid_eq(F2FS_OPTION(sbi).s_resuid, current_fsuid())) return true; - if (uid_eq(sbi->s_resuid, current_fsuid())) + if (!gid_eq(F2FS_OPTION(sbi).s_resgid, GLOBAL_ROOT_GID) && + in_group_p(F2FS_OPTION(sbi).s_resgid)) return true; - if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && - in_group_p(sbi->s_resgid)) + if (cap && capable(CAP_SYS_RESOURCE)) return true; return false; } @@ -1685,8 +1721,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, avail_user_block_count = sbi->user_block_count - sbi->current_reserved_blocks; - if (!__allow_reserved_blocks(sbi, inode)) - avail_user_block_count -= sbi->root_reserved_blocks; + if (!__allow_reserved_blocks(sbi, inode, true)) + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; @@ -1821,6 +1857,12 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); int offset; + if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) { + offset = (flag == SIT_BITMAP) ? + le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0; + return &ckpt->sit_nat_version_bitmap + offset; + } + if (__cp_payload(sbi) > 0) { if (flag == NAT_BITMAP) return &ckpt->sit_nat_version_bitmap; @@ -1886,8 +1928,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, valid_block_count = sbi->total_valid_block_count + sbi->current_reserved_blocks + 1; - if (!__allow_reserved_blocks(sbi, inode)) - valid_block_count += sbi->root_reserved_blocks; + if (!__allow_reserved_blocks(sbi, inode, false)) + valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); @@ -2489,7 +2531,17 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) } if (!is_inode_flag_set(inode, FI_AUTO_RECOVER) || file_keep_isize(inode) || - i_size_read(inode) & PAGE_MASK) + i_size_read(inode) & ~PAGE_MASK) + return false; + + if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, + &F2FS_I(inode)->i_crtime)) return false; down_read(&F2FS_I(inode)->i_sem); @@ -2499,8 +2551,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) return ret; } -#define sb_rdonly f2fs_readonly -static inline int f2fs_readonly(struct super_block *sb) +static inline bool f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; } @@ -2562,15 +2613,6 @@ static inline void *kvzalloc(size_t size, gfp_t flags) return ret; } -enum rw_hint { - WRITE_LIFE_NOT_SET = 0, - WRITE_LIFE_NONE = 1, /* RWH_WRITE_LIFE_NONE */ - WRITE_LIFE_SHORT = 2, /* RWH_WRITE_LIFE_SHORT */ - WRITE_LIFE_MEDIUM = 3, /* RWH_WRITE_LIFE_MEDIUM */ - WRITE_LIFE_LONG = 4, /* RWH_WRITE_LIFE_LONG */ - WRITE_LIFE_EXTREME = 5, /* RWH_WRITE_LIFE_EXTREME */ -}; - static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { @@ -2679,6 +2721,8 @@ void handle_failed_inode(struct inode *inode); /* * namei.c */ +int update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); /* @@ -2851,6 +2895,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi); int __init create_segment_manager_caches(void); void destroy_segment_manager_caches(void); int rw_hint_to_seg_type(enum rw_hint hint); +enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, + enum temp_type temp); /* * checkpoint.c @@ -2891,6 +2937,8 @@ void destroy_checkpoint_caches(void); /* * data.c */ +int f2fs_init_post_read_processing(void); +void f2fs_destroy_post_read_processing(void); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, nid_t ino, pgoff_t idx, @@ -2922,7 +2970,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); -void f2fs_set_page_dirty_nobuffers(struct page *page); int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, enum iostat_type io_type); @@ -2933,6 +2980,7 @@ int f2fs_release_page(struct page *page, gfp_t wait); int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); #endif +bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); /* * gc.c @@ -3250,50 +3298,30 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) #endif } -static inline bool f2fs_bio_encrypted(struct bio *bio) -{ - return bio->bi_private != NULL; -} - -static inline int f2fs_sb_has_crypto(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); -} - -static inline int f2fs_sb_mounted_blkzoned(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_BLKZONED); -} - -static inline int f2fs_sb_has_extra_attr(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_EXTRA_ATTR); -} - -static inline int f2fs_sb_has_project_quota(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA); -} - -static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); -} - -static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb) +/* + * Returns true if the reads of the inode's data need to undergo some + * postprocessing step, like decryption or authenticity verification. + */ +static inline bool f2fs_post_read_required(struct inode *inode) { - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); + return f2fs_encrypted_file(inode); } -static inline int f2fs_sb_has_quota_ino(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO); +#define F2FS_FEATURE_FUNCS(name, flagname) \ +static inline int f2fs_sb_has_##name(struct super_block *sb) \ +{ \ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_##flagname); \ } -static inline int f2fs_sb_has_inode_crtime(struct super_block *sb) -{ - return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CRTIME); -} +F2FS_FEATURE_FUNCS(encrypt, ENCRYPT); +F2FS_FEATURE_FUNCS(blkzoned, BLKZONED); +F2FS_FEATURE_FUNCS(extra_attr, EXTRA_ATTR); +F2FS_FEATURE_FUNCS(project_quota, PRJQUOTA); +F2FS_FEATURE_FUNCS(inode_chksum, INODE_CHKSUM); +F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); +F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); +F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); +F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, @@ -3313,7 +3341,7 @@ static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) { struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); - return blk_queue_discard(q) || f2fs_sb_mounted_blkzoned(sbi->sb); + return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb); } static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) @@ -3342,4 +3370,11 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } +static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) +{ + return (f2fs_post_read_required(inode) || + (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || + F2FS_I_SB(inode)->s_ndevs); +} + #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 65cda5bc61b7..7587758a285f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -113,8 +113,8 @@ mapped: /* fill the page */ f2fs_wait_on_page_writeback(page, DATA, false); - /* wait for GCed encrypted page writeback */ - if (f2fs_encrypted_file(inode)) + /* wait for GCed page writeback via META_MAPPING */ + if (f2fs_post_read_required(inode)) f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); out_sem: @@ -166,9 +166,10 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) cp_reason = CP_FASTBOOT_MODE; - else if (sbi->active_logs == 2) + else if (F2FS_OPTION(sbi).active_logs == 2) cp_reason = CP_SPEC_LOG_NUM; - else if (need_dentry_mark(sbi, inode->i_ino) && + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT && + need_dentry_mark(sbi, inode->i_ino) && exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; @@ -481,6 +482,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; + + filp->f_mode |= FMODE_NOWAIT; + return dquot_file_open(inode, filp); } @@ -571,7 +575,6 @@ truncate_out: int truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; @@ -580,7 +583,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); - free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= sbi->max_file_blocks) goto free_partial; @@ -1354,8 +1357,12 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, } out: - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } out_sem: up_write(&F2FS_I(inode)->i_mmap_sem); @@ -1709,6 +1716,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) inode_lock(inode); + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + if (f2fs_is_volatile_file(inode)) goto err_out; @@ -1727,6 +1736,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false); } err_out: + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1936,7 +1946,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -1946,7 +1956,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { - if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb)) + if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb)) return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -1957,16 +1967,18 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; - if (!f2fs_sb_has_crypto(inode->i_sb)) + if (!f2fs_sb_has_encrypt(inode->i_sb)) return -EOPNOTSUPP; - if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) - goto got_it; - err = mnt_want_write_file(filp); if (err) return err; + down_write(&sbi->sb_lock); + + if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) + goto got_it; + /* update superblock with uuid */ generate_random_uuid(sbi->raw_super->encrypt_pw_salt); @@ -1974,15 +1986,16 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) { /* undo new data */ memset(sbi->raw_super->encrypt_pw_salt, 0, 16); - mnt_drop_write_file(filp); - return err; + goto out_err; } - mnt_drop_write_file(filp); got_it: if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt, 16)) - return -EFAULT; - return 0; + err = -EFAULT; +out_err: + up_write(&sbi->sb_lock); + mnt_drop_write_file(filp); + return err; } static int f2fs_ioc_gc(struct file *filp, unsigned long arg) @@ -2043,8 +2056,10 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) return ret; end = range.start + range.len; - if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) - return -EINVAL; + if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) { + ret = -EINVAL; + goto out; + } do_more: if (!range.sync) { if (!mutex_trylock(&sbi->gc_mutex)) { @@ -2685,25 +2700,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; - inode_lock(inode); + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) + return -EINVAL; + + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } + ret = generic_write_checks(iocb, from); if (ret > 0) { + bool preallocated = false; + size_t target_size = 0; int err; if (iov_iter_fault_in_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - return err; + if ((iocb->ki_flags & IOCB_NOWAIT) && + (iocb->ki_flags & IOCB_DIRECT)) { + if (!f2fs_overwrite_io(inode, iocb->ki_pos, + iov_iter_count(from)) || + f2fs_has_inline_data(inode) || + f2fs_force_buffered_io(inode, WRITE)) { + inode_unlock(inode); + return -EAGAIN; + } + + } else { + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + return err; + } } blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); + /* if we couldn't write data, we should deallocate blocks. */ + if (preallocated && i_size_read(inode) < target_size) + f2fs_truncate(inode); + if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d0de3429c26c..c009b50d69f5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -76,14 +76,15 @@ static int gc_thread_func(void *data) * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. */ - if (!mutex_trylock(&sbi->gc_mutex)) - goto next; - if (gc_th->gc_urgent) { wait_ms = gc_th->urgent_sleep_time; + mutex_lock(&sbi->gc_mutex); goto do_gc; } + if (!mutex_trylock(&sbi->gc_mutex)) + goto next; + if (!is_idle(sbi)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); @@ -161,12 +162,17 @@ static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) { int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; - if (gc_th && gc_th->gc_idle) { + if (!gc_th) + return gc_mode; + + if (gc_th->gc_idle) { if (gc_th->gc_idle == 1) gc_mode = GC_CB; else if (gc_th->gc_idle == 2) gc_mode = GC_GREEDY; } + if (gc_th->gc_urgent) + gc_mode = GC_GREEDY; return gc_mode; } @@ -188,11 +194,14 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, } /* we need to check every dirty segments in the FG_GC case */ - if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) + if (gc_type != FG_GC && + (sbi->gc_thread && !sbi->gc_thread->gc_urgent) && + p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - /* let's select beginning hot/small space first */ - if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + /* let's select beginning hot/small space first in no_heap mode*/ + if (test_opt(sbi, NOHEAP) && + (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; else p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; @@ -841,8 +850,8 @@ next_step: if (IS_ERR(inode) || is_bad_inode(inode)) continue; - /* if encrypted inode, let's go phase 3 */ - if (f2fs_encrypted_file(inode)) { + /* if inode uses special I/O path, let's go phase 3 */ + if (f2fs_post_read_required(inode)) { add_gc_inode(gc_list, inode); continue; } @@ -890,7 +899,7 @@ next_step: start_bidx = start_bidx_of_node(nofs, inode) + ofs_in_node; - if (f2fs_encrypted_file(inode)) + if (f2fs_post_read_required(inode)) move_data_block(inode, start_bidx, segno, off); else move_data_page(inode, start_bidx, gc_type, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 37ab2159506a..922a213693c1 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -26,7 +26,7 @@ bool f2fs_may_inline_data(struct inode *inode) if (i_size_read(inode) > MAX_INLINE_DATA(inode)) return false; - if (f2fs_encrypted_file(inode)) + if (f2fs_post_read_required(inode)) return false; return true; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 10be247ca421..51846fc54fbd 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -284,6 +284,10 @@ static int do_read_inode(struct inode *inode) fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); } + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -439,12 +443,15 @@ void update_inode(struct inode *inode, struct page *node_page) } __set_inode_rdev(inode, ri); - set_cold_node(inode, node_page); /* deleted inode */ if (inode->i_nlink == 0) clear_inline_node(node_page); + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; + F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; + F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; + F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime; } void update_inode_page(struct inode *inode) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6bb1adb84324..fecae8685d2a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -78,7 +78,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) set_inode_flag(inode, FI_NEW_INODE); /* If the directory encrypted, then we should encrypt the inode. */ - if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) + if ((f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) && + f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); if (f2fs_sb_has_extra_attr(sbi->sb)) { @@ -97,7 +98,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); if (f2fs_has_inline_xattr(inode)) - xattr_size = sbi->inline_xattr_size; + xattr_size = F2FS_OPTION(sbi).inline_xattr_size; /* Otherwise, will be 0 */ } else if (f2fs_has_inline_xattr(inode) || f2fs_has_inline_dentry(inode)) { @@ -142,7 +143,7 @@ fail_drop: return ERR_PTR(err); } -static int is_multimedia_file(const unsigned char *s, const char *sub) +static int is_extension_exist(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); @@ -168,19 +169,94 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) /* * Set multimedia files as cold files for hot/cold data separation */ -static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, +static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode, const unsigned char *name) { - int i; - __u8 (*extlist)[8] = sbi->raw_super->extension_list; + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int i, cold_count, hot_count; + + down_read(&sbi->sb_lock); + + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; - int count = le32_to_cpu(sbi->raw_super->extension_count); - for (i = 0; i < count; i++) { - if (is_multimedia_file(name, extlist[i])) { + for (i = 0; i < cold_count + hot_count; i++) { + if (!is_extension_exist(name, extlist[i])) + continue; + if (i < cold_count) file_set_cold(inode); - break; - } + else + file_set_hot(inode); + break; + } + + up_read(&sbi->sb_lock); +} + +int update_extension_list(struct f2fs_sb_info *sbi, const char *name, + bool hot, bool set) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int total_count = cold_count + hot_count; + int start, count; + int i; + + if (set) { + if (total_count == F2FS_MAX_EXTENSION) + return -EINVAL; + } else { + if (!hot && !cold_count) + return -EINVAL; + if (hot && !hot_count) + return -EINVAL; + } + + if (hot) { + start = cold_count; + count = total_count; + } else { + start = 0; + count = cold_count; + } + + for (i = start; i < count; i++) { + if (strcmp(name, extlist[i])) + continue; + + if (set) + return -EINVAL; + + memcpy(extlist[i], extlist[i + 1], + F2FS_EXTENSION_LEN * (total_count - i - 1)); + memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN); + if (hot) + sbi->raw_super->hot_ext_count = hot_count - 1; + else + sbi->raw_super->extension_count = + cpu_to_le32(cold_count - 1); + return 0; + } + + if (!set) + return -EINVAL; + + if (hot) { + strncpy(extlist[count], name, strlen(name)); + sbi->raw_super->hot_ext_count = hot_count + 1; + } else { + char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN]; + + memcpy(buf, &extlist[cold_count], + F2FS_EXTENSION_LEN * hot_count); + memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN); + strncpy(extlist[cold_count], name, strlen(name)); + memcpy(&extlist[cold_count + 1], buf, + F2FS_EXTENSION_LEN * hot_count); + sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1); } + return 0; } static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, @@ -203,7 +279,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return PTR_ERR(inode); if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) - set_cold_files(sbi, inode, dentry->d_name.name); + set_file_temperature(sbi, inode, dentry->d_name.name); inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; @@ -218,8 +294,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -478,27 +554,16 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; size_t len = strlen(symname); - struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1); - struct fscrypt_symlink_data *sd = NULL; + struct fscrypt_str disk_link; int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if (f2fs_encrypted_inode(dir)) { - err = fscrypt_get_encryption_info(dir); - if (err) - return err; - - if (!fscrypt_has_encryption_key(dir)) - return -ENOKEY; - - disk_link.len = (fscrypt_fname_encrypted_size(dir, len) + - sizeof(struct fscrypt_symlink_data)); - } - - if (disk_link.len > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; + err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, + &disk_link); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -508,7 +573,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) return PTR_ERR(inode); - if (f2fs_encrypted_inode(inode)) + if (IS_ENCRYPTED(inode)) inode->i_op = &f2fs_encrypted_symlink_inode_operations; else inode->i_op = &f2fs_symlink_inode_operations; @@ -518,44 +583,19 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) - goto out; + goto out_handle_failed_inode; f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); - if (f2fs_encrypted_inode(inode)) { - struct qstr istr = QSTR_INIT(symname, len); - struct fscrypt_str ostr; - - sd = f2fs_kzalloc(sbi, disk_link.len, GFP_NOFS); - if (!sd) { - err = -ENOMEM; - goto err_out; - } - - err = fscrypt_get_encryption_info(inode); - if (err) - goto err_out; - - if (!fscrypt_has_encryption_key(inode)) { - err = -ENOKEY; - goto err_out; - } - - ostr.name = sd->encrypted_path; - ostr.len = disk_link.len; - err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr); - if (err) - goto err_out; - - sd->len = cpu_to_le16(ostr.len); - disk_link.name = (char *)sd; - } + err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link); + if (err) + goto err_out; err = page_symlink(inode, disk_link.name, disk_link.len); err_out: - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); /* * Let's flush symlink data in order to avoid broken symlink as much as @@ -576,12 +616,14 @@ err_out: f2fs_unlink(dir, dentry); } - kfree(sd); - f2fs_balance_fs(sbi, true); - return err; -out: + goto out_free_encrypted_link; + +out_handle_failed_inode: handle_failed_inode(inode); +out_free_encrypted_link: + if (disk_link.name != (unsigned char *)symname) + kfree(disk_link.name); return err; } @@ -616,8 +658,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) alloc_nid_done(sbi, inode->i_ino); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -668,8 +710,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, alloc_nid_done(sbi, inode->i_ino); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); @@ -743,10 +785,12 @@ out: static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { - if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if (f2fs_encrypted_inode(dir)) { + if (f2fs_encrypted_inode(dir) || DUMMY_ENCRYPTION_ENABLED(sbi)) { int err = fscrypt_get_encryption_info(dir); if (err) return err; @@ -926,7 +970,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_put_page(old_dir_page, 0); f2fs_i_links_write(old_dir, false); } - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); f2fs_unlock_op(sbi); @@ -1076,8 +1121,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_mark_inode_dirty_sync(new_dir, false); - add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); - add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { + add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + } f2fs_unlock_op(sbi); @@ -1127,65 +1174,21 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie) { - struct page *cpage = NULL; - char *caddr, *paddr = NULL; - struct fscrypt_str cstr = FSTR_INIT(NULL, 0); - struct fscrypt_str pstr = FSTR_INIT(NULL, 0); - struct fscrypt_symlink_data *sd; struct inode *inode = d_inode(dentry); - u32 max_size = inode->i_sb->s_blocksize; - int res; - - res = fscrypt_get_encryption_info(inode); - if (res) - return ERR_PTR(res); - - cpage = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(cpage)) - return ERR_CAST(cpage); - caddr = page_address(cpage); - - /* Symlink is encrypted */ - sd = (struct fscrypt_symlink_data *)caddr; - cstr.name = sd->encrypted_path; - cstr.len = le16_to_cpu(sd->len); - - /* this is broken symlink case */ - if (unlikely(cstr.len == 0)) { - res = -ENOENT; - goto errout; - } - - if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) { - /* Symlink data on the disk is corrupted */ - res = -EIO; - goto errout; - } - res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr); - if (res) - goto errout; - - res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); - if (res) - goto errout; - - /* this is broken symlink case */ - if (unlikely(pstr.name[0] == 0)) { - res = -ENOENT; - goto errout; - } + struct page *page; + void *target; - paddr = pstr.name; + if (!dentry) + return ERR_PTR(-ECHILD); - /* Null-terminate the name */ - paddr[pstr.len] = '\0'; + page = read_mapping_page(inode->i_mapping, 0, NULL); + if (IS_ERR(page)) + return ERR_CAST(page); - put_page(cpage); - return *cookie = paddr; -errout: - fscrypt_fname_free_buffer(&pstr); - put_page(cpage); - return ERR_PTR(res); + target = fscrypt_get_symlink(inode, page_address(page), + inode->i_sb->s_blocksize); + put_page(page); + return *cookie = target; } const struct inode_operations f2fs_encrypted_symlink_inode_operations = { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c294d0feea08..3871e7d3f69e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -193,8 +193,8 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) __free_nat_entry(e); } -static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) +static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) { nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; @@ -209,15 +209,36 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, head->entry_cnt = 0; f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } + return head; +} + +static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, + struct nat_entry *ne) +{ + struct nat_entry_set *head; + bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR; + + if (!new_ne) + head = __grab_nat_entry_set(nm_i, ne); + + /* + * update entry_cnt in below condition: + * 1. update NEW_ADDR to valid block address; + * 2. update old block address to new one; + */ + if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) || + !get_nat_flag(ne, IS_DIRTY))) + head->entry_cnt++; + + set_nat_flag(ne, IS_PREALLOC, new_ne); if (get_nat_flag(ne, IS_DIRTY)) goto refresh_list; nm_i->dirty_nat_cnt++; - head->entry_cnt++; set_nat_flag(ne, IS_DIRTY, true); refresh_list: - if (nat_get_blkaddr(ne) == NEW_ADDR) + if (new_ne) list_del_init(&ne->list); else list_move_tail(&ne->list, &head->entry_list); @@ -1076,7 +1097,7 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) f2fs_wait_on_page_writeback(page, NODE, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(dn->inode, page); + set_cold_node(page, S_ISDIR(dn->inode->i_mode)); if (!PageUptodate(page)) SetPageUptodate(page); if (set_page_dirty(page)) @@ -1754,7 +1775,7 @@ static int f2fs_set_node_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); if (!PageDirty(page)) { - f2fs_set_page_dirty_nobuffers(page); + __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); f2fs_trace_pid(page); @@ -2313,6 +2334,7 @@ retry: if (!PageUptodate(ipage)) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); + set_cold_node(page, false); src = F2FS_INODE(page); dst = F2FS_INODE(ipage); @@ -2602,8 +2624,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (!enabled_nat_bits(sbi, NULL)) return 0; - nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 + - F2FS_BLKSIZE - 1); + nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kzalloc(sbi, nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) @@ -2729,12 +2750,20 @@ static int init_node_manager(struct f2fs_sb_info *sbi) static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); + int i; - nm_i->free_nid_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks * - NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); + nm_i->free_nid_bitmap = f2fs_kzalloc(sbi, nm_i->nat_blocks * + sizeof(unsigned char *), GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; + for (i = 0; i < nm_i->nat_blocks; i++) { + nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi, + NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL); + if (!nm_i->free_nid_bitmap) + return -ENOMEM; + } + nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8, GFP_KERNEL); if (!nm_i->nat_block_bitmap) @@ -2825,7 +2854,13 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); - kvfree(nm_i->free_nid_bitmap); + if (nm_i->free_nid_bitmap) { + int i; + + for (i = 0; i < nm_i->nat_blocks; i++) + kvfree(nm_i->free_nid_bitmap[i]); + kfree(nm_i->free_nid_bitmap); + } kvfree(nm_i->free_nid_count); kfree(nm_i->nat_bitmap); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 081ef0d672bf..b95e49e4a928 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -44,6 +44,7 @@ enum { HAS_FSYNCED_INODE, /* is the inode fsynced before? */ HAS_LAST_FSYNC, /* has the latest node fsync mark? */ IS_DIRTY, /* this nat entry is dirty? */ + IS_PREALLOC, /* nat entry is preallocated */ }; /* @@ -422,12 +423,12 @@ static inline void clear_inline_node(struct page *page) ClearPageChecked(page); } -static inline void set_cold_node(struct inode *inode, struct page *page) +static inline void set_cold_node(struct page *page, bool is_dir) { struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); - if (S_ISDIR(inode->i_mode)) + if (is_dir) flag &= ~(0x1 << COLD_BIT_SHIFT); else flag |= (0x1 << COLD_BIT_SHIFT); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 210de28c9cd2..4ddc2262baf1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -242,6 +242,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct curseg_info *curseg; struct page *page = NULL; block_t blkaddr; + unsigned int loop_cnt = 0; + unsigned int free_blocks = sbi->user_block_count - + valid_user_blocks(sbi); int err = 0; /* get node pages in the current segment */ @@ -294,6 +297,17 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, if (IS_INODE(page) && is_dent_dnode(page)) entry->last_dentry = blkaddr; next: + /* sanity check in order to detect looped node chain */ + if (++loop_cnt >= free_blocks || + blkaddr == next_blkaddr_of_node(page)) { + f2fs_msg(sbi->sb, KERN_NOTICE, + "%s: detect looped node chain, " + "blkaddr:%u, next:%u", + __func__, blkaddr, next_blkaddr_of_node(page)); + err = -EINVAL; + break; + } + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bf98f6f34b7e..d7bac60ad719 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1491,12 +1491,11 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; - if (dcc->discard_wake) { + if (dcc->discard_wake) dcc->discard_wake = 0; - if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - init_discard_policy(&dpolicy, - DPOLICY_FORCE, 1); - } + + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + init_discard_policy(&dpolicy, DPOLICY_FORCE, 1); sb_start_intwrite(sbi->sb); @@ -1565,7 +1564,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { #ifdef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sbi->sb) && + if (f2fs_sb_has_blkzoned(sbi->sb) && bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif @@ -1763,7 +1762,7 @@ find_next: sbi->blocks_per_seg, cur_pos); len = next_pos - cur_pos; - if (f2fs_sb_mounted_blkzoned(sbi->sb) || + if (f2fs_sb_has_blkzoned(sbi->sb) || (force && len < cpc->trim_minlen)) goto skip; @@ -1807,7 +1806,7 @@ void init_discard_policy(struct discard_policy *dpolicy, } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = true; + dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { @@ -1943,7 +1942,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (se->type == CURSEG_WARM_NODE) { + if (IS_NODESEG(se->type)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } @@ -2244,11 +2243,17 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (sbi->segs_per_sec != 1) return CURSEG_I(sbi, type)->segno; - if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + if (test_opt(sbi, NOHEAP) && + (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) return SIT_I(sbi)->last_victim[ALLOC_NEXT]; + + /* find segments from 0 to reuse freed segments */ + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) + return 0; + return CURSEG_I(sbi, type)->segno; } @@ -2535,6 +2540,101 @@ int rw_hint_to_seg_type(enum rw_hint hint) } } +/* This returns write hints for each segment type. This hints will be + * passed down to block layer. There are mapping tables which depend on + * the mount option 'whint_mode'. + * + * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. + * + * 2) whint_mode=user-based. F2FS tries to pass down hints given by users. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_NOT_SET + * HOT_NODE " + * WARM_NODE " + * COLD_NODE " + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + * + * 3) whint_mode=fs-based. F2FS passes down hints with its policy. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_MEDIUM; + * HOT_NODE WRITE_LIFE_NOT_SET + * WARM_NODE " + * COLD_NODE WRITE_LIFE_NONE + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG + * WRITE_LIFE_NONE " " + * WRITE_LIFE_MEDIUM " " + * WRITE_LIFE_LONG " " + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + */ + +enum rw_hint io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) +{ + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_NOT_SET; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else { + return WRITE_LIFE_NOT_SET; + } + } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) { + if (type == DATA) { + if (temp == WARM) + return WRITE_LIFE_LONG; + else if (temp == HOT) + return WRITE_LIFE_SHORT; + else if (temp == COLD) + return WRITE_LIFE_EXTREME; + } else if (type == NODE) { + if (temp == WARM || temp == HOT) + return WRITE_LIFE_NOT_SET; + else if (temp == COLD) + return WRITE_LIFE_NONE; + } else if (type == META) { + return WRITE_LIFE_MEDIUM; + } + } + return WRITE_LIFE_NOT_SET; +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -2567,7 +2667,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (is_cold_data(fio->page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - if (is_inode_flag_set(inode, FI_HOT_DATA)) + if (file_is_hot(inode) || + is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; /* rw_hint_to_seg_type(inode->i_write_hint); */ return CURSEG_WARM_DATA; @@ -2583,7 +2684,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) { int type = 0; - switch (fio->sbi->active_logs) { + switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: type = __get_segment_type_2(fio); break; @@ -2723,6 +2824,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info fio = { .sbi = sbi, .type = META, + .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_META | REQ_PRIO, .old_blkaddr = page->index, @@ -2769,8 +2871,15 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) int rewrite_data_page(struct f2fs_io_info *fio) { int err; + struct f2fs_sb_info *sbi = fio->sbi; fio->new_blkaddr = fio->old_blkaddr; + /* i/o temperature is needed for passing down write hints */ + __get_segment_type(fio); + + f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi, + GET_SEGNO(sbi, fio->new_blkaddr))->type)); + stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5d6d3e72be31..96a2d57ba8a4 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -53,13 +53,19 @@ ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ (sbi)->segs_per_sec)) \ -#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) -#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) +#define MAIN_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->main_blkaddr)) +#define SEG0_BLKADDR(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->seg0_blkaddr : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment0_blkaddr)) #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) #define MAIN_SECS(sbi) ((sbi)->total_sections) -#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) +#define TOTAL_SEGS(sbi) \ + (SM_I(sbi) ? SM_I(sbi)->segment_count : \ + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) #define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) @@ -596,6 +602,8 @@ static inline int utilization(struct f2fs_sb_info *sbi) #define DEF_MIN_FSYNC_BLOCKS 8 #define DEF_MIN_HOT_BLOCKS 16 +#define SMALL_VOLUME_SEGMENTS (16 * 512) /* 16GB */ + enum { F2FS_IPU_FORCE, F2FS_IPU_SSR, @@ -630,10 +638,17 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr) { - BUG_ON(blk_addr < SEG0_BLKADDR(sbi) - || blk_addr >= MAX_BLKADDR(sbi)); + struct f2fs_sb_info *sbi = fio->sbi; + + if (PAGE_TYPE_OF_BIO(fio->type) == META && + (!is_read_io(fio->op) || fio->is_meta)) + BUG_ON(blk_addr < SEG0_BLKADDR(sbi) || + blk_addr >= MAIN_BLKADDR(sbi)); + else + BUG_ON(blk_addr < MAIN_BLKADDR(sbi) || + blk_addr >= MAX_BLKADDR(sbi)); } /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 872f9c1078f0..55b2bad55671 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -60,7 +60,7 @@ char *fault_name[FAULT_MAX] = { static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate) { - struct f2fs_fault_info *ffi = &sbi->fault_info; + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (rate) { atomic_set(&ffi->inject_ops, 0); @@ -129,6 +129,10 @@ enum { Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, + Opt_whint, + Opt_alloc, + Opt_fsync, + Opt_test_dummy_encryption, Opt_err, }; @@ -182,6 +186,10 @@ static match_table_t f2fs_tokens = { {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, + {Opt_whint, "whint_mode=%s"}, + {Opt_alloc, "alloc_mode=%s"}, + {Opt_fsync, "fsync_mode=%s"}, + {Opt_test_dummy_encryption, "test_dummy_encryption"}, {Opt_err, NULL}, }; @@ -202,21 +210,24 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) block_t limit = (sbi->user_block_count << 1) / 1000; /* limit is 0.2% */ - if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) { - sbi->root_reserved_blocks = limit; + if (test_opt(sbi, RESERVE_ROOT) && + F2FS_OPTION(sbi).root_reserved_blocks > limit) { + F2FS_OPTION(sbi).root_reserved_blocks = limit; f2fs_msg(sbi->sb, KERN_INFO, "Reduce reserved blocks for root = %u", - sbi->root_reserved_blocks); + F2FS_OPTION(sbi).root_reserved_blocks); } if (!test_opt(sbi, RESERVE_ROOT) && - (!uid_eq(sbi->s_resuid, + (!uid_eq(F2FS_OPTION(sbi).s_resuid, make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || - !gid_eq(sbi->s_resgid, + !gid_eq(F2FS_OPTION(sbi).s_resgid, make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) f2fs_msg(sbi->sb, KERN_INFO, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", - from_kuid_munged(&init_user_ns, sbi->s_resuid), - from_kgid_munged(&init_user_ns, sbi->s_resgid)); + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); } static void init_once(void *foo) @@ -236,7 +247,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, char *qname; int ret = -EINVAL; - if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled " "quota options when quota turned on"); @@ -254,8 +265,8 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "Not enough memory for storing quotafile name"); return -EINVAL; } - if (sbi->s_qf_names[qtype]) { - if (strcmp(sbi->s_qf_names[qtype], qname) == 0) + if (F2FS_OPTION(sbi).s_qf_names[qtype]) { + if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) ret = 0; else f2fs_msg(sb, KERN_ERR, @@ -268,7 +279,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quotafile must be on filesystem root"); goto errout; } - sbi->s_qf_names[qtype] = qname; + F2FS_OPTION(sbi).s_qf_names[qtype] = qname; set_opt(sbi, QUOTA); return 0; errout: @@ -280,13 +291,13 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { + if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" " when quota turned on"); return -EINVAL; } - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + kfree(F2FS_OPTION(sbi).s_qf_names[qtype]); + F2FS_OPTION(sbi).s_qf_names[qtype] = NULL; return 0; } @@ -302,15 +313,19 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) "Cannot enable project quota enforcement."); return -1; } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA] || - sbi->s_qf_names[PRJQUOTA]) { - if (test_opt(sbi, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) { + if (test_opt(sbi, USRQUOTA) && + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) clear_opt(sbi, USRQUOTA); - if (test_opt(sbi, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sbi, GRPQUOTA) && + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) clear_opt(sbi, GRPQUOTA); - if (test_opt(sbi, PRJQUOTA) && sbi->s_qf_names[PRJQUOTA]) + if (test_opt(sbi, PRJQUOTA) && + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) clear_opt(sbi, PRJQUOTA); if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || @@ -320,19 +335,19 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) return -1; } - if (!sbi->s_jquota_fmt) { + if (!F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " "not specified"); return -1; } } - if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) { + if (f2fs_sb_has_quota_ino(sbi->sb) && F2FS_OPTION(sbi).s_jquota_fmt) { f2fs_msg(sbi->sb, KERN_INFO, "QUOTA feature is enabled, so ignore jquota_fmt"); - sbi->s_jquota_fmt = 0; + F2FS_OPTION(sbi).s_jquota_fmt = 0; } - if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) { + if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) { f2fs_msg(sbi->sb, KERN_INFO, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); @@ -403,14 +418,14 @@ static int parse_options(struct super_block *sb, char *options) q = bdev_get_queue(sb->s_bdev); if (blk_queue_discard(q)) { set_opt(sbi, DISCARD); - } else if (!f2fs_sb_mounted_blkzoned(sb)) { + } else if (!f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "mounting with \"discard\" option, but " "the device does not support discard"); } break; case Opt_nodiscard: - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "discard is required for zoned block devices"); return -EINVAL; @@ -440,7 +455,7 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; set_opt(sbi, INLINE_XATTR_SIZE); - sbi->inline_xattr_size = arg; + F2FS_OPTION(sbi).inline_xattr_size = arg; break; #else case Opt_user_xattr: @@ -480,7 +495,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) return -EINVAL; - sbi->active_logs = arg; + F2FS_OPTION(sbi).active_logs = arg; break; case Opt_disable_ext_identify: set_opt(sbi, DISABLE_EXT_IDENTIFY); @@ -524,9 +539,9 @@ static int parse_options(struct super_block *sb, char *options) if (test_opt(sbi, RESERVE_ROOT)) { f2fs_msg(sb, KERN_INFO, "Preserve previous reserve_root=%u", - sbi->root_reserved_blocks); + F2FS_OPTION(sbi).root_reserved_blocks); } else { - sbi->root_reserved_blocks = arg; + F2FS_OPTION(sbi).root_reserved_blocks = arg; set_opt(sbi, RESERVE_ROOT); } break; @@ -539,7 +554,7 @@ static int parse_options(struct super_block *sb, char *options) "Invalid uid value %d", arg); return -EINVAL; } - sbi->s_resuid = uid; + F2FS_OPTION(sbi).s_resuid = uid; break; case Opt_resgid: if (args->from && match_int(args, &arg)) @@ -550,7 +565,7 @@ static int parse_options(struct super_block *sb, char *options) "Invalid gid value %d", arg); return -EINVAL; } - sbi->s_resgid = gid; + F2FS_OPTION(sbi).s_resgid = gid; break; case Opt_mode: name = match_strdup(&args[0]); @@ -559,7 +574,7 @@ static int parse_options(struct super_block *sb, char *options) return -ENOMEM; if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_WARNING, "adaptive mode is not allowed with " "zoned block device feature"); @@ -585,7 +600,7 @@ static int parse_options(struct super_block *sb, char *options) 1 << arg, BIO_MAX_PAGES); return -EINVAL; } - sbi->write_io_size_bits = arg; + F2FS_OPTION(sbi).write_io_size_bits = arg; break; case Opt_fault_injection: if (args->from && match_int(args, &arg)) @@ -646,13 +661,13 @@ static int parse_options(struct super_block *sb, char *options) return ret; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_OLD; break; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V0; break; case Opt_jqfmt_vfsv1: - sbi->s_jquota_fmt = QFMT_VFS_V1; + F2FS_OPTION(sbi).s_jquota_fmt = QFMT_VFS_V1; break; case Opt_noquota: clear_opt(sbi, QUOTA); @@ -679,6 +694,73 @@ static int parse_options(struct super_block *sb, char *options) "quota operations not supported"); break; #endif + case Opt_whint: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 10 && + !strncmp(name, "user-based", 10)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER; + } else if (strlen(name) == 3 && + !strncmp(name, "off", 3)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + } else if (strlen(name) == 8 && + !strncmp(name, "fs-based", 8)) { + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_alloc: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + if (strlen(name) == 7 && + !strncmp(name, "default", 7)) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + } else if (strlen(name) == 5 && + !strncmp(name, "reuse", 5)) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_fsync: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 5 && + !strncmp(name, "posix", 5)) { + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; + } else if (strlen(name) == 6 && + !strncmp(name, "strict", 6)) { + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_test_dummy_encryption: +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (!f2fs_sb_has_encrypt(sb)) { + f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); + return -EINVAL; + } + + F2FS_OPTION(sbi).test_dummy_encryption = true; + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mode enabled"); +#else + f2fs_msg(sb, KERN_INFO, + "Test dummy encryption mount option ignored"); +#endif + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -699,14 +781,22 @@ static int parse_options(struct super_block *sb, char *options) } if (test_opt(sbi, INLINE_XATTR_SIZE)) { + if (!f2fs_sb_has_extra_attr(sb) || + !f2fs_sb_has_flexible_inline_xattr(sb)) { + f2fs_msg(sb, KERN_ERR, + "extra_attr or flexible_inline_xattr " + "feature is off"); + return -EINVAL; + } if (!test_opt(sbi, INLINE_XATTR)) { f2fs_msg(sb, KERN_ERR, "inline_xattr_size option should be " "set with inline_xattr option"); return -EINVAL; } - if (!sbi->inline_xattr_size || - sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE - + if (!F2FS_OPTION(sbi).inline_xattr_size || + F2FS_OPTION(sbi).inline_xattr_size >= + DEF_ADDRS_PER_INODE - F2FS_TOTAL_EXTRA_ATTR_SIZE - DEF_INLINE_RESERVED_SIZE - DEF_MIN_INLINE_SIZE) { @@ -715,6 +805,12 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; } } + + /* Not pass down write hints if the number of active logs is lesser + * than NR_CURSEG_TYPE. + */ + if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE) + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; return 0; } @@ -731,7 +827,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; - fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); @@ -743,10 +838,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_rwsem(&fi->i_mmap_sem); init_rwsem(&fi->i_xattr_sem); -#ifdef CONFIG_QUOTA - memset(&fi->i_dquot, 0, sizeof(fi->i_dquot)); - fi->i_reserved_quota = 0; -#endif /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; @@ -957,7 +1048,7 @@ static void f2fs_put_super(struct super_block *sb) mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif destroy_percpu_info(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) @@ -1071,8 +1162,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; - if (buf->f_bfree > sbi->root_reserved_blocks) - buf->f_bavail = buf->f_bfree - sbi->root_reserved_blocks; + if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) + buf->f_bavail = buf->f_bfree - + F2FS_OPTION(sbi).root_reserved_blocks; else buf->f_bavail = 0; @@ -1107,10 +1199,10 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, #ifdef CONFIG_QUOTA struct f2fs_sb_info *sbi = F2FS_SB(sb); - if (sbi->s_jquota_fmt) { + if (F2FS_OPTION(sbi).s_jquota_fmt) { char *fmtname = ""; - switch (sbi->s_jquota_fmt) { + switch (F2FS_OPTION(sbi).s_jquota_fmt) { case QFMT_VFS_OLD: fmtname = "vfsold"; break; @@ -1124,14 +1216,17 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, seq_printf(seq, ",jqfmt=%s", fmtname); } - if (sbi->s_qf_names[USRQUOTA]) - seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA]) + seq_show_option(seq, "usrjquota", + F2FS_OPTION(sbi).s_qf_names[USRQUOTA]); - if (sbi->s_qf_names[GRPQUOTA]) - seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]) + seq_show_option(seq, "grpjquota", + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA]); - if (sbi->s_qf_names[PRJQUOTA]) - seq_show_option(seq, "prjjquota", sbi->s_qf_names[PRJQUOTA]); + if (F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + seq_show_option(seq, "prjjquota", + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]); #endif } @@ -1166,7 +1261,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noinline_xattr"); if (test_opt(sbi, INLINE_XATTR_SIZE)) seq_printf(seq, ",inline_xattr_size=%u", - sbi->inline_xattr_size); + F2FS_OPTION(sbi).inline_xattr_size); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -1202,18 +1297,20 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, "adaptive"); else if (test_opt(sbi, LFS)) seq_puts(seq, "lfs"); - seq_printf(seq, ",active_logs=%u", sbi->active_logs); + seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", - sbi->root_reserved_blocks, - from_kuid_munged(&init_user_ns, sbi->s_resuid), - from_kgid_munged(&init_user_ns, sbi->s_resgid)); + F2FS_OPTION(sbi).root_reserved_blocks, + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) seq_printf(seq, ",fault_injection=%u", - sbi->fault_info.inject_rate); + F2FS_OPTION(sbi).fault_info.inject_rate); #endif #ifdef CONFIG_QUOTA if (test_opt(sbi, QUOTA)) @@ -1226,15 +1323,37 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",prjquota"); #endif f2fs_show_quota_options(seq, sbi->sb); + if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) + seq_printf(seq, ",whint_mode=%s", "user-based"); + else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) + seq_printf(seq, ",whint_mode=%s", "fs-based"); +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (F2FS_OPTION(sbi).test_dummy_encryption) + seq_puts(seq, ",test_dummy_encryption"); +#endif + + if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT) + seq_printf(seq, ",alloc_mode=%s", "default"); + else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) + seq_printf(seq, ",alloc_mode=%s", "reuse"); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) + seq_printf(seq, ",fsync_mode=%s", "posix"); + else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + seq_printf(seq, ",fsync_mode=%s", "strict"); return 0; } static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ - sbi->active_logs = NR_CURSEG_TYPE; - sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE; + F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; + F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; + F2FS_OPTION(sbi).test_dummy_encryption = false; + sbi->readdir_ra = 1; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1244,7 +1363,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - if (f2fs_sb_mounted_blkzoned(sbi->sb)) { + if (f2fs_sb_has_blkzoned(sbi->sb)) { set_opt_mode(sbi, F2FS_MOUNT_LFS); set_opt(sbi, DISCARD); } else { @@ -1271,16 +1390,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; unsigned long old_sb_flags; - int err, active_logs; + int err; bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); -#ifdef CONFIG_F2FS_FAULT_INJECTION - struct f2fs_fault_info ffi = sbi->fault_info; -#endif #ifdef CONFIG_QUOTA - int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; int i, j; #endif @@ -1290,21 +1404,21 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ org_mount_opt = sbi->mount_opt; old_sb_flags = sb->s_flags; - active_logs = sbi->active_logs; #ifdef CONFIG_QUOTA - s_jquota_fmt = sbi->s_jquota_fmt; + org_mount_opt.s_jquota_fmt = F2FS_OPTION(sbi).s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { - s_qf_names[i] = kstrdup(sbi->s_qf_names[i], - GFP_KERNEL); - if (!s_qf_names[i]) { + if (F2FS_OPTION(sbi).s_qf_names[i]) { + org_mount_opt.s_qf_names[i] = + kstrdup(F2FS_OPTION(sbi).s_qf_names[i], + GFP_KERNEL); + if (!org_mount_opt.s_qf_names[i]) { for (j = 0; j < i; j++) - kfree(s_qf_names[j]); + kfree(org_mount_opt.s_qf_names[j]); return -ENOMEM; } } else { - s_qf_names[i] = NULL; + org_mount_opt.s_qf_names[i] = NULL; } } #endif @@ -1374,7 +1488,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) need_stop_gc = true; } - if (*flags & MS_RDONLY) { + if (*flags & MS_RDONLY || + F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) { writeback_inodes_sb(sb, WB_REASON_SYNC); sync_inodes_sb(sb); @@ -1400,7 +1515,7 @@ skip: #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) - kfree(s_qf_names[i]); + kfree(org_mount_opt.s_qf_names[i]); #endif /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -1418,18 +1533,14 @@ restore_gc: } restore_opts: #ifdef CONFIG_QUOTA - sbi->s_jquota_fmt = s_jquota_fmt; + F2FS_OPTION(sbi).s_jquota_fmt = org_mount_opt.s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = s_qf_names[i]; + kfree(F2FS_OPTION(sbi).s_qf_names[i]); + F2FS_OPTION(sbi).s_qf_names[i] = org_mount_opt.s_qf_names[i]; } #endif sbi->mount_opt = org_mount_opt; - sbi->active_logs = active_logs; sb->s_flags = old_sb_flags; -#ifdef CONFIG_F2FS_FAULT_INJECTION - sbi->fault_info = ffi; -#endif return err; } @@ -1551,8 +1662,8 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { - return dquot_quota_on_mount(sbi->sb, sbi->s_qf_names[type], - sbi->s_jquota_fmt, type); + return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type], + F2FS_OPTION(sbi).s_jquota_fmt, type); } int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) @@ -1571,7 +1682,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) } for (i = 0; i < MAXQUOTAS; i++) { - if (sbi->s_qf_names[i]) { + if (F2FS_OPTION(sbi).s_qf_names[i]) { err = f2fs_quota_on_mount(sbi, i); if (!err) { enabled = 1; @@ -1801,11 +1912,28 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len) static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* + * Encrypting the root directory is not allowed because fsck + * expects lost+found directory to exist and remain unencrypted + * if LOST_FOUND feature is enabled. + * + */ + if (f2fs_sb_has_lost_found(sbi->sb) && + inode->i_ino == F2FS_ROOT_INO(sbi)) + return -EPERM; + return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len, fs_data, XATTR_CREATE); } +static bool f2fs_dummy_context(struct inode *inode) +{ + return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); +} + static unsigned f2fs_max_namelen(struct inode *inode) { return S_ISLNK(inode->i_mode) ? @@ -1816,6 +1944,7 @@ static const struct fscrypt_operations f2fs_cryptops = { .key_prefix = "f2fs:", .get_context = f2fs_get_context, .set_context = f2fs_set_context, + .dummy_context = f2fs_dummy_context, .empty_dir = f2fs_empty_dir, .max_namelen = f2fs_max_namelen, }; @@ -1898,7 +2027,6 @@ static int __f2fs_commit_super(struct buffer_head *bh, lock_buffer(bh); if (super) memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_uptodate(bh); set_buffer_dirty(bh); unlock_buffer(bh); @@ -2185,6 +2313,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dirty_device = 0; spin_lock_init(&sbi->dev_lock); + + init_rwsem(&sbi->sb_lock); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -2210,7 +2340,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) unsigned int n = 0; int err = -EIO; - if (!f2fs_sb_mounted_blkzoned(sbi->sb)) + if (!f2fs_sb_has_blkzoned(sbi->sb)) return 0; if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != @@ -2338,7 +2468,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* write back-up superblock first */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1); + bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); @@ -2349,7 +2479,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return err; /* write current valid superblock */ - bh = sb_getblk(sbi->sb, sbi->valid_super_block); + bh = sb_bread(sbi->sb, sbi->valid_super_block); if (!bh) return -EIO; err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); @@ -2421,7 +2551,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && - !f2fs_sb_mounted_blkzoned(sbi->sb)) { + !f2fs_sb_has_blkzoned(sbi->sb)) { f2fs_msg(sbi->sb, KERN_ERR, "Zoned block device feature not enabled\n"); return -EINVAL; @@ -2455,6 +2585,18 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) return 0; } +static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_i = SM_I(sbi); + + /* adjust parameters according to the volume size */ + if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) { + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + sm_i->dcc_info->discard_granularity = 1; + sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; + } +} + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; @@ -2502,8 +2644,8 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; - sbi->s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); - sbi->s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); + F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); + F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sb)) @@ -2516,7 +2658,7 @@ try_onemore: * devices, but mandatory for host-managed zoned block devices. */ #ifndef CONFIG_BLK_DEV_ZONED - if (f2fs_sb_mounted_blkzoned(sb)) { + if (f2fs_sb_has_blkzoned(sb)) { f2fs_msg(sb, KERN_ERR, "Zoned block device support is not enabled\n"); err = -EOPNOTSUPP; @@ -2732,7 +2874,7 @@ try_onemore: * Turn on quotas which were not enabled for read-only mounts if * filesystem has quota feature, so that they are updated correctly. */ - if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) { + if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) { f2fs_msg(sb, KERN_ERR, @@ -2807,6 +2949,8 @@ skip_recovery: f2fs_join_shrinker(sbi); + f2fs_tuning_parameters(sbi); + f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); @@ -2815,7 +2959,7 @@ skip_recovery: free_meta: #ifdef CONFIG_QUOTA - if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) + if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif f2fs_sync_inode_meta(sbi); @@ -2859,7 +3003,7 @@ free_bio_info: free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); + kfree(F2FS_OPTION(sbi).s_qf_names[i]); #endif kfree(options); free_sb_buf: @@ -2956,8 +3100,13 @@ static int __init init_f2fs_fs(void) err = f2fs_create_root_stats(); if (err) goto free_filesystem; + err = f2fs_init_post_read_processing(); + if (err) + goto free_root_stats; return 0; +free_root_stats: + f2fs_destroy_root_stats(); free_filesystem: unregister_filesystem(&f2fs_fs_type); free_shrinker: @@ -2980,6 +3129,7 @@ fail: static void __exit exit_f2fs_fs(void) { + f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index d978c7b6ea04..f33a56d6e6dd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -58,7 +58,7 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) #ifdef CONFIG_F2FS_FAULT_INJECTION else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) - return (unsigned char *)&sbi->fault_info; + return (unsigned char *)&F2FS_OPTION(sbi).fault_info; #endif return NULL; } @@ -92,10 +92,10 @@ static ssize_t features_show(struct f2fs_attr *a, if (!sb->s_bdev->bd_part) return snprintf(buf, PAGE_SIZE, "0\n"); - if (f2fs_sb_has_crypto(sb)) + if (f2fs_sb_has_encrypt(sb)) len += snprintf(buf, PAGE_SIZE - len, "%s", "encryption"); - if (f2fs_sb_mounted_blkzoned(sb)) + if (f2fs_sb_has_blkzoned(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sb)) @@ -116,6 +116,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_inode_crtime(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_crtime"); + if (f2fs_sb_has_lost_found(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "lost_found"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -136,6 +139,27 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + __u8 (*extlist)[F2FS_EXTENSION_LEN] = + sbi->raw_super->extension_list; + int cold_count = le32_to_cpu(sbi->raw_super->extension_count); + int hot_count = sbi->raw_super->hot_ext_count; + int len = 0, i; + + len += snprintf(buf + len, PAGE_SIZE - len, + "cold file extenstion:\n"); + for (i = 0; i < cold_count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + + len += snprintf(buf + len, PAGE_SIZE - len, + "hot file extenstion:\n"); + for (i = cold_count; i < cold_count + hot_count; i++) + len += snprintf(buf + len, PAGE_SIZE - len, "%s\n", + extlist[i]); + return len; + } + ui = (unsigned int *)(ptr + a->offset); return snprintf(buf, PAGE_SIZE, "%u\n", *ui); @@ -154,6 +178,41 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!ptr) return -EINVAL; + if (!strcmp(a->attr.name, "extension_list")) { + const char *name = strim((char *)buf); + bool set = true, hot; + + if (!strncmp(name, "[h]", 3)) + hot = true; + else if (!strncmp(name, "[c]", 3)) + hot = false; + else + return -EINVAL; + + name += 3; + + if (*name == '!') { + name++; + set = false; + } + + if (strlen(name) >= F2FS_EXTENSION_LEN) + return -EINVAL; + + down_write(&sbi->sb_lock); + + ret = update_extension_list(sbi, name, hot, set); + if (ret) + goto out; + + ret = f2fs_commit_super(sbi, false); + if (ret) + update_extension_list(sbi, name, hot, !set); +out: + up_write(&sbi->sb_lock); + return ret ? ret : count; + } + ui = (unsigned int *)(ptr + a->offset); ret = kstrtoul(skip_spaces(buf), 0, &t); @@ -166,7 +225,7 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - sbi->root_reserved_blocks)) { + F2FS_OPTION(sbi).root_reserved_blocks)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } @@ -236,6 +295,7 @@ enum feat_id { FEAT_FLEXIBLE_INLINE_XATTR, FEAT_QUOTA_INO, FEAT_INODE_CRTIME, + FEAT_LOST_FOUND, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -251,6 +311,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_FLEXIBLE_INLINE_XATTR: case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: + case FEAT_LOST_FOUND: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -307,6 +368,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); +F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -329,6 +391,7 @@ F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); +F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -357,6 +420,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), + ATTR_LIST(extension_list), #ifdef CONFIG_F2FS_FAULT_INJECTION ATTR_LIST(inject_rate), ATTR_LIST(inject_type), @@ -383,6 +447,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(flexible_inline_xattr), ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), + ATTR_LIST(lost_found), NULL, }; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 48fe91e86c2a..da43c4a22e1b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -747,11 +747,12 @@ int inode_congested(struct inode *inode, int cong_bits) */ if (inode && inode_to_wb_is_valid(inode)) { struct bdi_writeback *wb; - bool locked, congested; + struct wb_lock_cookie lock_cookie = {}; + bool congested; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &lock_cookie); congested = wb_congested(wb, cong_bits); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &lock_cookie); return congested; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c6a499b7547e..9398d1b70545 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -914,7 +914,7 @@ out: } /* - * This is a variaon of __jbd2_update_log_tail which checks for validity of + * This is a variation of __jbd2_update_log_tail which checks for validity of * provided log tail and locks j_checkpoint_mutex. So it is safe against races * with other threads updating log tail. */ @@ -1384,6 +1384,9 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, journal_superblock_t *sb = journal->j_superblock; int ret; + if (is_journal_aborted(journal)) + return -EIO; + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", tail_block, tail_tid); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d86c5e3176a1..600da1a4df29 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -345,7 +345,7 @@ static void jffs2_put_super (struct super_block *sb) static void jffs2_kill_sb(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - if (!(sb->s_flags & MS_RDONLY)) + if (c && !(sb->s_flags & MS_RDONLY)) jffs2_stop_garbage_collect_thread(c); kill_mtd_super(sb); kfree(c); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 5f31ebd96c06..a2edb0049eb5 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -129,6 +129,8 @@ lockd(void *vrqstp) { int err = 0; struct svc_rqst *rqstp = vrqstp; + struct net *net = &init_net; + struct lockd_net *ln = net_generic(net, lockd_net_id); /* try_to_freeze() is called from svc_recv() */ set_freezable(); @@ -173,6 +175,8 @@ lockd(void *vrqstp) if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); + cancel_delayed_work_sync(&ln->grace_period_end); + locks_end_grace(&ln->lockd_manager); return 0; } @@ -267,8 +271,6 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net) if (ln->nlmsvc_users) { if (--ln->nlmsvc_users == 0) { nlm_shutdown_hosts_net(net); - cancel_delayed_work_sync(&ln->grace_period_end); - locks_end_grace(&ln->lockd_manager); svc_shutdown_net(serv, net); dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); } diff --git a/fs/namei.c b/fs/namei.c index e82d2955de00..ea6050b6134a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -219,9 +219,10 @@ getname_kernel(const char * filename) if (len <= EMBEDDED_NAME_MAX) { result->name = (char *)result->iname; } else if (len <= PATH_MAX) { + const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kmalloc(size, GFP_KERNEL); if (unlikely(!tmp)) { __putname(result); return ERR_PTR(-ENOMEM); diff --git a/fs/namespace.c b/fs/namespace.c index adbe44dda88f..b3d8d3d8f05f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1036,7 +1036,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, goto out_free; } - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); + mnt->mnt.mnt_flags = old->mnt.mnt_flags; + mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); /* Don't allow unprivileged users to change mount flags */ if (flag & CL_UNPRIVILEGED) { mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 54313322ee5b..c8e90152b61b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -461,6 +461,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, goto out_err_free; /* fh */ + rc = -EIO; p = xdr_inline_decode(&stream, 4); if (!p) goto out_err_free; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8ef6f70c9e25..0f397e62de5a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3025,6 +3025,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f .rpc_resp = &res, }; int status; + int i; bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_FH_EXPIRE_TYPE | @@ -3090,8 +3091,13 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->cache_consistency_bitmask[2] = 0; + + /* Avoid a regression due to buggy server */ + for (i = 0; i < ARRAY_SIZE(res.exclcreat_bitmask); i++) + res.exclcreat_bitmask[i] &= res.attr_bitmask[i]; memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask, sizeof(server->exclcreat_bitmask)); + server->acl_bitmask = res.acl_bitmask; server->fh_expire_type = res.fh_expire_type; } @@ -7670,6 +7676,12 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf /* fall through */ case -NFS4ERR_RETRY_UNCACHED_REP: return -EAGAIN; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + nfs4_schedule_session_recovery(clp->cl_session, + task->tk_status); + break; default: nfs4_schedule_lease_recovery(clp); } @@ -7748,7 +7760,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, if (status == 0) status = task->tk_status; rpc_put_task(task); - return 0; out: dprintk("<-- %s status=%d\n", __func__, status); return status; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9a0b219ff74d..83fba40396ae 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1593,13 +1593,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); } -static void nfs4_reclaim_complete(struct nfs_client *clp, +static int nfs4_reclaim_complete(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops, struct rpc_cred *cred) { /* Notify the server we're done reclaiming our state */ if (ops->reclaim_complete) - (void)ops->reclaim_complete(clp, cred); + return ops->reclaim_complete(clp, cred); + return 0; } static void nfs4_clear_reclaim_server(struct nfs_server *server) @@ -1646,13 +1647,16 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) { const struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; + int err; if (!nfs4_state_clear_reclaim_reboot(clp)) return; ops = clp->cl_mvops->reboot_recovery_ops; cred = nfs4_get_clid_cred(clp); - nfs4_reclaim_complete(clp, ops, cred); + err = nfs4_reclaim_complete(clp, ops, cred); put_rpccred(cred); + if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION) + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); } static void nfs_delegation_clear_all(struct nfs_client *clp) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e0e5f7c3c99f..8a459b179183 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -92,7 +92,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, u32 event_mask, void *data, int data_type) { - __u32 marks_mask, marks_ignored_mask; + __u32 marks_mask = 0, marks_ignored_mask = 0; struct path *path = data; pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p" @@ -108,24 +108,20 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, !d_can_lookup(path->dentry)) return false; - if (inode_mark && vfsmnt_mark) { - marks_mask = (vfsmnt_mark->mask | inode_mark->mask); - marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask); - } else if (inode_mark) { - /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! - */ - if ((event_mask & FS_EVENT_ON_CHILD) && - !(inode_mark->mask & FS_EVENT_ON_CHILD)) - return false; - marks_mask = inode_mark->mask; - marks_ignored_mask = inode_mark->ignored_mask; - } else if (vfsmnt_mark) { - marks_mask = vfsmnt_mark->mask; - marks_ignored_mask = vfsmnt_mark->ignored_mask; - } else { - BUG(); + /* + * if the event is for a child and this inode doesn't care about + * events on the child, don't send it! + */ + if (inode_mark && + (!(event_mask & FS_EVENT_ON_CHILD) || + (inode_mark->mask & FS_EVENT_ON_CHILD))) { + marks_mask |= inode_mark->mask; + marks_ignored_mask |= inode_mark->ignored_mask; + } + + if (vfsmnt_mark) { + marks_mask |= vfsmnt_mark->mask; + marks_ignored_mask |= vfsmnt_mark->ignored_mask; } if (d_is_dir(path->dentry) && diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 220b04f04523..985a4cdae06d 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -272,6 +272,16 @@ ssize_t ovl_getxattr(struct dentry *dentry, const char *name, return vfs_getxattr(realpath.dentry, name, value, size); } +static bool ovl_can_list(const char *s) +{ + /* List all non-trusted xatts */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* Never list trusted.overlay, list other trusted for superuser only */ + return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); +} + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct path realpath; @@ -296,7 +306,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return -EIO; len -= slen; - if (ovl_is_private_xattr(s)) { + if (!ovl_can_list(s)) { res -= slen; memmove(s, s + slen, len); } else { diff --git a/fs/proc/base.c b/fs/proc/base.c index 62facaf3971f..072dac45b102 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3090,8 +3090,8 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), @@ -3484,8 +3484,8 @@ static const struct pid_entry tid_base_stuff[] = { ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), diff --git a/fs/proc/uid.c b/fs/proc/uid.c index 616d99b157c3..040591d341f8 100644 --- a/fs/proc/uid.c +++ b/fs/proc/uid.c @@ -286,6 +286,8 @@ static const struct inode_operations proc_uid_inode_operations = { int __init proc_uid_init(void) { proc_uid = proc_mkdir("uid", NULL); + if (!proc_uid) + return -ENOMEM; proc_uid->proc_iops = &proc_uid_inode_operations; proc_uid->proc_fops = &proc_uid_operations; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index a72097b625ef..00985f9db9f7 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super_block *super, if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; - reiserfs_warning(super, + reiserfs_warning(super, "sh-457", "journal_init_dev: Cannot open '%s': %i", jdev_name, result); return result; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1fd90c079537..0bb6de356451 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1728,8 +1728,11 @@ static void ubifs_remount_ro(struct ubifs_info *c) dbg_save_space_info(c); - for (i = 0; i < c->jhead_cnt; i++) - ubifs_wbuf_sync(&c->jheads[i].wbuf); + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + ubifs_ro_mode(c, err); + } c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); @@ -1795,8 +1798,11 @@ static void ubifs_put_super(struct super_block *sb) int err; /* Synchronize write-buffers */ - for (i = 0; i < c->jhead_cnt; i++) - ubifs_wbuf_sync(&c->jheads[i].wbuf); + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + ubifs_ro_mode(c, err); + } /* * We are being cleanly unmounted which means the diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c104d4aed62a..de1414ada5a1 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -193,6 +193,11 @@ static inline void set_bdi_congested(struct backing_dev_info *bdi, int sync) set_wb_congested(bdi->wb.congested, sync); } +struct wb_lock_cookie { + bool locked; + unsigned long flags; +}; + #ifdef CONFIG_CGROUP_WRITEBACK /** diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 125bc67319b4..4bc6540d426b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -374,7 +374,7 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) /** * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction * @inode: target inode - * @lockedp: temp bool output param, to be passed to the end function + * @cookie: output param, to be passed to the end function * * The caller wants to access the wb associated with @inode but isn't * holding inode->i_lock, mapping->tree_lock or wb->list_lock. This @@ -382,12 +382,12 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) * association doesn't change until the transaction is finished with * unlocked_inode_to_wb_end(). * - * The caller must call unlocked_inode_to_wb_end() with *@lockdep - * afterwards and can't sleep during transaction. IRQ may or may not be - * disabled on return. + * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and + * can't sleep during the transaction. IRQs may or may not be disabled on + * return. */ static inline struct bdi_writeback * -unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { rcu_read_lock(); @@ -395,10 +395,10 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) * Paired with store_release in inode_switch_wb_work_fn() and * ensures that we see the new wb if we see cleared I_WB_SWITCH. */ - *lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; + cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH; - if (unlikely(*lockedp)) - spin_lock_irq(&inode->i_mapping->tree_lock); + if (unlikely(cookie->locked)) + spin_lock_irqsave(&inode->i_mapping->tree_lock, cookie->flags); /* * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock. @@ -410,12 +410,14 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) /** * unlocked_inode_to_wb_end - end inode wb access transaction * @inode: target inode - * @locked: *@lockedp from unlocked_inode_to_wb_begin() + * @cookie: @cookie from unlocked_inode_to_wb_begin() */ -static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +static inline void unlocked_inode_to_wb_end(struct inode *inode, + struct wb_lock_cookie *cookie) { - if (unlikely(locked)) - spin_unlock_irq(&inode->i_mapping->tree_lock); + if (unlikely(cookie->locked)) + spin_unlock_irqrestore(&inode->i_mapping->tree_lock, + cookie->flags); rcu_read_unlock(); } @@ -462,12 +464,13 @@ static inline struct bdi_writeback *inode_to_wb(struct inode *inode) } static inline struct bdi_writeback * -unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp) +unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie) { return inode_to_wb(inode); } -static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) +static inline void unlocked_inode_to_wb_end(struct inode *inode, + struct wb_lock_cookie *cookie) { } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6d73a04d0150..e7fd9490bcf6 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -56,6 +56,7 @@ struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; unsigned int bi_flags; /* status, command, etc */ + unsigned short bi_write_hint; int bi_error; unsigned long bi_rw; /* bottom bits READ/WRITE, * top bits priority diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 073365c9808a..2ebfa01b7091 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -21,6 +21,7 @@ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ +#define F2FS_EXTENSION_LEN 8 /* max size of extension */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ @@ -38,10 +39,10 @@ #define F2FS_MAX_QUOTAS 3 -#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ -#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ -#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */ +#define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ +#define F2FS_IO_SIZE_BYTES(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ #define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) /* This flag is used by node and meta inodes, and by recovery */ @@ -101,7 +102,7 @@ struct f2fs_super_block { __u8 uuid[16]; /* 128-bit uuid for volume */ __le16 volume_name[MAX_VOLUME_NAME]; /* volume name */ __le32 extension_count; /* # of extensions below */ - __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ + __u8 extension_list[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];/* extension array */ __le32 cp_payload; __u8 version[VERSION_LEN]; /* the kernel version */ __u8 init_version[VERSION_LEN]; /* the initial kernel version */ @@ -110,12 +111,14 @@ struct f2fs_super_block { __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ struct f2fs_device devs[MAX_DEVICES]; /* device list */ __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ - __u8 reserved[315]; /* valid reserved region */ + __u8 hot_ext_count; /* # of hot file extension */ + __u8 reserved[314]; /* valid reserved region */ } __packed; /* * For checkpoint */ +#define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 #define CP_NAT_BITS_FLAG 0x00000080 @@ -302,6 +305,10 @@ struct f2fs_node { */ #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) #define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8) +#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \ + ((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \ + BITS_PER_LONG * BITS_PER_LONG) + struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 20b7b1b41630..250f4d1ce9c5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -20,6 +20,7 @@ #include <linux/rwsem.h> #include <linux/capability.h> #include <linux/semaphore.h> +#include <linux/fcntl.h> #include <linux/fiemap.h> #include <linux/rculist_bl.h> #include <linux/atomic.h> @@ -143,6 +144,9 @@ typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) +/* File is capable of returning -EAGAIN if I/O will block */ +#define FMODE_NOWAIT ((__force fmode_t)0x8000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are @@ -326,9 +330,22 @@ struct page; struct address_space; struct writeback_control; +/* + * Write life time hint values. + */ +enum rw_hint { + WRITE_LIFE_NOT_SET = 0, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +}; + #define IOCB_EVENTFD (1 << 0) #define IOCB_APPEND (1 << 1) #define IOCB_DIRECT (1 << 2) +#define IOCB_NOWAIT (1 << 7) struct kiocb { struct file *ki_filp; @@ -336,6 +353,7 @@ struct kiocb { void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); void *private; int ki_flags; + enum rw_hint ki_hint; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) @@ -635,6 +653,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; unsigned int i_blkbits; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED @@ -1073,8 +1092,6 @@ struct file_lock_context { #define OFFT_OFFSET_MAX INT_LIMIT(off_t) #endif -#include <linux/fcntl.h> - extern void send_sigio(struct fown_struct *fown, int fd, int band); #ifdef CONFIG_FILE_LOCKING diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 8641e56b8f8a..9e535af579e8 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -13,42 +13,13 @@ #ifndef _LINUX_FSCRYPT_H #define _LINUX_FSCRYPT_H -#include <linux/key.h> #include <linux/fs.h> -#include <linux/mm.h> -#include <linux/bio.h> -#include <linux/dcache.h> -#include <crypto/skcipher.h> -#include <uapi/linux/fs.h> #define FS_CRYPTO_BLOCK_SIZE 16 +struct fscrypt_ctx; struct fscrypt_info; -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. - */ -struct fscrypt_symlink_data { - __le16 len; - char encrypted_path[1]; -} __packed; - struct fscrypt_str { unsigned char *name; u32 len; @@ -67,86 +38,11 @@ struct fscrypt_name { #define fname_name(p) ((p)->disk_name.name) #define fname_len(p) ((p)->disk_name.len) -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto opertions for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*set_context)(struct inode *, const void *, size_t, void *); - bool (*dummy_context)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned (*max_namelen)(struct inode *); -}; - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - if (inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode)) - return true; - return false; -} - -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && - filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) - return true; - - if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && - filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) - return true; - - return false; -} - -static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) -{ - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; -} - #if __FS_HAS_ENCRYPTION - -static inline struct page *fscrypt_control_page(struct page *page) -{ - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -} - -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return (inode->i_crypt_info != NULL); -} - #include <linux/fscrypt_supp.h> - -#else /* !__FS_HAS_ENCRYPTION */ - -static inline struct page *fscrypt_control_page(struct page *page) -{ - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -} - -static inline bool fscrypt_has_encryption_key(const struct inode *inode) -{ - return 0; -} - +#else #include <linux/fscrypt_notsupp.h> -#endif /* __FS_HAS_ENCRYPTION */ +#endif /** * fscrypt_require_key - require an inode's encryption key @@ -287,4 +183,68 @@ static inline int fscrypt_prepare_setattr(struct dentry *dentry, return 0; } +/** + * fscrypt_prepare_symlink - prepare to create a possibly-encrypted symlink + * @dir: directory in which the symlink is being created + * @target: plaintext symlink target + * @len: length of @target excluding null terminator + * @max_len: space the filesystem has available to store the symlink target + * @disk_link: (out) the on-disk symlink target being prepared + * + * This function computes the size the symlink target will require on-disk, + * stores it in @disk_link->len, and validates it against @max_len. An + * encrypted symlink may be longer than the original. + * + * Additionally, @disk_link->name is set to @target if the symlink will be + * unencrypted, but left NULL if the symlink will be encrypted. For encrypted + * symlinks, the filesystem must call fscrypt_encrypt_symlink() to create the + * on-disk target later. (The reason for the two-step process is that some + * filesystems need to know the size of the symlink target before creating the + * inode, e.g. to determine whether it will be a "fast" or "slow" symlink.) + * + * Return: 0 on success, -ENAMETOOLONG if the symlink target is too long, + * -ENOKEY if the encryption key is missing, or another -errno code if a problem + * occurred while setting up the encryption key. + */ +static inline int fscrypt_prepare_symlink(struct inode *dir, + const char *target, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + if (IS_ENCRYPTED(dir) || fscrypt_dummy_context_enabled(dir)) + return __fscrypt_prepare_symlink(dir, len, max_len, disk_link); + + disk_link->name = (unsigned char *)target; + disk_link->len = len + 1; + if (disk_link->len > max_len) + return -ENAMETOOLONG; + return 0; +} + +/** + * fscrypt_encrypt_symlink - encrypt the symlink target if needed + * @inode: symlink inode + * @target: plaintext symlink target + * @len: length of @target excluding null terminator + * @disk_link: (in/out) the on-disk symlink target being prepared + * + * If the symlink target needs to be encrypted, then this function encrypts it + * into @disk_link->name. fscrypt_prepare_symlink() must have been called + * previously to compute @disk_link->len. If the filesystem did not allocate a + * buffer for @disk_link->name after calling fscrypt_prepare_link(), then one + * will be kmalloc()'ed and the filesystem will be responsible for freeing it. + * + * Return: 0 on success, -errno on failure + */ +static inline int fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + if (IS_ENCRYPTED(inode)) + return __fscrypt_encrypt_symlink(inode, target, len, disk_link); + return 0; +} + #endif /* _LINUX_FSCRYPT_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index c4c6bf2c390e..44bd4fbd3ec5 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -13,7 +13,21 @@ #ifndef _LINUX_FSCRYPT_NOTSUPP_H #define _LINUX_FSCRYPT_NOTSUPP_H +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return false; +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return false; +} + /* crypto.c */ +static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) +{ +} + static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) { @@ -42,6 +56,11 @@ static inline int fscrypt_decrypt_page(const struct inode *inode, return -EOPNOTSUPP; } +static inline struct page *fscrypt_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} static inline void fscrypt_restore_control_page(struct page *page) { @@ -115,16 +134,8 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) return; } -static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode, - u32 ilen) -{ - /* never happens */ - WARN_ON(1); - return 0; -} - static inline int fscrypt_fname_alloc_buffer(const struct inode *inode, - u32 ilen, + u32 max_encrypted_len, struct fscrypt_str *crypto_str) { return -EOPNOTSUPP; @@ -143,13 +154,6 @@ static inline int fscrypt_fname_disk_to_usr(struct inode *inode, return -EOPNOTSUPP; } -static inline int fscrypt_fname_usr_to_disk(struct inode *inode, - const struct qstr *iname, - struct fscrypt_str *oname) -{ - return -EOPNOTSUPP; -} - static inline bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len) { @@ -160,10 +164,13 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, } /* bio.c */ -static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, - struct bio *bio) +static inline void fscrypt_decrypt_bio(struct bio *bio) +{ +} + +static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, + struct bio *bio) { - return; } static inline void fscrypt_pullback_bio_page(struct page **page, bool restore) @@ -207,4 +214,27 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } +static inline int __fscrypt_prepare_symlink(struct inode *dir, + unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + +static inline int __fscrypt_encrypt_symlink(struct inode *inode, + const char *target, + unsigned int len, + struct fscrypt_str *disk_link) +{ + return -EOPNOTSUPP; +} + +static inline void *fscrypt_get_symlink(struct inode *inode, + const void *caddr, + unsigned int max_size) +{ + return ERR_PTR(-EOPNOTSUPP); +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 2db5e9706f60..9d1857302b73 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -10,8 +10,55 @@ #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H +#include <linux/mm.h> +#include <linux/slab.h> + +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto operations for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + bool (*dummy_context)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned (*max_namelen)(struct inode *); +}; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return (inode->i_crypt_info != NULL); +} + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + return inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode); +} + /* crypto.c */ -extern struct kmem_cache *fscrypt_info_cachep; +extern void fscrypt_enqueue_decrypt_work(struct work_struct *); extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, @@ -19,6 +66,12 @@ extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *, u64, gfp_t); extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int, unsigned int, u64); + +static inline struct page *fscrypt_control_page(struct page *page) +{ + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +} + extern void fscrypt_restore_control_page(struct page *); extern const struct dentry_operations fscrypt_d_ops; @@ -54,14 +107,11 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) kfree(fname->crypto_buf.name); } -extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32); extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); extern void fscrypt_fname_free_buffer(struct fscrypt_str *); extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, const struct fscrypt_str *, struct fscrypt_str *); -extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, - struct fscrypt_str *); #define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 @@ -138,7 +188,9 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, } /* bio.c */ -extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); +extern void fscrypt_decrypt_bio(struct bio *); +extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, + struct bio *bio); extern void fscrypt_pullback_bio_page(struct page **, bool); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, unsigned int); @@ -152,5 +204,13 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *new_dentry, unsigned int flags); extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); +extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, + unsigned int max_len, + struct fscrypt_str *disk_link); +extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, + unsigned int len, + struct fscrypt_str *disk_link); +extern void *fscrypt_get_symlink(struct inode *inode, const void *caddr, + unsigned int max_size); #endif /* _LINUX_FSCRYPT_SUPP_H */ diff --git a/include/linux/habmm.h b/include/linux/habmm.h index 966c5ee91be4..842cd27fd372 100644 --- a/include/linux/habmm.h +++ b/include/linux/habmm.h @@ -214,6 +214,11 @@ int32_t habmm_socket_recvfrom(int32_t handle, void *dst_buff, */ #define HABMM_EXP_MEM_TYPE_DMA 0x00000001 +/* + * this flag is used for export from dma_buf fd or import to dma_buf fd + */ +#define HABMM_EXPIMP_FLAGS_FD 0x00010000 + #define HAB_MAX_EXPORT_SIZE 0x8000000 /* diff --git a/include/linux/hid.h b/include/linux/hid.h index 251a1d382e23..fd86687f8119 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -793,7 +793,7 @@ extern int hidinput_connect(struct hid_device *hid, unsigned int force); extern void hidinput_disconnect(struct hid_device *); int hid_set_field(struct hid_field *, unsigned, __s32); -int hid_input_report(struct hid_device *, int type, u8 *, int, int); +int hid_input_report(struct hid_device *, int type, u8 *, u32, int); int hidinput_find_field(struct hid_device *hid, unsigned int type, unsigned int code, struct hid_field **field); struct hid_field *hidinput_get_led_field(struct hid_device *hid); unsigned int hidinput_count_leds(struct hid_device *hid); @@ -1098,13 +1098,13 @@ static inline void hid_hw_wait(struct hid_device *hdev) * * @report: the report we want to know the length */ -static inline int hid_report_len(struct hid_report *report) +static inline u32 hid_report_len(struct hid_report *report) { /* equivalent to DIV_ROUND_UP(report->size, 8) + !!(report->id > 0) */ return ((report->size - 1) >> 3) + 1 + (report->id > 0); } -int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size, +int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, int interrupt); /* HID quirks API */ diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index fe052e234906..bb1018882199 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -465,6 +465,7 @@ struct mlx4_update_qp_params { u16 rate_val; }; +struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn); int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index a91b67b18a73..5c93f4a89afa 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -635,8 +635,14 @@ enum { }; enum { - CQE_RSS_HTYPE_IP = 0x3 << 6, - CQE_RSS_HTYPE_L4 = 0x3 << 2, + CQE_RSS_HTYPE_IP = 0x3 << 2, + /* cqe->rss_hash_type[3:2] - IP destination selected for hash + * (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved) + */ + CQE_RSS_HTYPE_L4 = 0x3 << 6, + /* cqe->rss_hash_type[7:6] - L4 destination selected for hash + * (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI + */ }; enum { diff --git a/include/linux/mm.h b/include/linux/mm.h index d2dcc8727bc8..34fcdede4604 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -241,10 +241,14 @@ extern pgprot_t protection_map[16]; * ->fault function. The vma's ->fault is responsible for returning a bitmask * of VM_FAULT_xxx flags that give details about how the fault was handled. * + * MM layer fills up gfp_mask for page allocations but fault handler might + * alter it if its implementation requires a different allocation context. + * * pgoff should be used in favour of virtual_address, if possible. */ struct vm_fault { unsigned int flags; /* FAULT_FLAG_xxx flags */ + gfp_t gfp_mask; /* gfp mask to be used for allocations */ pgoff_t pgoff; /* Logical page offset based on vma */ void __user *virtual_address; /* Faulting virtual address */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b5421f6f155a..a6da214d0584 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -879,10 +879,10 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); -int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, - int offset, int len); -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, - int len); +int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); +int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); int skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) consume_skb(a) diff --git a/include/linux/tty.h b/include/linux/tty.h index a1042afff99a..d67ceb3f5958 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -342,6 +342,7 @@ struct tty_file_private { #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ +#define TTY_HUPPING 19 /* Hangup in progress */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 07f27b15e6fe..881c5d46a66c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -996,9 +996,9 @@ enum rate_info_flags { * @RATE_INFO_BW_160: 160 MHz bandwidth */ enum rate_info_bw { + RATE_INFO_BW_20 = 0, RATE_INFO_BW_5, RATE_INFO_BW_10, - RATE_INFO_BW_20, RATE_INFO_BW_40, RATE_INFO_BW_80, RATE_INFO_BW_160, diff --git a/include/net/slhc_vj.h b/include/net/slhc_vj.h index 8716d5942b65..8fcf8908a694 100644 --- a/include/net/slhc_vj.h +++ b/include/net/slhc_vj.h @@ -127,6 +127,7 @@ typedef __u32 int32; */ struct cstate { byte_t cs_this; /* connection id number (xmit) */ + bool initialized; /* true if initialized */ struct cstate *next; /* next in ring (xmit) */ struct iphdr cs_ip; /* ip/tcp hdr from most recent packet */ struct tcphdr cs_tcp; diff --git a/include/net/x25.h b/include/net/x25.h index c383aa4edbf0..6d30a01d281d 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -298,10 +298,10 @@ void x25_check_rbuf(struct sock *); /* sysctl_net_x25.c */ #ifdef CONFIG_SYSCTL -void x25_register_sysctl(void); +int x25_register_sysctl(void); void x25_unregister_sysctl(void); #else -static inline void x25_register_sysctl(void) {}; +static inline int x25_register_sysctl(void) { return 0; }; static inline void x25_unregister_sysctl(void) {}; #endif /* CONFIG_SYSCTL */ diff --git a/include/sound/pcm_oss.h b/include/sound/pcm_oss.h index 760c969d885d..12bbf8c81112 100644 --- a/include/sound/pcm_oss.h +++ b/include/sound/pcm_oss.h @@ -57,6 +57,7 @@ struct snd_pcm_oss_runtime { char *buffer; /* vmallocated period */ size_t buffer_used; /* used length from period buffer */ struct mutex params_lock; + atomic_t rw_ref; /* concurrent read/write accesses */ #ifdef CONFIG_SND_PCM_OSS_PLUGINS struct snd_pcm_plugin *plugin_first; struct snd_pcm_plugin *plugin_last; diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 3b91ad5d5115..2dd096eea935 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -78,6 +78,7 @@ struct snd_rawmidi_runtime { size_t xruns; /* over/underruns counter */ /* misc */ spinlock_t lock; + struct mutex realloc_mutex; wait_queue_head_t sleep; /* event handler (new bytes, input only) */ void (*event)(struct snd_rawmidi_substream *substream); diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h index f5024c560d8f..9c4eb33c5a1d 100644 --- a/include/trace/events/preemptirq.h +++ b/include/trace/events/preemptirq.h @@ -56,15 +56,18 @@ DEFINE_EVENT(preemptirq_template, preempt_enable, #include <trace/define_trace.h> -#else /* !CONFIG_PREEMPTIRQ_EVENTS */ +#endif /* !CONFIG_PREEMPTIRQ_EVENTS */ +#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || defined(CONFIG_PROVE_LOCKING) #define trace_irq_enable(...) #define trace_irq_disable(...) -#define trace_preempt_enable(...) -#define trace_preempt_disable(...) #define trace_irq_enable_rcuidle(...) #define trace_irq_disable_rcuidle(...) +#endif + +#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || !defined(CONFIG_DEBUG_PREEMPT) +#define trace_preempt_enable(...) +#define trace_preempt_disable(...) #define trace_preempt_enable_rcuidle(...) #define trace_preempt_disable_rcuidle(...) - #endif diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index beed138bd359..f85ed3a5ef4d 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -43,6 +43,27 @@ /* (1U << 31) is reserved for signed error codes */ /* + * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the + * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on + * the specific file. + */ +#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11) +#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12) +#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13) +#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14) + +/* + * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be + * used to clear any hints previously set. + */ +#define RWF_WRITE_LIFE_NOT_SET 0 +#define RWH_WRITE_LIFE_NONE 1 +#define RWH_WRITE_LIFE_SHORT 2 +#define RWH_WRITE_LIFE_MEDIUM 3 +#define RWH_WRITE_LIFE_LONG 4 +#define RWH_WRITE_LIFE_EXTREME 5 + +/* * Types of directory notifications that may be requested. */ #define DN_ACCESS 0x00000001 /* File accessed */ diff --git a/ipc/shm.c b/ipc/shm.c index 09267be8d27b..9fa852a6473b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -198,6 +198,12 @@ static int __shm_open(struct vm_area_struct *vma) if (IS_ERR(shp)) return PTR_ERR(shp); + if (shp->shm_file != sfd->file) { + /* ID was reused */ + shm_unlock(shp); + return -EINVAL; + } + shp->shm_atim = get_seconds(); shp->shm_lprid = task_tgid_vnr(current); shp->shm_nattch++; @@ -414,8 +420,9 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) int ret; /* - * In case of remap_file_pages() emulation, the file can represent - * removed IPC ID: propogate shm_lock() error to caller. + * In case of remap_file_pages() emulation, the file can represent an + * IPC ID that was removed, and possibly even reused by another shm + * segment already. Propagate this case as an error to caller. */ ret =__shm_open(vma); if (ret) @@ -439,6 +446,7 @@ static int shm_release(struct inode *ino, struct file *file) struct shm_file_data *sfd = shm_file_data(file); put_ipc_ns(sfd->ns); + fput(sfd->file); shm_file_data(file) = NULL; kfree(sfd); return 0; @@ -1198,7 +1206,16 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, file->f_mapping = shp->shm_file->f_mapping; sfd->id = shp->shm_perm.id; sfd->ns = get_ipc_ns(ns); - sfd->file = shp->shm_file; + /* + * We need to take a reference to the real shm file to prevent the + * pointer from becoming stale in cases where the lifetime of the outer + * file extends beyond that of the shm segment. It's not usually + * possible, but it can happen during remap_file_pages() emulation as + * that unmaps the memory, then does ->mmap() via file reference only. + * We'll deny the ->mmap() if the shm segment was since removed, but to + * detect shm ID reuse we need to compare the file pointers. + */ + sfd->file = get_file(shp->shm_file); sfd->vm_ops = NULL; err = security_mmap_file(file, prot, flags); diff --git a/kernel/events/core.c b/kernel/events/core.c index 322f63370038..656d55d30f8d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5387,9 +5387,6 @@ static void perf_output_read_one(struct perf_output_handle *handle, __output_copy(handle, values, n * sizeof(u64)); } -/* - * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. - */ static void perf_output_read_group(struct perf_output_handle *handle, struct perf_event *event, u64 enabled, u64 running) @@ -5434,6 +5431,13 @@ static void perf_output_read_group(struct perf_output_handle *handle, #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ PERF_FORMAT_TOTAL_TIME_RUNNING) +/* + * XXX PERF_SAMPLE_READ vs inherited events seems difficult. + * + * The problem is that its both hard and excessively expensive to iterate the + * child list, not to mention that its impossible to IPI the children running + * on another CPU, from interrupt/NMI context. + */ static void perf_output_read(struct perf_output_handle *handle, struct perf_event *event) { @@ -8167,9 +8171,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, local64_set(&hwc->period_left, hwc->sample_period); /* - * we currently do not support PERF_FORMAT_GROUP on inherited events + * We currently do not support PERF_SAMPLE_READ on inherited events. + * See perf_output_read(). */ - if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) + if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)) goto err_ns; if (!has_branch_stack(event)) diff --git a/kernel/futex.c b/kernel/futex.c index a09c1dd1f659..760a97da1050 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -470,6 +470,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; struct page *page, *page_head; + struct address_space *mapping; int err, ro = 0; /* @@ -555,7 +556,19 @@ again: } #endif - lock_page(page_head); + /* + * The treatment of mapping from this point on is critical. The page + * lock protects many things but in this context the page lock + * stabilizes mapping, prevents inode freeing in the shared + * file-backed region case and guards against movement to swap cache. + * + * Strictly speaking the page lock is not needed in all cases being + * considered here and page lock forces unnecessarily serialization + * From this point on, mapping will be re-verified if necessary and + * page lock will be acquired only if it is unavoidable + */ + + mapping = READ_ONCE(page_head->mapping); /* * If page_head->mapping is NULL, then it cannot be a PageAnon @@ -572,18 +585,31 @@ again: * shmem_writepage move it from filecache to swapcache beneath us: * an unlikely race, but we do need to retry for page_head->mapping. */ - if (!page_head->mapping) { - int shmem_swizzled = PageSwapCache(page_head); + if (unlikely(!mapping)) { + int shmem_swizzled; + + /* + * Page lock is required to identify which special case above + * applies. If this is really a shmem page then the page lock + * will prevent unexpected transitions. + */ + lock_page(page); + shmem_swizzled = PageSwapCache(page) || page->mapping; unlock_page(page_head); put_page(page_head); + if (shmem_swizzled) goto again; + return -EFAULT; } /* * Private mappings are handled in a simple way. * + * If the futex key is stored on an anonymous page, then the associated + * object is the mm which is implicitly pinned by the calling process. + * * NOTE: When userspace waits on a MAP_SHARED mapping, even if * it's a read-only handle, it's expected that futexes attach to * the object not the particular process. @@ -601,16 +627,74 @@ again: key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; + + get_futex_key_refs(key); /* implies smp_mb(); (B) */ + } else { + struct inode *inode; + + /* + * The associated futex object in this case is the inode and + * the page->mapping must be traversed. Ordinarily this should + * be stabilised under page lock but it's not strictly + * necessary in this case as we just want to pin the inode, not + * update the radix tree or anything like that. + * + * The RCU read lock is taken as the inode is finally freed + * under RCU. If the mapping still matches expectations then the + * mapping->host can be safely accessed as being a valid inode. + */ + rcu_read_lock(); + + if (READ_ONCE(page_head->mapping) != mapping) { + rcu_read_unlock(); + put_page(page_head); + + goto again; + } + + inode = READ_ONCE(mapping->host); + if (!inode) { + rcu_read_unlock(); + put_page(page_head); + + goto again; + } + + /* + * Take a reference unless it is about to be freed. Previously + * this reference was taken by ihold under the page lock + * pinning the inode in place so i_lock was unnecessary. The + * only way for this check to fail is if the inode was + * truncated in parallel so warn for now if this happens. + * + * We are not calling into get_futex_key_refs() in file-backed + * cases, therefore a successful atomic_inc return below will + * guarantee that get_futex_key() will still imply smp_mb(); (B). + */ + if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) { + rcu_read_unlock(); + put_page(page_head); + + goto again; + } + + /* Should be impossible but lets be paranoid for now */ + if (WARN_ON_ONCE(inode->i_mapping != mapping)) { + err = -EFAULT; + rcu_read_unlock(); + iput(inode); + + goto out; + } + key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = page_head->mapping->host; + key->shared.inode = inode; key->shared.pgoff = basepage_index(page); + rcu_read_unlock(); } - get_futex_key_refs(key); /* implies MB (B) */ - out: - unlock_page(page_head); put_page(page_head); return err; } diff --git a/kernel/pid.c b/kernel/pid.c index b17263be9082..5fe7cdb6d05f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -322,8 +322,10 @@ struct pid *alloc_pid(struct pid_namespace *ns) } if (unlikely(is_child_reaper(pid))) { - if (pid_ns_prepare_proc(ns)) + if (pid_ns_prepare_proc(ns)) { + disable_pid_allocation(ns); goto out_free; + } } get_pid_ns(ns); diff --git a/kernel/resource.c b/kernel/resource.c index c09d484f7b5f..73348f574163 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -611,7 +611,8 @@ static int __find_resource(struct resource *root, struct resource *old, alloc.start = constraint->alignf(constraint->alignf_data, &avail, size, constraint->align); alloc.end = alloc.start + size - 1; - if (resource_contains(&avail, &alloc)) { + if (alloc.start <= alloc.end && + resource_contains(&avail, &alloc)) { new->start = alloc.start; new->end = alloc.end; return 0; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 23e37b0674df..f962ab1eb046 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2402,7 +2402,8 @@ void task_numa_work(struct callback_head *work) return; - down_read(&mm->mmap_sem); + if (!down_read_trylock(&mm->mmap_sem)) + return; vma = find_vma(mm, start); if (!vma) { reset_ptenuma_scan(p); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index beafdf94b3b5..79fadcad21ff 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -435,6 +435,7 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer) { debug_object_free(timer, &hrtimer_debug_descr); } +EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); #else static inline void debug_hrtimer_init(struct hrtimer *timer) { } diff --git a/mm/filemap.c b/mm/filemap.c index 8b2cf0f6a529..750af2219081 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -573,7 +573,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) VM_BUG_ON_PAGE(!PageLocked(new), new); VM_BUG_ON_PAGE(new->mapping, new); - error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); + error = radix_tree_preload(gfp_mask & GFP_RECLAIM_MASK); if (!error) { struct address_space *mapping = old->mapping; void (*freepage)(struct page *); @@ -632,7 +632,7 @@ static int __add_to_page_cache_locked(struct page *page, return error; } - error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM); + error = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK); if (error) { if (!huge) mem_cgroup_cancel_charge(page, memcg); @@ -1194,8 +1194,7 @@ no_page: if (fgp_flags & FGP_ACCESSED) __SetPageReferenced(page); - err = add_to_page_cache_lru(page, mapping, offset, - gfp_mask & GFP_RECLAIM_MASK); + err = add_to_page_cache_lru(page, mapping, offset, gfp_mask); if (unlikely(err)) { page_cache_release(page); page = NULL; @@ -1829,19 +1828,18 @@ EXPORT_SYMBOL(generic_file_read_iter); * This adds the requested page to the page cache if it isn't already there, * and schedules an I/O to read in its contents from disk. */ -static int page_cache_read(struct file *file, pgoff_t offset) +static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) { struct address_space *mapping = file->f_mapping; struct page *page; int ret; do { - page = page_cache_alloc_cold(mapping); + page = __page_cache_alloc(gfp_mask|__GFP_COLD); if (!page) return -ENOMEM; - ret = add_to_page_cache_lru(page, mapping, offset, - mapping_gfp_constraint(mapping, GFP_KERNEL)); + ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask); if (ret == 0) ret = mapping->a_ops->readpage(file, page); else if (ret == -EEXIST) @@ -2022,7 +2020,7 @@ no_cached_page: * We're only likely to ever get here if MADV_RANDOM is in * effect. */ - error = page_cache_read(file, offset); + error = page_cache_read(file, offset, vmf->gfp_mask); /* * The page we want has now been added to the page cache. diff --git a/mm/memory.c b/mm/memory.c index 291cbf54b347..78ab57141731 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1990,6 +1990,20 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo copy_user_highpage(dst, src, va, vma); } +static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) +{ + struct file *vm_file = vma->vm_file; + + if (vm_file) + return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; + + /* + * Special mappings (e.g. VDSO) do not have any file so fake + * a default GFP_KERNEL for them. + */ + return GFP_KERNEL; +} + /* * Notify the address space that the page is about to become writable so that * it can prohibit this or wait for the page to get into an appropriate state. @@ -2005,6 +2019,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page, vmf.virtual_address = (void __user *)(address & PAGE_MASK); vmf.pgoff = page->index; vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf.page = page; vmf.cow_page = NULL; @@ -2771,6 +2786,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, vmf.pgoff = pgoff; vmf.flags = flags; vmf.page = NULL; + vmf.gfp_mask = __get_fault_gfp_mask(vma); vmf.cow_page = cow_page; ret = vma->vm_ops->fault(vma, &vmf); @@ -2937,6 +2953,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address, vmf.pgoff = pgoff; vmf.max_pgoff = max_pgoff; vmf.flags = flags; + vmf.gfp_mask = __get_fault_gfp_mask(vma); vma->vm_ops->map_pages(vma, &vmf); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c5bf17598afa..a98dae1bdcff 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2526,13 +2526,13 @@ void account_page_redirty(struct page *page) if (mapping && mapping_cap_account_dirty(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; - bool locked; + struct wb_lock_cookie cookie = {}; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); current->nr_dirtied--; dec_zone_page_state(page, NR_DIRTIED); dec_wb_stat(wb, WB_DIRTIED); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); } } EXPORT_SYMBOL(account_page_redirty); @@ -2638,15 +2638,15 @@ void cancel_dirty_page(struct page *page) struct inode *inode = mapping->host; struct bdi_writeback *wb; struct mem_cgroup *memcg; - bool locked; + struct wb_lock_cookie cookie = {}; memcg = mem_cgroup_begin_page_stat(page); - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); if (TestClearPageDirty(page)) account_page_cleaned(page, mapping, memcg, wb); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); mem_cgroup_end_page_stat(memcg); } else { ClearPageDirty(page); @@ -2679,7 +2679,7 @@ int clear_page_dirty_for_io(struct page *page) struct inode *inode = mapping->host; struct bdi_writeback *wb; struct mem_cgroup *memcg; - bool locked; + struct wb_lock_cookie cookie = {}; /* * Yes, Virginia, this is indeed insane. @@ -2717,14 +2717,14 @@ int clear_page_dirty_for_io(struct page *page) * exclusion. */ memcg = mem_cgroup_begin_page_stat(page); - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &cookie); if (TestClearPageDirty(page)) { mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_DIRTY); dec_zone_page_state(page, NR_FILE_DIRTY); dec_wb_stat(wb, WB_RECLAIMABLE); ret = 1; } - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &cookie); mem_cgroup_end_page_stat(memcg); return ret; } diff --git a/mm/slab.c b/mm/slab.c index 8fc762c178bd..80ca19a122f3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3942,7 +3942,8 @@ next: next_reap_node(); out: /* Set up the next iteration */ - schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_AC)); + schedule_delayed_work_on(smp_processor_id(), work, + round_jiffies_relative(REAPTIMEOUT_AC)); } #ifdef CONFIG_SLABINFO diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 89da689b6433..ac9791dd4768 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -29,6 +29,7 @@ #include <linux/net_tstamp.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> +#include <linux/phy.h> #include <net/arp.h> #include "vlan.h" @@ -654,8 +655,11 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev, { const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops; + struct phy_device *phydev = vlan->real_dev->phydev; - if (ops->get_ts_info) { + if (phydev && phydev->drv && phydev->drv->ts_info) { + return phydev->drv->ts_info(phydev, info); + } else if (ops->get_ts_info) { return ops->get_ts_info(vlan->real_dev, info); } else { info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index da4078651c22..5b95477c3453 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -716,6 +716,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) { struct hci_dev *hdev = req->hdev; u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + bool changed = false; /* If Connectionless Slave Broadcast master role is supported * enable all necessary events for it. @@ -725,6 +726,7 @@ static void hci_set_event_mask_page_2(struct hci_request *req) events[1] |= 0x80; /* Synchronization Train Complete */ events[2] |= 0x10; /* Slave Page Response Timeout */ events[2] |= 0x20; /* CSB Channel Map Change */ + changed = true; } /* If Connectionless Slave Broadcast slave role is supported @@ -735,13 +737,24 @@ static void hci_set_event_mask_page_2(struct hci_request *req) events[2] |= 0x02; /* CSB Receive */ events[2] |= 0x04; /* CSB Timeout */ events[2] |= 0x08; /* Truncated Page Complete */ + changed = true; } /* Enable Authenticated Payload Timeout Expired event if supported */ - if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) + if (lmp_ping_capable(hdev) || hdev->le_features[0] & HCI_LE_PING) { events[2] |= 0x80; + changed = true; + } - hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events); + /* Some Broadcom based controllers indicate support for Set Event + * Mask Page 2 command, but then actually do not support it. Since + * the default value is all bits set to zero, the command is only + * required if the event mask has to be changed. In case no change + * to the event mask is needed, skip this command. + */ + if (changed) + hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, + sizeof(events), events); } static void hci_init3_req(struct hci_request *req, unsigned long opt) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index bc95e48d5cfb..378c9ed00d40 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -295,6 +295,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) u32 yes; struct crush_rule *r; + err = -EINVAL; ceph_decode_32_safe(p, end, yes, bad); if (!yes) { dout("crush_decode NO rule %d off %x %p to %p\n", diff --git a/net/core/dev.c b/net/core/dev.c index 129e188d7722..154da3b1348b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -991,7 +991,7 @@ bool dev_valid_name(const char *name) { if (*name == '\0') return false; - if (strlen(name) >= IFNAMSIZ) + if (strnlen(name, IFNAMSIZ) == IFNAMSIZ) return false; if (!strcmp(name, ".") || !strcmp(name, "..")) return false; @@ -2517,7 +2517,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) return 0; - eth = (struct ethhdr *)skb_mac_header(skb); + eth = (struct ethhdr *)skb->data; type = eth->h_proto; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index eff329e9c23b..e713c75695f9 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1132,10 +1132,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, lladdr = neigh->ha; } - if (new & NUD_CONNECTED) - neigh->confirmed = jiffies; - neigh->updated = jiffies; - /* If entry was valid and address is not changed, do not change entry state, if new one is STALE. */ @@ -1159,6 +1155,16 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } } + /* Update timestamps only once we know we will make a change to the + * neighbour entry. Otherwise we risk to move the locktime window with + * noop updates and ignore relevant ARP updates. + */ + if (new != old || lladdr != neigh->ha) { + if (new & NUD_CONNECTED) + neigh->confirmed = jiffies; + neigh->updated = jiffies; + } + if (new != old) { neigh_del_timer(neigh); if (new & NUD_PROBE) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b5c351d2830b..ccd20669ac00 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -310,6 +310,25 @@ out_undo: goto out; } +static int __net_init net_defaults_init_net(struct net *net) +{ + net->core.sysctl_somaxconn = SOMAXCONN; + return 0; +} + +static struct pernet_operations net_defaults_ops = { + .init = net_defaults_init_net, +}; + +static __init int net_defaults_init(void) +{ + if (register_pernet_subsys(&net_defaults_ops)) + panic("Cannot initialize net default settings"); + + return 0; +} + +core_initcall(net_defaults_init); #ifdef CONFIG_NET_NS static struct kmem_cache *net_cachep; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2549fa05b564..3c5e3c022232 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2577,7 +2577,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - skb_shinfo(skb1)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags & + SKBTX_SHARED_FRAG; if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -3141,8 +3142,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & - SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & + SKBTX_SHARED_FRAG; while (pos < offset + len) { if (i >= nfrags) { @@ -3355,24 +3356,18 @@ void __init skb_init(void) NULL); } -/** - * skb_to_sgvec - Fill a scatter-gather list from a socket buffer - * @skb: Socket buffer containing the buffers to be mapped - * @sg: The scatter-gather list to map into - * @offset: The offset into the buffer's contents to start mapping - * @len: Length of buffer space to be mapped - * - * Fill the specified scatter-gather list with mappings/pointers into a - * region of the buffer space attached to a socket buffer. - */ static int -__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len, + unsigned int recursion_level) { int start = skb_headlen(skb); int i, copy = start - offset; struct sk_buff *frag_iter; int elt = 0; + if (unlikely(recursion_level >= 24)) + return -EMSGSIZE; + if (copy > 0) { if (copy > len) copy = len; @@ -3391,6 +3386,8 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (unlikely(elt && sg_is_last(&sg[elt - 1]))) + return -EMSGSIZE; if (copy > len) copy = len; @@ -3405,16 +3402,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) } skb_walk_frags(skb, frag_iter) { - int end; + int end, ret; WARN_ON(start > offset + len); end = start + frag_iter->len; if ((copy = end - offset) > 0) { + if (unlikely(elt && sg_is_last(&sg[elt - 1]))) + return -EMSGSIZE; + if (copy > len) copy = len; - elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start, - copy); + ret = __skb_to_sgvec(frag_iter, sg+elt, offset - start, + copy, recursion_level + 1); + if (unlikely(ret < 0)) + return ret; + elt += ret; if ((len -= copy) == 0) return elt; offset += copy; @@ -3425,6 +3428,31 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) return elt; } +/** + * skb_to_sgvec - Fill a scatter-gather list from a socket buffer + * @skb: Socket buffer containing the buffers to be mapped + * @sg: The scatter-gather list to map into + * @offset: The offset into the buffer's contents to start mapping + * @len: Length of buffer space to be mapped + * + * Fill the specified scatter-gather list with mappings/pointers into a + * region of the buffer space attached to a socket buffer. Returns either + * the number of scatterlist items used, or -EMSGSIZE if the contents + * could not fit. + */ +int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +{ + int nsg = __skb_to_sgvec(skb, sg, offset, len, 0); + + if (nsg <= 0) + return nsg; + + sg_mark_end(&sg[nsg - 1]); + + return nsg; +} +EXPORT_SYMBOL_GPL(skb_to_sgvec); + /* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given * sglist without mark the sg which contain last skb data as the end. * So the caller can mannipulate sg list as will when padding new data after @@ -3447,19 +3475,11 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { - return __skb_to_sgvec(skb, sg, offset, len); + return __skb_to_sgvec(skb, sg, offset, len, 0); } EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); -int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) -{ - int nsg = __skb_to_sgvec(skb, sg, offset, len); - sg_mark_end(&sg[nsg - 1]); - - return nsg; -} -EXPORT_SYMBOL_GPL(skb_to_sgvec); /** * skb_cow_data - Check that a socket buffer's data buffers are writable @@ -3741,7 +3761,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, return; if (tsonly) { - skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags; + skb_shinfo(skb)->tx_flags |= skb_shinfo(orig_skb)->tx_flags & + SKBTX_ANY_TSTAMP; skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 6578a0a2f708..32898247d8bf 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -429,8 +429,6 @@ static __net_init int sysctl_core_net_init(struct net *net) { struct ctl_table *tbl; - net->core.sysctl_somaxconn = SOMAXCONN; - tbl = netns_core_table; if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a548be247e15..47b397264f24 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -302,12 +302,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); - dev_put(dev); - err = dev_queue_xmit(skb); if (err > 0) err = net_xmit_errno(err); + dev_put(dev); + return err ?: size; out_skb: @@ -689,12 +689,12 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) skb->sk = sk; skb->protocol = htons(ETH_P_IEEE802154); - dev_put(dev); - err = dev_queue_xmit(skb); if (err > 0) err = net_xmit_errno(err); + dev_put(dev); + return err ?: size; out_skb: diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 22377c8ff14b..e8f862358518 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -220,7 +220,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -393,7 +395,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, ihl); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index cb5eb649ad5f..bfa79831873f 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -437,7 +437,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) /*unsigned long now; */ struct net *net = dev_net(dev); - rt = ip_route_output(net, sip, tip, 0, 0); + rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev)); if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { @@ -658,6 +658,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) unsigned char *arp_ptr; struct rtable *rt; unsigned char *sha; + unsigned char *tha = NULL; __be32 sip, tip; u16 dev_type = dev->type; int addr_type; @@ -729,6 +730,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) break; #endif default: + tha = arp_ptr; arp_ptr += dev->addr_len; } memcpy(&tip, arp_ptr, 4); @@ -839,8 +841,18 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) It is possible, that this option should be enabled for some devices (strip is candidate) */ - is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip && - addr_type == RTN_UNICAST; + is_garp = tip == sip && addr_type == RTN_UNICAST; + + /* Unsolicited ARP _replies_ also require target hwaddr to be + * the same as source. + */ + if (is_garp && arp->ar_op == htons(ARPOP_REPLY)) + is_garp = + /* IPv4 over IEEE 1394 doesn't provide target + * hardware address field in its ARP payload. + */ + tha && + !memcmp(tha, sha, dev->addr_len); if (!n && ((arp->ar_op == htons(ARPOP_REPLY) && diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 20fb25e3027b..3d8021d55336 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -268,10 +268,11 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph->spi = x->id.spi; sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); - + err = skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + if (unlikely(err < 0)) + goto error; aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); @@ -481,7 +482,9 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) } sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + err = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out; aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); aead_request_set_ad(req, assoclen); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 80e2d1b0c08c..3d62feb65932 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -253,13 +253,14 @@ static struct net_device *__ip_tunnel_create(struct net *net, struct net_device *dev; char name[IFNAMSIZ]; - if (parms->name[0]) + err = -E2BIG; + if (parms->name[0]) { + if (!dev_valid_name(parms->name)) + goto failed; strlcpy(name, parms->name, IFNAMSIZ); - else { - if (strlen(ops->kind) > (IFNAMSIZ - 3)) { - err = -E2BIG; + } else { + if (strlen(ops->kind) > (IFNAMSIZ - 3)) goto failed; - } strlcpy(name, ops->kind, IFNAMSIZ); strncat(name, "%d", 2); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1a8e95d2dd39..da83648f95f8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -118,6 +118,7 @@ int sysctl_tcp_default_init_rwnd __read_mostly = TCP_INIT_CWND * 2; #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ +#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -3544,7 +3545,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) { /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) { - tcp_send_challenge_ack(sk, skb); + if (!(flag & FLAG_NO_CHALLENGE_ACK)) + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -5834,13 +5836,17 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) /* step 5: check the ACK field */ acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; + FLAG_UPDATE_TS_RECENT | + FLAG_NO_CHALLENGE_ACK) > 0; + if (!acceptable) { + if (sk->sk_state == TCP_SYN_RECV) + return 1; /* send one RST */ + tcp_send_challenge_ack(sk, skb); + goto discard; + } switch (sk->sk_state) { case TCP_SYN_RECV: - if (!acceptable) - return 1; - if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); @@ -5909,14 +5915,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * our SYNACK so stop the SYNACK timer. */ if (req) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) - return 1; /* We no longer need the request sock. */ reqsk_fastopen_remove(sk, req, false); tcp_rearm_rto(sk); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 40c29712f32a..199658afa68b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -986,7 +986,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; - ifa->flags = flags | IFA_F_TENTATIVE; + ifa->flags = flags; + /* No need to add the TENTATIVE flag for addresses with NODAD */ + if (!(flags & IFA_F_NODAD)) + ifa->flags |= IFA_F_TENTATIVE; ifa->valid_lft = valid_lft; ifa->prefered_lft = prefered_lft; ifa->cstamp = ifa->tstamp = jiffies; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 189eb10b742d..e742c4deb13d 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -423,7 +423,9 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ @@ -603,7 +605,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ip6h->hop_limit = 0; sg_init_table(sg, nfrags + sglists); - skb_to_sgvec_nomark(skb, sg, 0, skb->len); + err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); + if (unlikely(err < 0)) + goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index cbcdd5db31f4..44a2010e2076 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -248,9 +248,11 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esph->spi = x->id.spi; sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, - (unsigned char *)esph - skb->data, - assoclen + ivlen + clen + alen); + err = skb_to_sgvec(skb, sg, + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); + if (unlikely(err < 0)) + goto error; aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); @@ -423,7 +425,9 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) } sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + ret = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(ret < 0)) + goto out; aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); aead_request_set_ad(req, assoclen); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 85afef175cf9..6e496c3dd8ef 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -320,11 +320,13 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, if (t || !create) return t; - if (parms->name[0]) + if (parms->name[0]) { + if (!dev_valid_name(parms->name)) + return NULL; strlcpy(name, parms->name, IFNAMSIZ); - else + } else { strcpy(name, "ip6gre%d"); - + } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6gre_tunnel_setup); if (!dev) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3ef81c387923..bfa710e8b615 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -340,6 +340,10 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + struct dst_entry *dst = skb_dst(skb); + + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); skb_sender_cpu_clear(skb); return dst_output(net, sk, skb); } @@ -534,8 +538,6 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; - IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); @@ -1276,7 +1278,7 @@ static int __ip6_append_data(struct sock *sk, unsigned int flags, int dontfrag) { struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu; int exthdrlen = 0; int dst_exthdrlen = 0; int hh_len; @@ -1312,6 +1314,12 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; + /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit + * the first fragment + */ + if (headersize + transhdrlen > mtu) + goto emsgsize; + if (cork->length + length > mtu - headersize && dontfrag && (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_RAW)) { @@ -1327,9 +1335,8 @@ static int __ip6_append_data(struct sock *sk, if (cork->length + length > maxnonfragsize - headersize) { emsgsize: - ipv6_local_error(sk, EMSGSIZE, fl6, - mtu - headersize + - sizeof(struct ipv6hdr)); + pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0); + ipv6_local_error(sk, EMSGSIZE, fl6, pmtu); return -EMSGSIZE; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 27b00c4e642d..3c2468bd0b7c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -286,13 +286,16 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) struct net_device *dev; struct ip6_tnl *t; char name[IFNAMSIZ]; - int err = -ENOMEM; + int err = -E2BIG; - if (p->name[0]) + if (p->name[0]) { + if (!dev_valid_name(p->name)) + goto failed; strlcpy(name, p->name, IFNAMSIZ); - else + } else { sprintf(name, "ip6tnl%%d"); - + } + err = -ENOMEM; dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6_tnl_dev_setup); if (!dev) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 00111ac94251..b8bf123f7f79 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -212,10 +212,13 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p char name[IFNAMSIZ]; int err; - if (p->name[0]) + if (p->name[0]) { + if (!dev_valid_name(p->name)) + goto failed; strlcpy(name, p->name, IFNAMSIZ); - else + } else { sprintf(name, "ip6_vti%%d"); + } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup); if (!dev) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5842430a122a..6b65f3bf7270 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -844,6 +844,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_node *fn; struct rt6_info *rt; + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + flags &= ~RT6_LOOKUP_F_IFACE; + read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index bbba435d0639..51f7c32f04d7 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -244,11 +244,13 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (!create) goto failed; - if (parms->name[0]) + if (parms->name[0]) { + if (!dev_valid_name(parms->name)) + goto failed; strlcpy(name, parms->name, IFNAMSIZ); - else + } else { strcpy(name, "sit%d"); - + } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ipip6_tunnel_setup); if (!dev) @@ -690,6 +692,7 @@ static int ipip6_rcv(struct sk_buff *skb) if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6))) goto out; + iph = ip_hdr(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { diff --git a/net/key/af_key.c b/net/key/af_key.c index 6482b001f19a..15150b412930 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3305,7 +3305,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) { + sec_ctx->sadb_x_sec_len*8) { *dir = -EINVAL; goto out; } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index fb3248ff8b48..ae3438685caa 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -732,6 +732,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl if ((session->ifname[0] && nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) || + (session->offset && + nla_put_u16(skb, L2TP_ATTR_OFFSET, session->offset)) || (session->cookie_len && nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, &session->cookie[0])) || diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index bb8edb9ef506..1e698768aca8 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -309,6 +309,8 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) int rc = -EINVAL; dprintk("%s: binding %02X\n", __func__, addr->sllc_sap); + + lock_sock(sk); if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr))) goto out; rc = -EAFNOSUPPORT; @@ -380,6 +382,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + release_sock(sk); return rc; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f3deee7da389..c16b4da20db2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4326,6 +4326,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data)) return -EINVAL; + /* If a reconfig is happening, bail out */ + if (local->in_reconfig) + return -EBUSY; + if (assoc) { rcu_read_lock(); have_sta = sta_info_get(sdata, cbss->bssid); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4ece07c68b3f..c68e020427ab 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -887,8 +887,13 @@ restart: } out: local_bh_enable(); - if (last) + if (last) { + /* nf ct hash resize happened, now clear the leftover. */ + if ((struct nf_conn *)cb->args[1] == last) + cb->args[1] = 0; + nf_ct_put(last); + } return skb->len; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f59d82f0aa97..83c0f56d05cb 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1034,6 +1034,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family != AF_NETLINK) return -EINVAL; + if (alen < sizeof(struct sockaddr_nl)) + return -EINVAL; + if ((nladdr->nl_groups || nladdr->nl_pid) && !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; diff --git a/net/rds/bind.c b/net/rds/bind.c index b22ea956522b..e29b47193645 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -108,6 +108,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) rs, &addr, (int)ntohs(*port)); break; } else { + rs->rs_bound_addr = 0; rds_sock_put(rs); ret = -ENOMEM; break; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index d7a9ab5a9d9c..6c65fb229e50 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -209,7 +209,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct sk_buff *trailer; unsigned int len; u16 check; - int nsg; + int nsg, err; sp = rxrpc_skb(skb); @@ -240,7 +240,9 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, len &= ~(call->conn->size_align - 1); sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, len); + err = skb_to_sgvec(skb, sg, 0, len); + if (unlikely(err < 0)) + return err; crypto_blkcipher_encrypt_iv(&desc, sg, sg, len); _leave(" = 0"); @@ -336,7 +338,7 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, struct sk_buff *trailer; u32 data_size, buf; u16 check; - int nsg; + int nsg, ret; _enter(""); @@ -348,7 +350,9 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, goto nomem; sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, 8); + ret = skb_to_sgvec(skb, sg, 0, 8); + if (unlikely(ret < 0)) + return ret; /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -411,7 +415,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, struct sk_buff *trailer; u32 data_size, buf; u16 check; - int nsg; + int nsg, ret; _enter(",{%d}", skb->len); @@ -430,7 +434,12 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, } sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, skb->len); + ret = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(ret < 0)) { + if (sg != _sg) + kfree(sg); + return ret; + } /* decrypt from the session key */ token = call->conn->key->payload.data[0]; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 694a06f1e0d5..f44fea22d69c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -101,8 +101,10 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, a->order = n_i; nest = nla_nest_start(skb, a->order); - if (nest == NULL) + if (nest == NULL) { + index--; goto nla_put_failure; + } err = tcf_action_dump_1(skb, a, 0, 0); if (err < 0) { index--; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 0bc6f912f870..bd155e59be1c 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -249,10 +249,14 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) { - if (cfg->is_ebpf) - bpf_prog_put(cfg->filter); - else - bpf_prog_destroy(cfg->filter); + struct bpf_prog *filter = cfg->filter; + + if (filter) { + if (cfg->is_ebpf) + bpf_prog_put(filter); + else + bpf_prog_destroy(filter); + } kfree(cfg->bpf_ops); kfree(cfg->bpf_name); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 481806b43de8..edb8514b4e00 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -723,8 +723,10 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) sctp_v6_map_v4(addr); } - if (addr->sa.sa_family == AF_INET) + if (addr->sa.sa_family == AF_INET) { + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); + } return sizeof(struct sockaddr_in6); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index df6a4b2d0728..13c7f42b7040 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -335,11 +335,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, if (!opt->pf->af_supported(addr->sa.sa_family, opt)) return NULL; - /* V4 mapped address are really of AF_INET family */ - if (addr->sa.sa_family == AF_INET6 && - ipv6_addr_v4mapped(&addr->v6.sin6_addr) && - !opt->pf->af_supported(AF_INET, opt)) - return NULL; + if (addr->sa.sa_family == AF_INET6) { + if (len < SIN6_LEN_RFC2133) + return NULL; + /* V4 mapped address are really of AF_INET family */ + if (ipv6_addr_v4mapped(&addr->v6.sin6_addr) && + !opt->pf->af_supported(AF_INET, opt)) + return NULL; + } /* If we get this far, af is valid. */ af = sctp_get_af_specific(addr->sa.sa_family); @@ -1518,7 +1521,7 @@ static void sctp_close(struct sock *sk, long timeout) pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sk->sk_shutdown = SHUTDOWN_MASK; sk->sk_state = SCTP_SS_CLOSING; @@ -1569,7 +1572,7 @@ static void sctp_close(struct sock *sk, long timeout) * held and that should be grabbed before socket lock. */ spin_lock_bh(&net->sctp.addr_wq_lock); - bh_lock_sock(sk); + bh_lock_sock_nested(sk); /* Hold the sock, since sk_common_release() will put sock_put() * and we have just a little more cleanup. diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index d81186d34558..9103dd15511c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1375,6 +1375,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry) struct dentry *clnt_dir = pipe_dentry->d_parent; struct dentry *gssd_dir = clnt_dir->d_parent; + dget(pipe_dentry); __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry); __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1); __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 728d65fbab0c..c9c0976d3bbb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2363,7 +2363,12 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EHOSTUNREACH: case -EADDRINUSE: case -ENOBUFS: - /* retry with existing socket, after a delay */ + /* + * xs_tcp_force_close() wakes tasks with -EIO. + * We need to wake them first to ensure the + * correct error code. + */ + xprt_wake_pending_tasks(xprt, status); xs_tcp_force_close(xprt); goto out; } diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a750f330b8dd..c6ab4da4b8e2 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1794,32 +1794,40 @@ void x25_kill_by_neigh(struct x25_neigh *nb) static int __init x25_init(void) { - int rc = proto_register(&x25_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&x25_proto, 0); + if (rc) goto out; rc = sock_register(&x25_family_ops); - if (rc != 0) + if (rc) goto out_proto; dev_add_pack(&x25_packet_type); rc = register_netdevice_notifier(&x25_dev_notifier); - if (rc != 0) + if (rc) goto out_sock; - pr_info("Linux Version 0.2\n"); + rc = x25_register_sysctl(); + if (rc) + goto out_dev; - x25_register_sysctl(); rc = x25_proc_init(); - if (rc != 0) - goto out_dev; + if (rc) + goto out_sysctl; + + pr_info("Linux Version 0.2\n"); + out: return rc; +out_sysctl: + x25_unregister_sysctl(); out_dev: unregister_netdevice_notifier(&x25_dev_notifier); out_sock: + dev_remove_pack(&x25_packet_type); sock_unregister(AF_X25); out_proto: proto_unregister(&x25_proto); diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index 43239527a205..703d46aae7a2 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -73,9 +73,12 @@ static struct ctl_table x25_table[] = { { 0, }, }; -void __init x25_register_sysctl(void) +int __init x25_register_sysctl(void) { x25_table_header = register_net_sysctl(&init_net, "net/x25", x25_table); + if (!x25_table_header) + return -ENOMEM; + return 0; } void x25_unregister_sysctl(void) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7dfa35d223e6..70535b8ee4d6 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1208,6 +1208,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig) x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; x->km.seq = orig->km.seq; + x->replay = orig->replay; + x->preplay = orig->preplay; return x; diff --git a/scripts/tags.sh b/scripts/tags.sh index 262889046703..45e246595d10 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -106,6 +106,7 @@ all_compiled_sources() case "$i" in *.[cS]) j=${i/\.[cS]/\.o} + j="${j#$tree}" if [ -e $j ]; then echo $i fi diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 13bb3b409b5c..e3f8891c5d72 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1945,8 +1945,9 @@ static inline u32 file_to_av(struct file *file) static inline u32 open_file_to_av(struct file *file) { u32 av = file_to_av(file); + struct inode *inode = file_inode(file); - if (selinux_policycap_openperm) + if (selinux_policycap_openperm && inode->i_sb->s_magic != SOCKFS_MAGIC) av |= FILE__OPEN; return av; @@ -2915,6 +2916,7 @@ static int selinux_inode_permission(struct inode *inode, int mask) static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) { const struct cred *cred = current_cred(); + struct inode *inode = d_backing_inode(dentry); unsigned int ia_valid = iattr->ia_valid; __u32 av = FILE__WRITE; @@ -2930,8 +2932,10 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET)) return dentry_has_perm(cred, dentry, FILE__SETATTR); - if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE) - && !(ia_valid & ATTR_FILE)) + if (selinux_policycap_openperm && + inode->i_sb->s_magic != SOCKFS_MAGIC && + (ia_valid & ATTR_SIZE) && + !(ia_valid & ATTR_FILE)) av |= FILE__OPEN; return dentry_has_perm(cred, dentry, av); diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 6cd8aec146f2..07feb35f1935 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -833,8 +833,25 @@ static int choose_rate(struct snd_pcm_substream *substream, return snd_pcm_hw_param_near(substream, params, SNDRV_PCM_HW_PARAM_RATE, best_rate, NULL); } -static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, - bool trylock) +/* parameter locking: returns immediately if tried during streaming */ +static int lock_params(struct snd_pcm_runtime *runtime) +{ + if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; + if (atomic_read(&runtime->oss.rw_ref)) { + mutex_unlock(&runtime->oss.params_lock); + return -EBUSY; + } + return 0; +} + +static void unlock_params(struct snd_pcm_runtime *runtime) +{ + mutex_unlock(&runtime->oss.params_lock); +} + +/* call with params_lock held */ +static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_pcm_hw_params *params, *sparams; @@ -848,12 +865,9 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, struct snd_mask sformat_mask; struct snd_mask mask; - if (trylock) { - if (!(mutex_trylock(&runtime->oss.params_lock))) - return -EAGAIN; - } else if (mutex_lock_interruptible(&runtime->oss.params_lock)) - return -EINTR; - sw_params = kmalloc(sizeof(*sw_params), GFP_KERNEL); + if (!runtime->oss.params) + return 0; + sw_params = kzalloc(sizeof(*sw_params), GFP_KERNEL); params = kmalloc(sizeof(*params), GFP_KERNEL); sparams = kmalloc(sizeof(*sparams), GFP_KERNEL); if (!sw_params || !params || !sparams) { @@ -991,7 +1005,6 @@ static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, goto failure; } - memset(sw_params, 0, sizeof(*sw_params)); if (runtime->oss.trigger) { sw_params->start_threshold = 1; } else { @@ -1079,6 +1092,23 @@ failure: kfree(sw_params); kfree(params); kfree(sparams); + return err; +} + +/* this one takes the lock by itself */ +static int snd_pcm_oss_change_params(struct snd_pcm_substream *substream, + bool trylock) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + int err; + + if (trylock) { + if (!(mutex_trylock(&runtime->oss.params_lock))) + return -EAGAIN; + } else if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; + + err = snd_pcm_oss_change_params_locked(substream); mutex_unlock(&runtime->oss.params_lock); return err; } @@ -1107,6 +1137,10 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil return 0; } +/* call with params_lock held */ +/* NOTE: this always call PREPARE unconditionally no matter whether + * runtime->oss.prepare is set or not + */ static int snd_pcm_oss_prepare(struct snd_pcm_substream *substream) { int err; @@ -1131,8 +1165,6 @@ static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime; int err; - if (substream == NULL) - return 0; runtime = substream->runtime; if (runtime->oss.params) { err = snd_pcm_oss_change_params(substream, false); @@ -1140,6 +1172,29 @@ static int snd_pcm_oss_make_ready(struct snd_pcm_substream *substream) return err; } if (runtime->oss.prepare) { + if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; + err = snd_pcm_oss_prepare(substream); + mutex_unlock(&runtime->oss.params_lock); + if (err < 0) + return err; + } + return 0; +} + +/* call with params_lock held */ +static int snd_pcm_oss_make_ready_locked(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime; + int err; + + runtime = substream->runtime; + if (runtime->oss.params) { + err = snd_pcm_oss_change_params_locked(substream); + if (err < 0) + return err; + } + if (runtime->oss.prepare) { err = snd_pcm_oss_prepare(substream); if (err < 0) return err; @@ -1367,13 +1422,15 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha if (atomic_read(&substream->mmap_count)) return -ENXIO; - if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) - return tmp; + atomic_inc(&runtime->oss.rw_ref); while (bytes > 0) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) { tmp = -ERESTARTSYS; break; } + tmp = snd_pcm_oss_make_ready_locked(substream); + if (tmp < 0) + goto err; if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { tmp = bytes; if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes) @@ -1429,6 +1486,7 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha } tmp = 0; } + atomic_dec(&runtime->oss.rw_ref); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } @@ -1474,13 +1532,15 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use if (atomic_read(&substream->mmap_count)) return -ENXIO; - if ((tmp = snd_pcm_oss_make_ready(substream)) < 0) - return tmp; + atomic_inc(&runtime->oss.rw_ref); while (bytes > 0) { if (mutex_lock_interruptible(&runtime->oss.params_lock)) { tmp = -ERESTARTSYS; break; } + tmp = snd_pcm_oss_make_ready_locked(substream); + if (tmp < 0) + goto err; if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) { if (runtime->oss.buffer_used == 0) { tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1); @@ -1521,6 +1581,7 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use } tmp = 0; } + atomic_dec(&runtime->oss.rw_ref); return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp; } @@ -1536,10 +1597,12 @@ static int snd_pcm_oss_reset(struct snd_pcm_oss_file *pcm_oss_file) continue; runtime = substream->runtime; snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); + mutex_lock(&runtime->oss.params_lock); runtime->oss.prepare = 1; runtime->oss.buffer_used = 0; runtime->oss.prev_hw_ptr_period = 0; runtime->oss.period_ptr = 0; + mutex_unlock(&runtime->oss.params_lock); } return 0; } @@ -1625,9 +1688,13 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) goto __direct; if ((err = snd_pcm_oss_make_ready(substream)) < 0) return err; + atomic_inc(&runtime->oss.rw_ref); + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + atomic_dec(&runtime->oss.rw_ref); + return -ERESTARTSYS; + } format = snd_pcm_oss_format_from(runtime->oss.format); width = snd_pcm_format_physical_width(format); - mutex_lock(&runtime->oss.params_lock); if (runtime->oss.buffer_used > 0) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync: buffer_used\n"); @@ -1637,10 +1704,8 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) runtime->oss.buffer + runtime->oss.buffer_used, size); err = snd_pcm_oss_sync1(substream, runtime->oss.period_bytes); - if (err < 0) { - mutex_unlock(&runtime->oss.params_lock); - return err; - } + if (err < 0) + goto unlock; } else if (runtime->oss.period_ptr > 0) { #ifdef OSS_DEBUG pcm_dbg(substream->pcm, "sync: period_ptr\n"); @@ -1650,10 +1715,8 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) runtime->oss.buffer, size * 8 / width); err = snd_pcm_oss_sync1(substream, size); - if (err < 0) { - mutex_unlock(&runtime->oss.params_lock); - return err; - } + if (err < 0) + goto unlock; } /* * The ALSA's period might be a bit large than OSS one. @@ -1684,7 +1747,11 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) snd_pcm_lib_writev(substream, buffers, size); } } +unlock: mutex_unlock(&runtime->oss.params_lock); + atomic_dec(&runtime->oss.rw_ref); + if (err < 0) + return err; /* * finish sync: drain the buffer */ @@ -1695,7 +1762,9 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) substream->f_flags = saved_f_flags; if (err < 0) return err; + mutex_lock(&runtime->oss.params_lock); runtime->oss.prepare = 1; + mutex_unlock(&runtime->oss.params_lock); } substream = pcm_oss_file->streams[SNDRV_PCM_STREAM_CAPTURE]; @@ -1706,8 +1775,10 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); if (err < 0) return err; + mutex_lock(&runtime->oss.params_lock); runtime->oss.buffer_used = 0; runtime->oss.prepare = 1; + mutex_unlock(&runtime->oss.params_lock); } return 0; } @@ -1719,6 +1790,8 @@ static int snd_pcm_oss_set_rate(struct snd_pcm_oss_file *pcm_oss_file, int rate) for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; + int err; + if (substream == NULL) continue; runtime = substream->runtime; @@ -1726,10 +1799,14 @@ static int snd_pcm_oss_set_rate(struct snd_pcm_oss_file *pcm_oss_file, int rate) rate = 1000; else if (rate > 192000) rate = 192000; + err = lock_params(runtime); + if (err < 0) + return err; if (runtime->oss.rate != rate) { runtime->oss.params = 1; runtime->oss.rate = rate; } + unlock_params(runtime); } return snd_pcm_oss_get_rate(pcm_oss_file); } @@ -1754,13 +1831,19 @@ static int snd_pcm_oss_set_channels(struct snd_pcm_oss_file *pcm_oss_file, unsig for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; struct snd_pcm_runtime *runtime; + int err; + if (substream == NULL) continue; runtime = substream->runtime; + err = lock_params(runtime); + if (err < 0) + return err; if (runtime->oss.channels != channels) { runtime->oss.params = 1; runtime->oss.channels = channels; } + unlock_params(runtime); } return snd_pcm_oss_get_channels(pcm_oss_file); } @@ -1833,6 +1916,7 @@ static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file) static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int format) { int formats, idx; + int err; if (format != AFMT_QUERY) { formats = snd_pcm_oss_get_formats(pcm_oss_file); @@ -1846,10 +1930,14 @@ static int snd_pcm_oss_set_format(struct snd_pcm_oss_file *pcm_oss_file, int for if (substream == NULL) continue; runtime = substream->runtime; + err = lock_params(runtime); + if (err < 0) + return err; if (runtime->oss.format != format) { runtime->oss.params = 1; runtime->oss.format = format; } + unlock_params(runtime); } } return snd_pcm_oss_get_format(pcm_oss_file); @@ -1869,8 +1957,6 @@ static int snd_pcm_oss_set_subdivide1(struct snd_pcm_substream *substream, int s { struct snd_pcm_runtime *runtime; - if (substream == NULL) - return 0; runtime = substream->runtime; if (subdivide == 0) { subdivide = runtime->oss.subdivision; @@ -1894,9 +1980,17 @@ static int snd_pcm_oss_set_subdivide(struct snd_pcm_oss_file *pcm_oss_file, int for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; + struct snd_pcm_runtime *runtime; + if (substream == NULL) continue; - if ((err = snd_pcm_oss_set_subdivide1(substream, subdivide)) < 0) + runtime = substream->runtime; + err = lock_params(runtime); + if (err < 0) + return err; + err = snd_pcm_oss_set_subdivide1(substream, subdivide); + unlock_params(runtime); + if (err < 0) return err; } return err; @@ -1906,8 +2000,6 @@ static int snd_pcm_oss_set_fragment1(struct snd_pcm_substream *substream, unsign { struct snd_pcm_runtime *runtime; - if (substream == NULL) - return 0; runtime = substream->runtime; if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; @@ -1927,9 +2019,17 @@ static int snd_pcm_oss_set_fragment(struct snd_pcm_oss_file *pcm_oss_file, unsig for (idx = 1; idx >= 0; --idx) { struct snd_pcm_substream *substream = pcm_oss_file->streams[idx]; + struct snd_pcm_runtime *runtime; + if (substream == NULL) continue; - if ((err = snd_pcm_oss_set_fragment1(substream, val)) < 0) + runtime = substream->runtime; + err = lock_params(runtime); + if (err < 0) + return err; + err = snd_pcm_oss_set_fragment1(substream, val); + unlock_params(runtime); + if (err < 0) return err; } return err; @@ -2013,6 +2113,9 @@ static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int tr } if (psubstream) { runtime = psubstream->runtime; + cmd = 0; + if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; if (trigger & PCM_ENABLE_OUTPUT) { if (runtime->oss.trigger) goto _skip1; @@ -2030,13 +2133,19 @@ static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int tr cmd = SNDRV_PCM_IOCTL_DROP; runtime->oss.prepare = 1; } - err = snd_pcm_kernel_ioctl(psubstream, cmd, NULL); - if (err < 0) - return err; - } _skip1: + mutex_unlock(&runtime->oss.params_lock); + if (cmd) { + err = snd_pcm_kernel_ioctl(psubstream, cmd, NULL); + if (err < 0) + return err; + } + } if (csubstream) { runtime = csubstream->runtime; + cmd = 0; + if (mutex_lock_interruptible(&runtime->oss.params_lock)) + return -ERESTARTSYS; if (trigger & PCM_ENABLE_INPUT) { if (runtime->oss.trigger) goto _skip2; @@ -2051,11 +2160,14 @@ static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int tr cmd = SNDRV_PCM_IOCTL_DROP; runtime->oss.prepare = 1; } - err = snd_pcm_kernel_ioctl(csubstream, cmd, NULL); - if (err < 0) - return err; - } _skip2: + mutex_unlock(&runtime->oss.params_lock); + if (cmd) { + err = snd_pcm_kernel_ioctl(csubstream, cmd, NULL); + if (err < 0) + return err; + } + } return 0; } @@ -2307,6 +2419,7 @@ static void snd_pcm_oss_init_substream(struct snd_pcm_substream *substream, runtime->oss.maxfrags = 0; runtime->oss.subdivision = 0; substream->pcm_release = snd_pcm_oss_release_substream; + atomic_set(&runtime->oss.rw_ref, 0); } static int snd_pcm_oss_release_file(struct snd_pcm_oss_file *pcm_oss_file) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 48f6aee3680d..d79a04e703dc 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -28,6 +28,7 @@ #include <sound/core.h> #include <sound/minors.h> #include <sound/pcm.h> +#include <sound/timer.h> #include <sound/control.h> #include <sound/info.h> @@ -1036,8 +1037,13 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream) snd_free_pages((void*)runtime->control, PAGE_ALIGN(sizeof(struct snd_pcm_mmap_control))); kfree(runtime->hw_constraints.rules); - kfree(runtime); + /* Avoid concurrent access to runtime via PCM timer interface */ + if (substream->timer) + spin_lock_irq(&substream->timer->lock); substream->runtime = NULL; + if (substream->timer) + spin_unlock_irq(&substream->timer->lock); + kfree(runtime); put_pid(substream->pid); substream->pid = NULL; substream->pstr->substream_opened--; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 16f8124b1150..514380104944 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -115,6 +115,7 @@ static int snd_rawmidi_runtime_create(struct snd_rawmidi_substream *substream) return -ENOMEM; runtime->substream = substream; spin_lock_init(&runtime->lock); + mutex_init(&runtime->realloc_mutex); init_waitqueue_head(&runtime->sleep); INIT_WORK(&runtime->event_work, snd_rawmidi_input_event_work); runtime->event = NULL; @@ -636,8 +637,10 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params * params) { char *newbuf; + char *oldbuf; struct snd_rawmidi_runtime *runtime = substream->runtime; - + unsigned long flags; + if (substream->append && substream->use_count > 1) return -EBUSY; snd_rawmidi_drain_output(substream); @@ -648,13 +651,22 @@ int snd_rawmidi_output_params(struct snd_rawmidi_substream *substream, return -EINVAL; } if (params->buffer_size != runtime->buffer_size) { - newbuf = krealloc(runtime->buffer, params->buffer_size, + mutex_lock(&runtime->realloc_mutex); + newbuf = __krealloc(runtime->buffer, params->buffer_size, GFP_KERNEL); - if (!newbuf) + if (!newbuf) { + mutex_unlock(&runtime->realloc_mutex); return -ENOMEM; + } + spin_lock_irqsave(&runtime->lock, flags); + oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; runtime->avail = runtime->buffer_size; + spin_unlock_irqrestore(&runtime->lock, flags); + if (oldbuf != newbuf) + kfree(oldbuf); + mutex_unlock(&runtime->realloc_mutex); } runtime->avail_min = params->avail_min; substream->active_sensing = !params->no_active_sensing; @@ -666,7 +678,9 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, struct snd_rawmidi_params * params) { char *newbuf; + char *oldbuf; struct snd_rawmidi_runtime *runtime = substream->runtime; + unsigned long flags; snd_rawmidi_drain_input(substream); if (params->buffer_size < 32 || params->buffer_size > 1024L * 1024L) { @@ -676,12 +690,21 @@ int snd_rawmidi_input_params(struct snd_rawmidi_substream *substream, return -EINVAL; } if (params->buffer_size != runtime->buffer_size) { - newbuf = krealloc(runtime->buffer, params->buffer_size, + mutex_lock(&runtime->realloc_mutex); + newbuf = __krealloc(runtime->buffer, params->buffer_size, GFP_KERNEL); - if (!newbuf) + if (!newbuf) { + mutex_unlock(&runtime->realloc_mutex); return -ENOMEM; + } + spin_lock_irqsave(&runtime->lock, flags); + oldbuf = runtime->buffer; runtime->buffer = newbuf; runtime->buffer_size = params->buffer_size; + spin_unlock_irqrestore(&runtime->lock, flags); + if (oldbuf != newbuf) + kfree(oldbuf); + mutex_unlock(&runtime->realloc_mutex); } runtime->avail_min = params->avail_min; return 0; @@ -954,6 +977,8 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream, unsigned long appl_ptr; spin_lock_irqsave(&runtime->lock, flags); + if (userbuf) + mutex_lock(&runtime->realloc_mutex); while (count > 0 && runtime->avail) { count1 = runtime->buffer_size - runtime->appl_ptr; if (count1 > count) @@ -973,6 +998,7 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream, spin_unlock_irqrestore(&runtime->lock, flags); if (copy_to_user(userbuf + result, runtime->buffer + appl_ptr, count1)) { + mutex_unlock(&runtime->realloc_mutex); return result > 0 ? result : -EFAULT; } spin_lock_irqsave(&runtime->lock, flags); @@ -981,6 +1007,8 @@ static long snd_rawmidi_kernel_read1(struct snd_rawmidi_substream *substream, count -= count1; } spin_unlock_irqrestore(&runtime->lock, flags); + if (userbuf) + mutex_unlock(&runtime->realloc_mutex); return result; } @@ -1245,10 +1273,14 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, return -EINVAL; result = 0; + if (userbuf) + mutex_lock(&runtime->realloc_mutex); spin_lock_irqsave(&runtime->lock, flags); if (substream->append) { if ((long)runtime->avail < count) { spin_unlock_irqrestore(&runtime->lock, flags); + if (userbuf) + mutex_unlock(&runtime->realloc_mutex); return -EAGAIN; } } @@ -1284,6 +1316,8 @@ static long snd_rawmidi_kernel_write1(struct snd_rawmidi_substream *substream, __end: count1 = runtime->avail < runtime->buffer_size; spin_unlock_irqrestore(&runtime->lock, flags); + if (userbuf) + mutex_unlock(&runtime->realloc_mutex); if (count1) snd_rawmidi_output_trigger(substream, 1); return result; diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c index 09a89094dcf7..4e304a24924a 100644 --- a/sound/core/rawmidi_compat.c +++ b/sound/core/rawmidi_compat.c @@ -36,8 +36,6 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile, struct snd_rawmidi_params params; unsigned int val; - if (rfile->output == NULL) - return -EINVAL; if (get_user(params.stream, &src->stream) || get_user(params.buffer_size, &src->buffer_size) || get_user(params.avail_min, &src->avail_min) || @@ -46,8 +44,12 @@ static int snd_rawmidi_ioctl_params_compat(struct snd_rawmidi_file *rfile, params.no_active_sensing = val; switch (params.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; return snd_rawmidi_output_params(rfile->output, ¶ms); case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; return snd_rawmidi_input_params(rfile->input, ¶ms); } return -EINVAL; @@ -67,16 +69,18 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile, int err; struct snd_rawmidi_status status; - if (rfile->output == NULL) - return -EINVAL; if (get_user(status.stream, &src->stream)) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: @@ -113,16 +117,18 @@ static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile, int err; struct snd_rawmidi_status status; - if (rfile->output == NULL) - return -EINVAL; if (get_user(status.stream, &src->stream)) return -EFAULT; switch (status.stream) { case SNDRV_RAWMIDI_STREAM_OUTPUT: + if (!rfile->output) + return -EINVAL; err = snd_rawmidi_output_status(rfile->output, &status); break; case SNDRV_RAWMIDI_STREAM_INPUT: + if (!rfile->input) + return -EINVAL; err = snd_rawmidi_input_status(rfile->input, &status); break; default: diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index fbd00821e326..3be91696ac35 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1549,7 +1549,8 @@ static void azx_check_snoop_available(struct azx *chip) */ u8 val; pci_read_config_byte(chip->pci, 0x42, &val); - if (!(val & 0x80) && chip->pci->revision == 0x30) + if (!(val & 0x80) && (chip->pci->revision == 0x30 || + chip->pci->revision == 0x20)) snoop = false; } diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c index 4452fea0b118..bd4998f577a0 100644 --- a/sound/soc/codecs/ssm2602.c +++ b/sound/soc/codecs/ssm2602.c @@ -54,10 +54,17 @@ struct ssm2602_priv { * using 2 wire for device control, so we cache them instead. * There is no point in caching the reset register */ -static const u16 ssm2602_reg[SSM2602_CACHEREGNUM] = { - 0x0097, 0x0097, 0x0079, 0x0079, - 0x000a, 0x0008, 0x009f, 0x000a, - 0x0000, 0x0000 +static const struct reg_default ssm2602_reg[SSM2602_CACHEREGNUM] = { + { .reg = 0x00, .def = 0x0097 }, + { .reg = 0x01, .def = 0x0097 }, + { .reg = 0x02, .def = 0x0079 }, + { .reg = 0x03, .def = 0x0079 }, + { .reg = 0x04, .def = 0x000a }, + { .reg = 0x05, .def = 0x0008 }, + { .reg = 0x06, .def = 0x009f }, + { .reg = 0x07, .def = 0x000a }, + { .reg = 0x08, .def = 0x0000 }, + { .reg = 0x09, .def = 0x0000 } }; @@ -618,8 +625,8 @@ const struct regmap_config ssm2602_regmap_config = { .volatile_reg = ssm2602_register_volatile, .cache_type = REGCACHE_RBTREE, - .reg_defaults_raw = ssm2602_reg, - .num_reg_defaults_raw = ARRAY_SIZE(ssm2602_reg), + .reg_defaults = ssm2602_reg, + .num_reg_defaults = ARRAY_SIZE(ssm2602_reg), }; EXPORT_SYMBOL_GPL(ssm2602_regmap_config); diff --git a/sound/soc/intel/atom/sst/sst_stream.c b/sound/soc/intel/atom/sst/sst_stream.c index a74c64c7053c..e83da42a8c03 100644 --- a/sound/soc/intel/atom/sst/sst_stream.c +++ b/sound/soc/intel/atom/sst/sst_stream.c @@ -221,7 +221,7 @@ int sst_send_byte_stream_mrfld(struct intel_sst_drv *sst_drv_ctx, sst_free_block(sst_drv_ctx, block); out: test_and_clear_bit(pvt_id, &sst_drv_ctx->pvt_id); - return 0; + return ret; } /* diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index 38d65a3529c4..44d560966e9c 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -96,6 +96,7 @@ static const struct snd_soc_dapm_widget cht_dapm_widgets[] = { SND_SOC_DAPM_HP("Headphone", NULL), SND_SOC_DAPM_MIC("Headset Mic", NULL), SND_SOC_DAPM_MIC("Int Mic", NULL), + SND_SOC_DAPM_MIC("Int Analog Mic", NULL), SND_SOC_DAPM_SPK("Ext Spk", NULL), SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0, platform_clock_control, SND_SOC_DAPM_POST_PMD), @@ -106,6 +107,8 @@ static const struct snd_soc_dapm_route cht_rt5645_audio_map[] = { {"IN1N", NULL, "Headset Mic"}, {"DMIC L1", NULL, "Int Mic"}, {"DMIC R1", NULL, "Int Mic"}, + {"IN2P", NULL, "Int Analog Mic"}, + {"IN2N", NULL, "Int Analog Mic"}, {"Headphone", NULL, "HPOL"}, {"Headphone", NULL, "HPOR"}, {"Ext Spk", NULL, "SPOL"}, @@ -119,6 +122,9 @@ static const struct snd_soc_dapm_route cht_rt5645_audio_map[] = { {"Headphone", NULL, "Platform Clock"}, {"Headset Mic", NULL, "Platform Clock"}, {"Int Mic", NULL, "Platform Clock"}, + {"Int Analog Mic", NULL, "Platform Clock"}, + {"Int Analog Mic", NULL, "micbias1"}, + {"Int Analog Mic", NULL, "micbias2"}, {"Ext Spk", NULL, "Platform Clock"}, }; @@ -147,6 +153,7 @@ static const struct snd_kcontrol_new cht_mc_controls[] = { SOC_DAPM_PIN_SWITCH("Headphone"), SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Int Mic"), + SOC_DAPM_PIN_SWITCH("Int Analog Mic"), SOC_DAPM_PIN_SWITCH("Ext Spk"), }; diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index a001331a53c1..df79d7c846ea 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -453,6 +453,13 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod, struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); u32 *buf = (u32 *)(runtime->dma_area + rsnd_dai_pointer_offset(io, 0)); + int shift = 0; + + switch (runtime->sample_bits) { + case 32: + shift = 8; + break; + } /* * 8/16/32 data can be assesse to TDR/RDR register @@ -460,9 +467,9 @@ static void __rsnd_ssi_interrupt(struct rsnd_mod *mod, * see rsnd_ssi_init() */ if (rsnd_io_is_play(io)) - rsnd_mod_write(mod, SSITDR, *buf); + rsnd_mod_write(mod, SSITDR, (*buf) << shift); else - *buf = rsnd_mod_read(mod, SSIRDR); + *buf = (rsnd_mod_read(mod, SSIRDR) >> shift); elapsed = rsnd_dai_pointer_update(io, sizeof(*buf)); } diff --git a/sound/usb/line6/midi.c b/sound/usb/line6/midi.c index cebea9b7f769..6a9be1df7851 100644 --- a/sound/usb/line6/midi.c +++ b/sound/usb/line6/midi.c @@ -125,7 +125,7 @@ static int send_midi_async(struct usb_line6 *line6, unsigned char *data, } usb_fill_int_urb(urb, line6->usbdev, - usb_sndbulkpipe(line6->usbdev, + usb_sndintpipe(line6->usbdev, line6->properties->ep_ctrl_w), transfer_buffer, length, midi_sent, line6, line6->interval); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ebe7115c751a..da8afc121118 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1152,6 +1152,10 @@ static struct syscall_fmt { { .name = "mlockall", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mmap", .hexret = true, +/* The standard mmap maps to old_mmap on s390x */ +#if defined(__s390x__) + .alias = "old_mmap", +#endif .arg_scnprintf = { [0] = SCA_HEX, /* addr */ [2] = SCA_MMAP_PROT, /* prot */ [3] = SCA_MMAP_FLAGS, /* flags */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 43838003c1a1..304f5d710143 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1258,8 +1258,16 @@ static int __event_process_build_id(struct build_id_event *bev, dso__set_build_id(dso, &bev->build_id); - if (!is_kernel_module(filename, cpumode)) - dso->kernel = dso_type; + if (dso_type != DSO_TYPE_USER) { + struct kmod_path m = { .name = NULL, }; + + if (!kmod_path__parse_name(&m, filename) && m.kmod) + dso__set_short_name(dso, strdup(m.name), true); + else + dso->kernel = dso_type; + + free(m.name); + } build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index eeeae0629ad3..0b540b84f8b7 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1270,6 +1270,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) intel_pt_clear_tx_flags(decoder); decoder->have_tma = false; decoder->cbr = 0; + decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; decoder->overflow = true; return -EOVERFLOW; @@ -1492,6 +1493,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_PSBEND: intel_pt_log("ERROR: Missing TIP after FUP\n"); decoder->pkt_state = INTEL_PT_STATE_ERR3; + decoder->pkt_step = 0; return -ENOENT; case INTEL_PT_OVF: @@ -2152,14 +2154,6 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) return &decoder->state; } -static bool intel_pt_at_psb(unsigned char *buf, size_t len) -{ - if (len < INTEL_PT_PSB_LEN) - return false; - return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, - INTEL_PT_PSB_LEN); -} - /** * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. * @buf: pointer to buffer pointer @@ -2248,6 +2242,7 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) * @buf: buffer * @len: size of buffer * @tsc: TSC value returned + * @rem: returns remaining size when TSC is found * * Find a TSC packet in @buf and return the TSC value. This function assumes * that @buf starts at a PSB and that PSB+ will contain TSC and so stops if a @@ -2255,7 +2250,8 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) * * Return: %true if TSC is found, false otherwise. */ -static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc, + size_t *rem) { struct intel_pt_pkt packet; int ret; @@ -2266,6 +2262,7 @@ static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) return false; if (packet.type == INTEL_PT_TSC) { *tsc = packet.payload; + *rem = len; return true; } if (packet.type == INTEL_PT_PSBEND) @@ -2316,6 +2313,8 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) * @len_a: size of first buffer * @buf_b: second buffer * @len_b: size of second buffer + * @consecutive: returns true if there is data in buf_b that is consecutive + * to buf_a * * If the trace contains TSC we can look at the last TSC of @buf_a and the * first TSC of @buf_b in order to determine if the buffers overlap, and then @@ -2328,33 +2327,41 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, - size_t len_b) + size_t len_b, bool *consecutive) { uint64_t tsc_a, tsc_b; unsigned char *p; - size_t len; + size_t len, rem_a, rem_b; p = intel_pt_last_psb(buf_a, len_a); if (!p) return buf_b; /* No PSB in buf_a => no overlap */ len = len_a - (p - buf_a); - if (!intel_pt_next_tsc(p, len, &tsc_a)) { + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) { /* The last PSB+ in buf_a is incomplete, so go back one more */ len_a -= len; p = intel_pt_last_psb(buf_a, len_a); if (!p) return buf_b; /* No full PSB+ => assume no overlap */ len = len_a - (p - buf_a); - if (!intel_pt_next_tsc(p, len, &tsc_a)) + if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) return buf_b; /* No TSC in buf_a => assume no overlap */ } while (1) { /* Ignore PSB+ with no TSC */ - if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) && - intel_pt_tsc_cmp(tsc_a, tsc_b) < 0) - return buf_b; /* tsc_a < tsc_b => no overlap */ + if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) { + int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b); + + /* Same TSC, so buffers are consecutive */ + if (!cmp && rem_b >= rem_a) { + *consecutive = true; + return buf_b + len_b - (rem_b - rem_a); + } + if (cmp < 0) + return buf_b; /* tsc_a < tsc_b => no overlap */ + } if (!intel_pt_step_psb(&buf_b, &len_b)) return buf_b + len_b; /* No PSB in buf_b => no data */ @@ -2368,6 +2375,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, * @buf_b: second buffer * @len_b: size of second buffer * @have_tsc: can use TSC packets to detect overlap + * @consecutive: returns true if there is data in buf_b that is consecutive + * to buf_a * * When trace samples or snapshots are recorded there is the possibility that * the data overlaps. Note that, for the purposes of decoding, data is only @@ -2378,7 +2387,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, */ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, - bool have_tsc) + bool have_tsc, bool *consecutive) { unsigned char *found; @@ -2390,7 +2399,8 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, return buf_b; /* No overlap */ if (have_tsc) { - found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b); + found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b, + consecutive); if (found) return found; } @@ -2405,28 +2415,16 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, } /* Now len_b >= len_a */ - if (len_b > len_a) { - /* The leftover buffer 'b' must start at a PSB */ - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { - if (!intel_pt_step_psb(&buf_a, &len_a)) - return buf_b; /* No overlap */ - } - } - while (1) { /* Potential overlap so check the bytes */ found = memmem(buf_a, len_a, buf_b, len_a); - if (found) + if (found) { + *consecutive = true; return buf_b + len_a; + } /* Try again at next PSB in buffer 'a' */ if (!intel_pt_step_psb(&buf_a, &len_a)) return buf_b; /* No overlap */ - - /* The leftover buffer 'b' must start at a PSB */ - while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { - if (!intel_pt_step_psb(&buf_a, &len_a)) - return buf_b; /* No overlap */ - } } } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 02c38fec1c37..89a3eda6a318 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -102,7 +102,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, - bool have_tsc); + bool have_tsc, bool *consecutive); int intel_pt__strerror(int code, char *buf, size_t buflen); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 89927b5beebf..3693cb26ec66 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -125,6 +125,7 @@ struct intel_pt_queue { bool stop; bool step_through_buffers; bool use_buffer_pid_tid; + bool sync_switch; pid_t pid, tid; int cpu; int switch_state; @@ -188,14 +189,17 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { + bool consecutive = false; void *start; start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, - pt->have_tsc); + pt->have_tsc, &consecutive); if (!start) return -EINVAL; b->use_size = b->data + b->size - start; b->use_data = start; + if (b->use_size && consecutive) + b->consecutive = true; return 0; } @@ -849,10 +853,12 @@ static int intel_pt_setup_queue(struct intel_pt *pt, if (pt->timeless_decoding || !pt->have_sched_switch) ptq->use_buffer_pid_tid = true; } + + ptq->sync_switch = pt->sync_switch; } if (!ptq->on_heap && - (!pt->sync_switch || + (!ptq->sync_switch || ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { const struct intel_pt_state *state; int ret; @@ -1235,7 +1241,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (pt->synth_opts.last_branch) intel_pt_update_last_branch_rb(ptq); - if (!pt->sync_switch) + if (!ptq->sync_switch) return 0; if (intel_pt_is_switch_ip(ptq, state->to_ip)) { @@ -1316,6 +1322,21 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) return switch_ip; } +static void intel_pt_enable_sync_switch(struct intel_pt *pt) +{ + unsigned int i; + + pt->sync_switch = true; + + for (i = 0; i < pt->queues.nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq) + ptq->sync_switch = true; + } +} + static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) { const struct intel_pt_state *state = ptq->state; @@ -1332,7 +1353,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (pt->switch_ip) { intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", pt->switch_ip, pt->ptss_ip); - pt->sync_switch = true; + intel_pt_enable_sync_switch(pt); } } } @@ -1348,9 +1369,9 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (state->err) { if (state->err == INTEL_PT_ERR_NODATA) return 1; - if (pt->sync_switch && + if (ptq->sync_switch && state->from_ip >= pt->kernel_start) { - pt->sync_switch = false; + ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } if (pt->synth_opts.errors) { @@ -1376,7 +1397,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) state->timestamp, state->est_timestamp); ptq->timestamp = state->est_timestamp; /* Use estimated TSC in unknown switch state */ - } else if (pt->sync_switch && + } else if (ptq->sync_switch && ptq->switch_state == INTEL_PT_SS_UNKNOWN && intel_pt_is_switch_ip(ptq, state->to_ip) && ptq->next_tid == -1) { @@ -1523,7 +1544,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, return 1; ptq = intel_pt_cpu_to_ptq(pt, cpu); - if (!ptq) + if (!ptq || !ptq->sync_switch) return 1; switch (ptq->switch_state) { diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 03875f9154e7..0195b7e8c54a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2349,6 +2349,14 @@ static int get_new_event_name(char *buf, size_t len, const char *base, out: free(nbase); + + /* Final validation */ + if (ret >= 0 && !is_c_func_name(buf)) { + pr_warning("Internal error: \"%s\" is an invalid event name.\n", + buf); + ret = -EINVAL; + } + return ret; } diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 2dcfe9a7c8d0..60edec383281 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -37,6 +37,14 @@ static int __report_module(struct addr_location *al, u64 ip, return 0; mod = dwfl_addrmodule(ui->dwfl, ip); + if (mod) { + Dwarf_Addr s; + + dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); + if (s != al->map->start) + mod = 0; + } + if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1, al->map->start, diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 47b1e36c7ea0..9adc9af8b048 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -162,7 +162,7 @@ int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) size -= ret; off_in += ret; - off_out -= ret; + off_out += ret; } munmap(ptr, off_in + size); diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c index 42d4c8caad81..de8dc82e2567 100644 --- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -45,12 +45,12 @@ int test_body(void) printf("Check DSCR TM context switch: "); fflush(stdout); for (;;) { - rv = 1; asm __volatile__ ( /* set a known value into the DSCR */ "ld 3, %[dscr1];" "mtspr %[sprn_dscr], 3;" + "li %[rv], 1;" /* start and suspend a transaction */ TBEGIN "beq 1f;" |
