diff options
Diffstat (limited to 'include')
286 files changed, 5097 insertions, 1169 deletions
diff --git a/include/asm-generic/asm-prototypes.h b/include/asm-generic/asm-prototypes.h index df13637e4017..939869c772b1 100644 --- a/include/asm-generic/asm-prototypes.h +++ b/include/asm-generic/asm-prototypes.h @@ -1,7 +1,13 @@ #include <linux/bitops.h> +#undef __memset extern void *__memset(void *, int, __kernel_size_t); +#undef __memcpy extern void *__memcpy(void *, const void *, __kernel_size_t); +#undef __memmove extern void *__memmove(void *, const void *, __kernel_size_t); +#undef memset extern void *memset(void *, int, __kernel_size_t); +#undef memcpy extern void *memcpy(void *, const void *, __kernel_size_t); +#undef memmove extern void *memmove(void *, const void *, __kernel_size_t); diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h index 4e3b6558331e..3e457ae2d571 100644 --- a/include/asm-generic/rtc.h +++ b/include/asm-generic/rtc.h @@ -106,7 +106,7 @@ static inline unsigned int __get_rtc_time(struct rtc_time *time) time->tm_year += real_year - 72; #endif - if (century) + if (century > 20) time->tm_year += (century - 19) * 100; /* diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 9dbb739cafa0..5f794f6ec6c7 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -165,6 +165,13 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) #define tlb_end_vma __tlb_end_vma #endif +static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + tlb->start = min(tlb->start, address); + tlb->end = max(tlb->end, address + size); +} + #ifndef __tlb_remove_tlb_entry #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #endif diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 5d2add1a6c96..864fcfa1df41 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -51,7 +51,7 @@ #ifdef CONFIG_NEED_MULTIPLE_NODES #define cpumask_of_node(node) ((node) == 0 ? cpu_online_mask : cpu_none_mask) #else - #define cpumask_of_node(node) ((void)node, cpu_online_mask) + #define cpumask_of_node(node) ((void)(node), cpu_online_mask) #endif #endif #ifndef pcibus_to_node diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 2a673e31b3dd..2014a1bc8f3d 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -89,7 +89,7 @@ #ifdef CONFIG_FTRACE_MCOUNT_RECORD #define MCOUNT_REC() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_mcount_loc) = .; \ - *(__mcount_loc) \ + KEEP(*(__mcount_loc)) \ VMLINUX_SYMBOL(__stop_mcount_loc) = .; #else #define MCOUNT_REC() @@ -123,10 +123,10 @@ #ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() . = ALIGN(8); \ VMLINUX_SYMBOL(__start_ftrace_events) = .; \ - *(_ftrace_events) \ + KEEP(*(_ftrace_events)) \ VMLINUX_SYMBOL(__stop_ftrace_events) = .; \ VMLINUX_SYMBOL(__start_ftrace_enum_maps) = .; \ - *(_ftrace_enum_map) \ + KEEP(*(_ftrace_enum_map)) \ VMLINUX_SYMBOL(__stop_ftrace_enum_maps) = .; #else #define FTRACE_EVENTS() @@ -169,8 +169,8 @@ #define _OF_TABLE_1(name) \ . = ALIGN(8); \ VMLINUX_SYMBOL(__##name##_of_table) = .; \ - *(__##name##_of_table) \ - *(__##name##_of_table_end) + KEEP(*(__##name##_of_table)) \ + KEEP(*(__##name##_of_table_end)) #define CLKSRC_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc) #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip) @@ -196,9 +196,14 @@ *(.dtb.init.rodata) \ VMLINUX_SYMBOL(__dtb_end) = .; -/* .data section */ +/* + * .data section + * -fdata-sections generates .data.identifier which needs to be pulled in + * with .data, but don't want to pull in .data..stuff which has its own + * requirements. Same for bss. + */ #define DATA_DATA \ - *(.data) \ + *(.data .data.[0-9a-zA-Z_]*) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data) \ @@ -281,28 +286,28 @@ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ - *(.pci_fixup_early) \ + KEEP(*(.pci_fixup_early)) \ VMLINUX_SYMBOL(__end_pci_fixups_early) = .; \ VMLINUX_SYMBOL(__start_pci_fixups_header) = .; \ - *(.pci_fixup_header) \ + KEEP(*(.pci_fixup_header)) \ VMLINUX_SYMBOL(__end_pci_fixups_header) = .; \ VMLINUX_SYMBOL(__start_pci_fixups_final) = .; \ - *(.pci_fixup_final) \ + KEEP(*(.pci_fixup_final)) \ VMLINUX_SYMBOL(__end_pci_fixups_final) = .; \ VMLINUX_SYMBOL(__start_pci_fixups_enable) = .; \ - *(.pci_fixup_enable) \ + KEEP(*(.pci_fixup_enable)) \ VMLINUX_SYMBOL(__end_pci_fixups_enable) = .; \ VMLINUX_SYMBOL(__start_pci_fixups_resume) = .; \ - *(.pci_fixup_resume) \ + KEEP(*(.pci_fixup_resume)) \ VMLINUX_SYMBOL(__end_pci_fixups_resume) = .; \ VMLINUX_SYMBOL(__start_pci_fixups_resume_early) = .; \ - *(.pci_fixup_resume_early) \ + KEEP(*(.pci_fixup_resume_early)) \ VMLINUX_SYMBOL(__end_pci_fixups_resume_early) = .; \ VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .; \ - *(.pci_fixup_suspend) \ + KEEP(*(.pci_fixup_suspend)) \ VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .; \ VMLINUX_SYMBOL(__start_pci_fixups_suspend_late) = .; \ - *(.pci_fixup_suspend_late) \ + KEEP(*(.pci_fixup_suspend_late)) \ VMLINUX_SYMBOL(__end_pci_fixups_suspend_late) = .; \ } \ \ @@ -318,76 +323,76 @@ /* Kernel symbol table: Normal symbols */ \ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab) = .; \ - *(SORT(___ksymtab+*)) \ + KEEP(*(SORT(___ksymtab+*))) \ VMLINUX_SYMBOL(__stop___ksymtab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \ - *(SORT(___ksymtab_gpl+*)) \ + KEEP(*(SORT(___ksymtab_gpl+*))) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \ - *(SORT(___ksymtab_unused+*)) \ + KEEP(*(SORT(___ksymtab_unused+*))) \ VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \ - *(SORT(___ksymtab_unused_gpl+*)) \ + KEEP(*(SORT(___ksymtab_unused_gpl+*))) \ VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \ - *(SORT(___ksymtab_gpl_future+*)) \ + KEEP(*(SORT(___ksymtab_gpl_future+*))) \ VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \ } \ \ /* Kernel symbol table: Normal symbols */ \ __kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab) = .; \ - *(SORT(___kcrctab+*)) \ + KEEP(*(SORT(___kcrctab+*))) \ VMLINUX_SYMBOL(__stop___kcrctab) = .; \ } \ \ /* Kernel symbol table: GPL-only symbols */ \ __kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \ - *(SORT(___kcrctab_gpl+*)) \ + KEEP(*(SORT(___kcrctab_gpl+*))) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \ } \ \ /* Kernel symbol table: Normal unused symbols */ \ __kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \ - *(SORT(___kcrctab_unused+*)) \ + KEEP(*(SORT(___kcrctab_unused+*))) \ VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \ } \ \ /* Kernel symbol table: GPL-only unused symbols */ \ __kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \ - *(SORT(___kcrctab_unused_gpl+*)) \ + KEEP(*(SORT(___kcrctab_unused_gpl+*))) \ VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \ } \ \ /* Kernel symbol table: GPL-future-only symbols */ \ __kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \ - *(SORT(___kcrctab_gpl_future+*)) \ + KEEP(*(SORT(___kcrctab_gpl_future+*))) \ VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \ } \ \ /* Kernel symbol table: strings */ \ __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \ - *(__ksymtab_strings) \ + KEEP(*(__ksymtab_strings)) \ } \ \ /* __*init sections */ \ @@ -400,7 +405,7 @@ /* Built-in module parameters. */ \ __param : AT(ADDR(__param) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___param) = .; \ - *(__param) \ + KEEP(*(__param)) \ VMLINUX_SYMBOL(__stop___param) = .; \ } \ \ @@ -422,7 +427,7 @@ #define SECURITY_INIT \ .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__security_initcall_start) = .; \ - *(.security_initcall.init) \ + KEEP(*(.security_initcall.init)) \ VMLINUX_SYMBOL(__security_initcall_end) = .; \ } @@ -430,8 +435,12 @@ * during second ld run in second ld pass when generating System.map */ #define TEXT_TEXT \ ALIGN_FUNCTION(); \ - *(.text.hot .text .text.fixup .text.unlikely) \ + *(.text.hot .text.hot.*) \ + *(.text .text.fixup) \ + *(.text.unlikely .text.unlikely .text.*) \ + *(.text.unknown .text.unknown.*) \ *(.ref.text) \ + *(.text.asan.* .text.tsan.*) \ MEM_KEEP(init.text) \ MEM_KEEP(exit.text) \ @@ -477,7 +486,7 @@ VMLINUX_SYMBOL(__softirqentry_text_end) = .; /* Section used for early init (in .S files) */ -#define HEAD_TEXT *(.head.text) +#define HEAD_TEXT KEEP(*(.head.text)) #define HEAD_TEXT_SECTION \ .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { \ @@ -491,7 +500,7 @@ . = ALIGN(align); \ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___ex_table) = .; \ - *(__ex_table) \ + KEEP(*(__ex_table)) \ VMLINUX_SYMBOL(__stop___ex_table) = .; \ } @@ -517,11 +526,12 @@ /* init and exit section handling */ #define INIT_DATA \ + KEEP(*(SORT(___kentry+*))) \ *(.init.data) \ MEM_DISCARD(init.data) \ KERNEL_CTORS() \ MCOUNT_REC() \ - *(.init.rodata) \ + *(.init.rodata .init.rodata.*) \ FTRACE_EVENTS() \ TRACE_SYSCALLS() \ KPROBE_BLACKLIST() \ @@ -539,7 +549,7 @@ EARLYCON_TABLE() #define INIT_TEXT \ - *(.init.text) \ + *(.init.text .init.text.*) \ *(.text.startup) \ MEM_DISCARD(init.text) @@ -556,7 +566,7 @@ MEM_DISCARD(exit.text) #define EXIT_CALL \ - *(.exitcall.exit) + KEEP(*(.exitcall.exit)) /* * bss (Block Started by Symbol) - uninitialized data @@ -583,7 +593,7 @@ BSS_FIRST_SECTIONS \ *(.bss..page_aligned) \ *(.dynbss) \ - *(.bss) \ + *(.bss .bss.[0-9a-zA-Z_]*) \ *(COMMON) \ } @@ -632,7 +642,7 @@ . = ALIGN(8); \ __bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___bug_table) = .; \ - *(__bug_table) \ + KEEP(*(__bug_table)) \ VMLINUX_SYMBOL(__stop___bug_table) = .; \ } #else @@ -661,17 +671,17 @@ #define INIT_SETUP(initsetup_align) \ . = ALIGN(initsetup_align); \ VMLINUX_SYMBOL(__setup_start) = .; \ - *(.init.setup) \ + KEEP(*(.init.setup)) \ VMLINUX_SYMBOL(__setup_end) = .; #define INIT_CALLS_LEVEL(level) \ VMLINUX_SYMBOL(__initcall##level##_start) = .; \ - *(.initcall##level##.init) \ - *(.initcall##level##s.init) \ + KEEP(*(.initcall##level##.init)) \ + KEEP(*(.initcall##level##s.init)) \ #define INIT_CALLS \ VMLINUX_SYMBOL(__initcall_start) = .; \ - *(.initcallearly.init) \ + KEEP(*(.initcallearly.init)) \ INIT_CALLS_LEVEL(0) \ INIT_CALLS_LEVEL(1) \ INIT_CALLS_LEVEL(2) \ @@ -685,21 +695,21 @@ #define CON_INITCALL \ VMLINUX_SYMBOL(__con_initcall_start) = .; \ - *(.con_initcall.init) \ + KEEP(*(.con_initcall.init)) \ VMLINUX_SYMBOL(__con_initcall_end) = .; #define SECURITY_INITCALL \ VMLINUX_SYMBOL(__security_initcall_start) = .; \ - *(.security_initcall.init) \ + KEEP(*(.security_initcall.init)) \ VMLINUX_SYMBOL(__security_initcall_end) = .; #ifdef CONFIG_BLK_DEV_INITRD #define INIT_RAM_FS \ . = ALIGN(4); \ VMLINUX_SYMBOL(__initramfs_start) = .; \ - *(.init.ramfs) \ + KEEP(*(.init.ramfs)) \ . = ALIGN(8); \ - *(.init.ramfs.info) + KEEP(*(.init.ramfs.info)) #else #define INIT_RAM_FS #endif diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index a2bfd7843f18..4bb6b98782e9 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -30,8 +30,8 @@ struct alg_sock { struct sock *parent; - unsigned int refcnt; - unsigned int nokey_refcnt; + atomic_t refcnt; + atomic_t nokey_refcnt; const struct af_alg_type *type; void *private; diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 90aa5cb7ea82..b6a979fae948 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -83,13 +83,7 @@ int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst); void ahash_free_instance(struct crypto_instance *inst); -int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen); - -static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg) -{ - return alg->setkey != shash_no_setkey; -} +bool crypto_shash_alg_has_setkey(struct shash_alg *alg); bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg); diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index f356f9716474..674472ac067a 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -303,7 +303,7 @@ struct drm_dp_resource_status_notify { struct drm_dp_query_payload_ack_reply { u8 port_number; - u8 allocated_pbn; + u16 allocated_pbn; }; struct drm_dp_sideband_msg_req_body { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6a30f1e03aa9..719fb8b320fd 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -75,7 +75,7 @@ static inline bool has_acpi_companion(struct device *dev) static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { - ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, NULL)); + ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, false)); } static inline const char *acpi_dev_name(struct acpi_device *adev) @@ -604,6 +604,13 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } +static inline struct platform_device * +acpi_create_platform_device(struct acpi_device *adev, + struct property_entry *properties) +{ + return NULL; +} + static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; diff --git a/include/linux/atalk.h b/include/linux/atalk.h index af43ed404ff4..4be0e14b38fc 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -107,7 +107,7 @@ static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb) #define AARP_RESOLVE_TIME (10 * HZ) extern struct datalink_proto *ddp_dl, *aarp_dl; -extern void aarp_proto_init(void); +extern int aarp_proto_init(void); /* Inter module exports */ diff --git a/include/linux/audit.h b/include/linux/audit.h index 9b95bb222e73..5d631cc6a05c 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -138,7 +138,7 @@ extern void __audit_getname(struct filename *name); extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); -extern void __audit_inode_child(const struct inode *parent, +extern void __audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type); extern void __audit_seccomp(unsigned long syscall, long signr, int code); @@ -203,7 +203,7 @@ static inline void audit_inode_parent_hidden(struct filename *name, __audit_inode(name, dentry, AUDIT_INODE_PARENT | AUDIT_INODE_HIDDEN); } -static inline void audit_inode_child(const struct inode *parent, +static inline void audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { if (unlikely(!audit_dummy_context())) @@ -391,7 +391,7 @@ static inline void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags) { } -static inline void __audit_inode_child(const struct inode *parent, +static inline void __audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { } @@ -405,7 +405,7 @@ static inline void audit_file(struct file *file) static inline void audit_inode_parent_hidden(struct filename *name, const struct dentry *dentry) { } -static inline void audit_inode_child(const struct inode *parent, +static inline void audit_inode_child(struct inode *parent, const struct dentry *dentry, const unsigned char type) { } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 4bc6540d426b..970bef82da35 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -12,6 +12,7 @@ #include <linux/fs.h> #include <linux/sched.h> #include <linux/blkdev.h> +#include <linux/device.h> #include <linux/writeback.h> #include <linux/blk-cgroup.h> #include <linux/backing-dev-defs.h> @@ -526,4 +527,13 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } +extern const char *bdi_unknown_name; + +static inline const char *bdi_dev_name(struct backing_dev_info *bdi) +{ + if (!bdi || !bdi->dev) + return bdi_unknown_name; + return dev_name(bdi->dev); +} + #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 714ce4a5e31f..39d47af6ca7b 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -84,6 +84,14 @@ */ /* + * Allocation and deallocation of bitmap. + * Provided in lib/bitmap.c to avoid circular dependency. + */ +extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); +extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); +extern void bitmap_free(const unsigned long *bitmap); + +/* * lib/bitmap.c provides these functions: */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 83edade218fa..9b9711ea267a 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -3,7 +3,8 @@ #include <asm/types.h> #include <linux/bits.h> -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); @@ -58,7 +59,7 @@ static inline int get_count_order(unsigned int count) static __always_inline unsigned long hweight_long(unsigned long w) { - return sizeof(w) == 4 ? hweight32(w) : hweight64(w); + return sizeof(w) == 4 ? hweight32(w) : hweight64((__u64)w); } /** diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c02e669945e9..217879e49dbc 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -343,16 +343,7 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) */ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) { - char *p; - - p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); - if (!p) { - strncpy(buf, "<unavailable>", buflen); - return -ENAMETOOLONG; - } - - memmove(buf, p, buf + buflen - p); - return 0; + return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); } /** diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 58fd06e8f960..8ba034b90e9b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -44,7 +44,7 @@ struct pr_ops; * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. */ -#define BLKCG_MAX_POLS 2 +#define BLKCG_MAX_POLS 3 struct request; typedef void (rq_end_io_fn)(struct request *, int); @@ -267,6 +267,7 @@ struct queue_limits { unsigned int max_sectors; unsigned int max_segment_size; unsigned int physical_block_size; + unsigned int logical_block_size; unsigned int alignment_offset; unsigned int io_min; unsigned int io_opt; @@ -276,7 +277,6 @@ struct queue_limits { unsigned int discard_granularity; unsigned int discard_alignment; - unsigned short logical_block_size; unsigned short max_segments; unsigned short max_integrity_segments; @@ -434,7 +434,8 @@ struct request_queue { unsigned int sg_reserved_size; int node; #ifdef CONFIG_BLK_DEV_IO_TRACE - struct blk_trace *blk_trace; + struct blk_trace __rcu *blk_trace; + struct mutex blk_trace_mutex; #endif /* * for flush operations @@ -838,6 +839,19 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) } /* + * The basic unit of block I/O is a sector. It is used in a number of contexts + * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9 + * bytes. Variables of type sector_t represent an offset or size that is a + * multiple of 512 bytes. Hence these two constants. + */ +#ifndef SECTOR_SHIFT +#define SECTOR_SHIFT 9 +#endif +#ifndef SECTOR_SIZE +#define SECTOR_SIZE (1 << SECTOR_SHIFT) +#endif + +/* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request * blk_rq_cur_bytes() : bytes left in the current segment @@ -864,19 +878,20 @@ extern unsigned int blk_rq_err_bytes(const struct request *rq); static inline unsigned int blk_rq_sectors(const struct request *rq) { - return blk_rq_bytes(rq) >> 9; + return blk_rq_bytes(rq) >> SECTOR_SHIFT; } static inline unsigned int blk_rq_cur_sectors(const struct request *rq) { - return blk_rq_cur_bytes(rq) >> 9; + return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT; } static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, unsigned int cmd_flags) { if (unlikely(cmd_flags & REQ_DISCARD)) - return min(q->limits.max_discard_sectors, UINT_MAX >> 9); + return min(q->limits.max_discard_sectors, + UINT_MAX >> SECTOR_SHIFT); if (unlikely(cmd_flags & REQ_WRITE_SAME)) return q->limits.max_write_same_sectors; @@ -981,7 +996,7 @@ extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_max_write_same_sectors(struct request_queue *q, unsigned int max_write_same_sectors); -extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); @@ -1142,16 +1157,21 @@ extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), - nr_blocks << (sb->s_blocksize_bits - 9), + return blkdev_issue_discard(sb->s_bdev, + block << (sb->s_blocksize_bits - + SECTOR_SHIFT), + nr_blocks << (sb->s_blocksize_bits - + SECTOR_SHIFT), gfp_mask, flags); } static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask) { return blkdev_issue_zeroout(sb->s_bdev, - block << (sb->s_blocksize_bits - 9), - nr_blocks << (sb->s_blocksize_bits - 9), + block << (sb->s_blocksize_bits - + SECTOR_SHIFT), + nr_blocks << (sb->s_blocksize_bits - + SECTOR_SHIFT), gfp_mask, true); } @@ -1202,7 +1222,7 @@ static inline unsigned int queue_max_segment_size(struct request_queue *q) return q->limits.max_segment_size; } -static inline unsigned short queue_logical_block_size(struct request_queue *q) +static inline unsigned queue_logical_block_size(struct request_queue *q) { int retval = 512; @@ -1212,7 +1232,7 @@ static inline unsigned short queue_logical_block_size(struct request_queue *q) return retval; } -static inline unsigned short bdev_logical_block_size(struct block_device *bdev) +static inline unsigned int bdev_logical_block_size(struct block_device *bdev) { return queue_logical_block_size(bdev_get_queue(bdev)); } @@ -1258,7 +1278,8 @@ static inline int queue_alignment_offset(struct request_queue *q) static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) { unsigned int granularity = max(lim->physical_block_size, lim->io_min); - unsigned int alignment = sector_div(sector, granularity >> 9) << 9; + unsigned int alignment = sector_div(sector, granularity >> SECTOR_SHIFT) + << SECTOR_SHIFT; return (granularity + lim->alignment_offset - alignment) % granularity; } @@ -1292,8 +1313,8 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector return 0; /* Why are these in bytes, not sectors? */ - alignment = lim->discard_alignment >> 9; - granularity = lim->discard_granularity >> 9; + alignment = lim->discard_alignment >> SECTOR_SHIFT; + granularity = lim->discard_granularity >> SECTOR_SHIFT; if (!granularity) return 0; @@ -1304,7 +1325,7 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector offset = (granularity + alignment - offset) % granularity; /* Turn it back into bytes, gaah */ - return offset << 9; + return offset << SECTOR_SHIFT; } static inline int bdev_discard_alignment(struct block_device *bdev) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index afc1343df3c7..e644bfe50019 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, const char *fmt, ...); **/ #define blk_add_trace_msg(q, fmt, ...) \ do { \ - struct blk_trace *bt = (q)->blk_trace; \ + struct blk_trace *bt; \ + \ + rcu_read_lock(); \ + bt = rcu_dereference((q)->blk_trace); \ if (unlikely(bt)) \ __trace_note_message(bt, fmt, ##__VA_ARGS__); \ + rcu_read_unlock(); \ } while (0) #define BLK_TN_MAX_MSG 128 diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h new file mode 100644 index 000000000000..26dd401e12ae --- /dev/null +++ b/include/linux/bpf-cgroup.h @@ -0,0 +1,105 @@ +#ifndef _BPF_CGROUP_H +#define _BPF_CGROUP_H + +#include <linux/jump_label.h> +#include <uapi/linux/bpf.h> + +struct sock; +struct cgroup; +struct sk_buff; + +#ifdef CONFIG_CGROUP_BPF + +extern struct static_key_false cgroup_bpf_enabled_key; +#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) + +struct bpf_prog_list { + struct list_head node; + struct bpf_prog *prog; +}; + +struct bpf_prog_array; + +struct cgroup_bpf { + /* array of effective progs in this cgroup */ + struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE]; + + /* attached progs to this cgroup and attach flags + * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will + * have either zero or one element + * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS + */ + struct list_head progs[MAX_BPF_ATTACH_TYPE]; + u32 flags[MAX_BPF_ATTACH_TYPE]; + + /* temp storage for effective prog array used by prog_attach/detach */ + struct bpf_prog_array __rcu *inactive; +}; + +void cgroup_bpf_put(struct cgroup *cgrp); +int cgroup_bpf_inherit(struct cgroup *cgrp); + +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); + +/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); + +int __cgroup_bpf_run_filter(struct sock *sk, + struct sk_buff *skb, + enum bpf_attach_type type); + +int __cgroup_bpf_run_filter_sk(struct sock *sk, + enum bpf_attach_type type); + +/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */ +#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter(sk, skb, \ + BPF_CGROUP_INET_INGRESS); \ + \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_filter(__sk, skb, \ + BPF_CGROUP_INET_EGRESS); \ + } \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk) { \ + __ret = __cgroup_bpf_run_filter_sk(sk, \ + BPF_CGROUP_INET_SOCK_CREATE); \ + } \ + __ret; \ +}) + +#else + +struct cgroup_bpf {}; +static inline void cgroup_bpf_put(struct cgroup *cgrp) {} +static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } + +#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) + +#endif /* CONFIG_CGROUP_BPF */ + +#endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bae3da5bcda0..77476208f1b0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -10,14 +10,18 @@ #include <uapi/linux/bpf.h> #include <linux/workqueue.h> #include <linux/file.h> +#include <linux/percpu.h> +#include <linux/err.h> +struct perf_event; struct bpf_map; /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ struct bpf_map *(*map_alloc)(union bpf_attr *attr); - void (*map_free)(struct bpf_map *); + void (*map_release)(struct bpf_map *map, struct file *map_file); + void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); /* funcs callable from userspace and from eBPF programs */ @@ -26,8 +30,9 @@ struct bpf_map_ops { int (*map_delete_elem)(struct bpf_map *map, void *key); /* funcs called by prog_array and perf_event_array map */ - void *(*map_fd_get_ptr) (struct bpf_map *map, int fd); - void (*map_fd_put_ptr) (void *ptr); + void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, + int fd); + void (*map_fd_put_ptr)(void *ptr); }; struct bpf_map { @@ -39,6 +44,7 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; + u32 map_flags; u32 pages; bool unpriv_array; /* 7 bytes hole */ @@ -50,6 +56,9 @@ struct bpf_map { atomic_t refcnt; atomic_t usercnt; struct work_struct work; +#ifdef CONFIG_SECURITY + void *security; +#endif }; struct bpf_map_type_list { @@ -73,7 +82,13 @@ enum bpf_arg_type { * functions that access data on eBPF program stack */ ARG_PTR_TO_STACK, /* any pointer to eBPF program stack */ + ARG_PTR_TO_RAW_STACK, /* any pointer to eBPF program stack, area does not + * need to be initialized, helper function must fill + * all bytes or clear them in error case. + */ + ARG_CONST_STACK_SIZE, /* number of bytes accessed from stack */ + ARG_CONST_STACK_SIZE_OR_ZERO, /* number of bytes accessed from stack or 0 */ ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ @@ -93,6 +108,7 @@ enum bpf_return_type { struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; + bool pkt_access; enum bpf_return_type ret_type; enum bpf_arg_type arg1_type; enum bpf_arg_type arg2_type; @@ -112,6 +128,38 @@ enum bpf_access_type { BPF_WRITE = 2 }; +/* types of values stored in eBPF registers */ +enum bpf_reg_type { + NOT_INIT = 0, /* nothing was written into register */ + UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ + PTR_TO_CTX, /* reg points to bpf_context */ + CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ + PTR_TO_MAP_VALUE, /* reg points to map element value */ + PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ + FRAME_PTR, /* reg == frame_pointer */ + PTR_TO_STACK, /* reg == frame_pointer + imm */ + CONST_IMM, /* constant integer value */ + + /* PTR_TO_PACKET represents: + * skb->data + * skb->data + imm + * skb->data + (u16) var + * skb->data + (u16) var + imm + * if (range > 0) then [ptr, ptr + range - off) is safe to access + * if (id > 0) means that some 'var' was added + * if (off > 0) menas that 'imm' was added + */ + PTR_TO_PACKET, + PTR_TO_PACKET_END, /* skb->data + headlen */ + + /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map + * elem value. We only allow this if we can statically verify that + * access from this register are going to fall within the size of the + * map element. + */ + PTR_TO_MAP_VALUE_ADJ, +}; + struct bpf_prog; struct bpf_verifier_ops { @@ -121,8 +169,10 @@ struct bpf_verifier_ops { /* return true if 'size' wide access at offset 'off' within bpf_context * with 'type' (read or write) is allowed */ - bool (*is_valid_access)(int off, int size, enum bpf_access_type type); - + bool (*is_valid_access)(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type); + int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, + const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn, struct bpf_prog *prog); @@ -137,10 +187,14 @@ struct bpf_prog_type_list { struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; + u32 max_ctx_offset; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; +#ifdef CONFIG_SECURITY + void *security; +#endif union { struct work_struct work; struct rcu_head rcu; @@ -161,39 +215,154 @@ struct bpf_array { union { char value[0] __aligned(8); void *ptrs[0] __aligned(8); + void __percpu *pptrs[0] __aligned(8); }; }; + #define MAX_TAIL_CALL_CNT 32 +struct bpf_event_entry { + struct perf_event *event; + struct file *perf_file; + struct file *map_file; + struct rcu_head rcu; +}; + u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); -void bpf_fd_array_map_clear(struct bpf_map *map); +u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); + bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); + const struct bpf_func_proto *bpf_get_trace_printk_proto(void); +typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, + unsigned long off, unsigned long len); + +u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, + void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); + +/* an array of programs to be executed under rcu_lock. + * + * Typical usage: + * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN); + * + * the structure returned by bpf_prog_array_alloc() should be populated + * with program pointers and the last pointer must be NULL. + * The user has to keep refcnt on the program and make sure the program + * is removed from the array before bpf_prog_put(). + * The 'struct bpf_prog_array *' should only be replaced with xchg() + * since other cpus are walking the array of pointers in parallel. + */ +struct bpf_prog_array { + struct rcu_head rcu; + struct bpf_prog *progs[0]; +}; + +struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); + +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog); +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array); + +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ + ({ \ + struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array *_array; \ + u32 _ret = 1; \ + rcu_read_lock(); \ + _array = rcu_dereference(array); \ + if (unlikely(check_non_null && !_array))\ + goto _out; \ + _prog = _array->progs; \ + while ((__prog = READ_ONCE(*_prog))) { \ + _ret &= func(__prog, ctx); \ + _prog++; \ + } \ +_out: \ + rcu_read_unlock(); \ + _ret; \ + }) + +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + +#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + #ifdef CONFIG_BPF_SYSCALL +DECLARE_PER_CPU(int, bpf_prog_active); + void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); +extern const struct file_operations bpf_map_fops; +extern const struct file_operations bpf_prog_fops; + struct bpf_prog *bpf_prog_get(u32 ufd); +struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); +struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); +int __bpf_prog_charge(struct user_struct *user, u32 pages); +void __bpf_prog_uncharge(struct user_struct *user, u32 pages); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); +int bpf_map_precharge_memlock(u32 pages); +void *bpf_map_area_alloc(size_t size); +void bpf_map_area_free(void *base); extern int sysctl_unprivileged_bpf_disabled; -int bpf_map_new_fd(struct bpf_map *map); +int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); -int bpf_obj_get_user(const char __user *pathname); +int bpf_obj_get_user(const char __user *pathname, int flags); + +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, + u64 flags); +int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, + u64 flags); + +int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); + +int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, + void *key, void *value, u64 map_flags); +void bpf_fd_array_map_clear(struct bpf_map *map); + +int bpf_get_file_flag(int flags); + +/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and + * forced to use 'long' read/writes to try to atomically copy long counters. + * Best-effort only. No barriers here, since it _will_ race with concurrent + * updates from BPF programs. Called from bpf syscall and mostly used with + * size 8 or 16 bytes, so ask compiler to inline it. + */ +static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) +{ + const long *lsrc = src; + long *ldst = dst; + + size /= sizeof(long); + while (size--) + *ldst++ = *lsrc++; +} /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); + +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); + #else static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) { @@ -204,9 +373,39 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } +static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, + enum bpf_prog_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void bpf_prog_put(struct bpf_prog *prog) { } +static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) +{ + return 0; +} + +static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) +{ +} + +static inline int bpf_obj_get_user(const char __user *pathname) +{ + return -EOPNOTSUPP; +} + +static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, + enum bpf_prog_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ @@ -223,6 +422,7 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; +extern const struct bpf_func_proto bpf_get_stackid_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h new file mode 100644 index 000000000000..5031defe59c5 --- /dev/null +++ b/include/linux/bpf_verifier.h @@ -0,0 +1,109 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _LINUX_BPF_VERIFIER_H +#define _LINUX_BPF_VERIFIER_H 1 + +#include <linux/bpf.h> /* for enum bpf_reg_type */ +#include <linux/filter.h> /* for MAX_BPF_STACK */ + + /* Just some arbitrary values so we can safely do math without overflowing and + * are obviously wrong for any sort of memory access. + */ +#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -1 + +struct bpf_reg_state { + enum bpf_reg_type type; + union { + /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ + s64 imm; + + /* valid when type == PTR_TO_PACKET* */ + struct { + u16 off; + u16 range; + }; + + /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | + * PTR_TO_MAP_VALUE_OR_NULL + */ + struct bpf_map *map_ptr; + }; + u32 id; + /* Used to determine if any memory access using this register will + * result in a bad access. These two fields must be last. + * See states_equal() + */ + s64 min_value; + u64 max_value; + bool value_from_signed; +}; + +enum bpf_stack_slot_type { + STACK_INVALID, /* nothing was stored in this stack slot */ + STACK_SPILL, /* register spilled into stack */ + STACK_MISC /* BPF program wrote some data into this slot */ +}; + +#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + +/* state of the program: + * type of all registers and stack info + */ +struct bpf_verifier_state { + struct bpf_reg_state regs[MAX_BPF_REG]; + u8 stack_slot_type[MAX_BPF_STACK]; + struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; +}; + +/* linked list of verifier states used to prune search */ +struct bpf_verifier_state_list { + struct bpf_verifier_state state; + struct bpf_verifier_state_list *next; +}; + +struct bpf_insn_aux_data { + union { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + }; + int sanitize_stack_off; /* stack slot to be cleared */ + bool seen; /* this insn was processed by the verifier */ +}; + +#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ + +struct bpf_verifier_env; +struct bpf_ext_analyzer_ops { + int (*insn_hook)(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx); +}; + +/* single container for all structs + * one verifier_env per bpf_check() call + */ +struct bpf_verifier_env { + struct bpf_prog *prog; /* eBPF program being verified */ + struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ + int stack_size; /* number of states to be processed */ + struct bpf_verifier_state cur_state; /* current verifier state */ + struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ + void *analyzer_priv; /* pointer to external analyzer's private data */ + struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + u32 used_map_cnt; /* number of used maps */ + u32 id_gen; /* used to generate unique reg IDs */ + bool allow_ptr_leaks; + bool seen_direct_write; + bool varlen_map_value_access; + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ +}; + +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv); + +#endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index f7178f44825b..d2a497950639 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -17,6 +17,7 @@ #include <linux/can/error.h> #include <linux/can/led.h> #include <linux/can/netlink.h> +#include <linux/can/skb.h> #include <linux/netdevice.h> /* @@ -81,6 +82,36 @@ struct can_priv { #define get_can_dlc(i) (min_t(__u8, (i), CAN_MAX_DLC)) #define get_canfd_dlc(i) (min_t(__u8, (i), CANFD_MAX_DLC)) +/* Check for outgoing skbs that have not been created by the CAN subsystem */ +static inline bool can_skb_headroom_valid(struct net_device *dev, + struct sk_buff *skb) +{ + /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ + if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) + return false; + + /* af_packet does not apply CAN skb specific settings */ + if (skb->ip_summed == CHECKSUM_NONE) { + /* init headroom */ + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* preform proper loopback on capable devices */ + if (dev->flags & IFF_ECHO) + skb->pkt_type = PACKET_LOOPBACK; + else + skb->pkt_type = PACKET_HOST; + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + } + + return true; +} + /* Drop a given socketbuffer if it does not contain a valid CAN frame. */ static inline bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) @@ -98,6 +129,9 @@ static inline bool can_dropped_invalid_skb(struct net_device *dev, } else goto inval_skb; + if (!can_skb_headroom_valid(dev, skb)) + goto inval_skb; + return false; inval_skb: diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 51bb6532785c..0e7350973e0e 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -48,8 +48,12 @@ static inline void can_skb_reserve(struct sk_buff *skb) static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) { - if (sk) { - sock_hold(sk); + /* If the socket has already been closed by user space, the + * refcount may already be 0 (and the socket will be freed + * after the last TX skb has been freed). So only increase + * socket refcount if the refcount is > 0. + */ + if (sk && atomic_inc_not_zero(&sk->sk_refcnt)) { skb->destructor = sock_efree; skb->sk = sk; } @@ -60,21 +64,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) */ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) { - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *nskb; - if (likely(nskb)) { - can_skb_set_owner(nskb, skb->sk); - consume_skb(skb); - return nskb; - } else { - kfree_skb(skb); - return NULL; - } + nskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!nskb)) { + kfree_skb(skb); + return NULL; } - /* we can assume to have an unshared skb with proper owner */ - return skb; + can_skb_set_owner(nskb, skb->sk); + consume_skb(skb); + return nskb; } #endif /* !_CAN_SKB_H */ diff --git a/include/linux/cdev.h b/include/linux/cdev.h index f8763615a5f2..408bc09ce497 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -4,6 +4,7 @@ #include <linux/kobject.h> #include <linux/kdev_t.h> #include <linux/list.h> +#include <linux/device.h> struct file_operations; struct inode; @@ -26,6 +27,10 @@ void cdev_put(struct cdev *p); int cdev_add(struct cdev *, dev_t, unsigned); +void cdev_set_parent(struct cdev *p, struct kobject *kobj); +int cdev_device_add(struct cdev *cdev, struct device *dev); +void cdev_device_del(struct cdev *cdev, struct device *dev); + void cdev_del(struct cdev *); void cd_forget(struct inode *); diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4cd5c95d1ca0..8a590cd40be0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -16,6 +16,7 @@ #include <linux/percpu-refcount.h> #include <linux/percpu-rwsem.h> #include <linux/workqueue.h> +#include <linux/bpf-cgroup.h> #ifdef CONFIG_CGROUPS @@ -27,6 +28,7 @@ struct kernfs_node; struct kernfs_ops; struct kernfs_open_file; struct seq_file; +struct poll_table_struct; #define MAX_CGROUP_TYPE_NAMELEN 32 #define MAX_CGROUP_ROOT_NAMELEN 64 @@ -34,22 +36,18 @@ struct seq_file; /* define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _cgrp_id, -#define SUBSYS_TAG(_t) CGROUP_ ## _t, \ - __unused_tag_ ## _t = CGROUP_ ## _t - 1, enum cgroup_subsys_id { #include <linux/cgroup_subsys.h> CGROUP_SUBSYS_COUNT, }; -#undef SUBSYS_TAG #undef SUBSYS -#define CGROUP_CANFORK_COUNT (CGROUP_CANFORK_END - CGROUP_CANFORK_START) - /* bits in struct cgroup_subsys_state flags field */ enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ + CSS_VISIBLE = (1 << 3), /* css is visible to userland */ }; /* bits in struct cgroup flags field */ @@ -195,12 +193,13 @@ struct css_set { /* * If this cset is acting as the source of migration the following - * two fields are set. mg_src_cgrp is the source cgroup of the - * on-going migration and mg_dst_cset is the destination cset the - * target tasks on this cset should be migrated to. Protected by - * cgroup_mutex. + * two fields are set. mg_src_cgrp and mg_dst_cgrp are + * respectively the source and destination cgroups of the on-going + * migration. mg_dst_cset is the destination cset the target tasks + * on this cset should be migrated to. Protected by cgroup_mutex. */ struct cgroup *mg_src_cgrp; + struct cgroup *mg_dst_cgrp; struct css_set *mg_dst_cset; /* @@ -239,6 +238,14 @@ struct cgroup { int id; /* + * The depth this cgroup is at. The root is at depth zero and each + * step down the hierarchy increments the level. This along with + * ancestor_ids[] can determine whether a given cgroup is a + * descendant of another without traversing the hierarchy. + */ + int level; + + /* * Each non-empty css_set associated with this cgroup contributes * one to populated_cnt. All children with non-zero popuplated_cnt * of their own contribute one. The count is zero iff there's no @@ -253,13 +260,14 @@ struct cgroup { /* * The bitmask of subsystems enabled on the child cgroups. * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_subsys_mask is the - * effective one which may have more subsystems enabled. - * Controller knobs are made available iff it's enabled in - * ->subtree_control. + * "cgroup.subtree_control" while ->child_ss_mask is the effective + * one which may have more subsystems enabled. Controller knobs + * are made available iff it's enabled in ->subtree_control. */ - unsigned int subtree_control; - unsigned int child_subsys_mask; + u16 subtree_control; + u16 subtree_ss_mask; + u16 old_subtree_control; + u16 old_subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; @@ -293,6 +301,12 @@ struct cgroup { /* used to schedule release agent */ struct work_struct release_agent_work; + + /* used to store eBPF programs */ + struct cgroup_bpf bpf; + + /* ids of the ancestors at each level including self */ + int ancestor_ids[]; }; /* @@ -312,6 +326,9 @@ struct cgroup_root { /* The root cgroup. Root is destroyed on its release. */ struct cgroup cgrp; + /* for cgrp->ancestor_ids[0] */ + int cgrp_ancestor_id_storage; + /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ atomic_t nr_cgrps; @@ -372,6 +389,9 @@ struct cftype { struct list_head node; /* anchored at ss->cfts */ struct kernfs_ops *kf_ops; + int (*open)(struct kernfs_open_file *of); + void (*release)(struct kernfs_open_file *of); + /* * read_u64() is a shortcut for the common case of returning a * single integer. Use it in place of read() @@ -412,6 +432,9 @@ struct cftype { ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); + unsigned int (*poll)(struct kernfs_open_file *of, + struct poll_table_struct *pt); + #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lock_class_key lockdep_key; #endif @@ -428,15 +451,14 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); - void (*css_e_css_changed)(struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); void (*post_attach)(void); - int (*can_fork)(struct task_struct *task, void **priv_p); - void (*cancel_fork)(struct task_struct *task, void *priv); - void (*fork)(struct task_struct *task, void *priv); + int (*can_fork)(struct task_struct *task); + void (*cancel_fork)(struct task_struct *task); + void (*fork)(struct task_struct *task); void (*exit)(struct task_struct *task); void (*free)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); @@ -444,6 +466,19 @@ struct cgroup_subsys { int early_init; /* + * If %true, the controller, on the default hierarchy, doesn't show + * up in "cgroup.controllers" or "cgroup.subtree_control", is + * implicitly enabled on all cgroups on the default hierarchy, and + * bypasses the "no internal process" constraint. This is for + * utility type controllers which is transparent to userland. + * + * An implicit controller can be stolen from the default hierarchy + * anytime and thus must be okay with offline csses from previous + * hierarchies coexisting with csses for the current one. + */ + bool implicit_on_dfl:1; + + /* * If %false, this subsystem is properly hierarchical - * configuration, resource accounting and restriction on a parent * cgroup cover those of its children. If %true, hierarchy support @@ -522,7 +557,6 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #else /* CONFIG_CGROUPS */ -#define CGROUP_CANFORK_COUNT 0 #define CGROUP_SUBSYS_COUNT 0 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} @@ -530,4 +564,120 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +/* + * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains + * per-socket cgroup information except for memcg association. + * + * On legacy hierarchies, net_prio and net_cls controllers directly set + * attributes on each sock which can then be tested by the network layer. + * On the default hierarchy, each sock is associated with the cgroup it was + * created in and the networking layer can match the cgroup directly. + * + * To avoid carrying all three cgroup related fields separately in sock, + * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer. + * On boot, sock_cgroup_data records the cgroup that the sock was created + * in so that cgroup2 matches can be made; however, once either net_prio or + * net_cls starts being used, the area is overriden to carry prioidx and/or + * classid. The two modes are distinguished by whether the lowest bit is + * set. Clear bit indicates cgroup pointer while set bit prioidx and + * classid. + * + * While userland may start using net_prio or net_cls at any time, once + * either is used, cgroup2 matching no longer works. There is no reason to + * mix the two and this is in line with how legacy and v2 compatibility is + * handled. On mode switch, cgroup references which are already being + * pointed to by socks may be leaked. While this can be remedied by adding + * synchronization around sock_cgroup_data, given that the number of leaked + * cgroups is bound and highly unlikely to be high, this seems to be the + * better trade-off. + */ +struct sock_cgroup_data { + union { +#ifdef __LITTLE_ENDIAN + struct { + u8 is_data : 1; + u8 no_refcnt : 1; + u8 unused : 6; + u8 padding; + u16 prioidx; + u32 classid; + } __packed; +#else + struct { + u32 classid; + u16 prioidx; + u8 padding; + u8 unused : 6; + u8 no_refcnt : 1; + u8 is_data : 1; + } __packed; +#endif + u64 val; + }; +}; + +/* + * There's a theoretical window where the following accessors race with + * updaters and return part of the previous pointer as the prioidx or + * classid. Such races are short-lived and the result isn't critical. + */ +static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) +{ + /* fallback to 1 which is always the ID of the root cgroup */ + return (skcd->is_data & 1) ? skcd->prioidx : 1; +} + +static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) +{ + /* fallback to 0 which is the unconfigured default classid */ + return (skcd->is_data & 1) ? skcd->classid : 0; +} + +/* + * If invoked concurrently, the updaters may clobber each other. The + * caller is responsible for synchronization. + */ +static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, + u16 prioidx) +{ + struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; + + if (sock_cgroup_prioidx(&skcd_buf) == prioidx) + return; + + if (!(skcd_buf.is_data & 1)) { + skcd_buf.val = 0; + skcd_buf.is_data = 1; + } + + skcd_buf.prioidx = prioidx; + WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +} + +static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, + u32 classid) +{ + struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }}; + + if (sock_cgroup_classid(&skcd_buf) == classid) + return; + + if (!(skcd_buf.is_data & 1)) { + skcd_buf.val = 0; + skcd_buf.is_data = 1; + } + + skcd_buf.classid = classid; + WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */ +} + +#else /* CONFIG_SOCK_CGROUP_DATA */ + +struct sock_cgroup_data { +}; + +#endif /* CONFIG_SOCK_CGROUP_DATA */ + #endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8607c937145f..ca47b5d42764 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,6 +17,11 @@ #include <linux/seq_file.h> #include <linux/kernfs.h> #include <linux/jump_label.h> +#include <linux/nsproxy.h> +#include <linux/types.h> +#include <linux/ns_common.h> +#include <linux/nsproxy.h> +#include <linux/user_namespace.h> #include <linux/cgroup-defs.h> @@ -81,7 +86,9 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); -bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); +struct cgroup *cgroup_get_from_path(const char *path); +struct cgroup *cgroup_get_from_fd(int fd); + int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); @@ -90,18 +97,15 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); -char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); +int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); void cgroup_fork(struct task_struct *p); -extern int cgroup_can_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]); -extern void cgroup_cancel_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]); -extern void cgroup_post_fork(struct task_struct *p, - void *old_ss_priv[CGROUP_CANFORK_COUNT]); +extern int cgroup_can_fork(struct task_struct *p); +extern void cgroup_cancel_fork(struct task_struct *p); +extern void cgroup_post_fork(struct task_struct *p); void cgroup_exit(struct task_struct *p); void cgroup_free(struct task_struct *p); @@ -384,6 +388,21 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) percpu_ref_put_many(&css->refcnt, n); } +static inline void cgroup_get(struct cgroup *cgrp) +{ + css_get(&cgrp->self); +} + +static inline bool cgroup_tryget(struct cgroup *cgrp) +{ + return css_tryget(&cgrp->self); +} + +static inline void cgroup_put(struct cgroup *cgrp) +{ + css_put(&cgrp->self); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@ -497,6 +516,54 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, return task_css(task, subsys_id)->cgroup; } +static inline struct cgroup *task_dfl_cgroup(struct task_struct *task) +{ + return task_css_set(task)->dfl_cgrp; +} + +static inline struct cgroup *cgroup_parent(struct cgroup *cgrp) +{ + struct cgroup_subsys_state *parent_css = cgrp->self.parent; + + if (parent_css) + return container_of(parent_css, struct cgroup, self); + return NULL; +} + +/** + * cgroup_is_descendant - test ancestry + * @cgrp: the cgroup to be tested + * @ancestor: possible ancestor of @cgrp + * + * Test whether @cgrp is a descendant of @ancestor. It also returns %true + * if @cgrp == @ancestor. This function is safe to call as long as @cgrp + * and @ancestor are accessible. + */ +static inline bool cgroup_is_descendant(struct cgroup *cgrp, + struct cgroup *ancestor) +{ + if (cgrp->root != ancestor->root || cgrp->level < ancestor->level) + return false; + return cgrp->ancestor_ids[ancestor->level] == ancestor->id; +} + +/** + * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry + * @task: the task to be tested + * @ancestor: possible ancestor of @task's cgroup + * + * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. + * It follows all the same rules as cgroup_is_descendant, and only applies + * to the default hierarchy. + */ +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + struct css_set *cset = task_css_set(task); + + return cgroup_is_descendant(cset->dfl_cgrp, ancestor); +} + /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { @@ -538,9 +605,9 @@ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) return kernfs_name(cgrp->kn, buf, buflen); } -static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf, - size_t buflen) +static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen) { + return kernfs_path(cgrp->kn, buf, buflen); } @@ -576,6 +643,7 @@ static inline void cgroup_kthread_ready(void) #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; +struct cgroup; static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, @@ -584,21 +652,111 @@ static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { return -EINVAL; } static inline void cgroup_fork(struct task_struct *p) {} -static inline int cgroup_can_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]) -{ return 0; } -static inline void cgroup_cancel_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]) {} -static inline void cgroup_post_fork(struct task_struct *p, - void *ss_priv[CGROUP_CANFORK_COUNT]) {} +static inline int cgroup_can_fork(struct task_struct *p) { return 0; } +static inline void cgroup_cancel_fork(struct task_struct *p) {} +static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p) {} static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } + +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + return true; +} + static inline void cgroup_init_kthreadd(void) {} static inline void cgroup_kthread_ready(void) {} #endif /* !CONFIG_CGROUPS */ +/* + * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data + * definition in cgroup-defs.h. + */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) +extern spinlock_t cgroup_sk_update_lock; +#endif + +void cgroup_sk_alloc_disable(void); +void cgroup_sk_alloc(struct sock_cgroup_data *skcd); +void cgroup_sk_clone(struct sock_cgroup_data *skcd); +void cgroup_sk_free(struct sock_cgroup_data *skcd); + +static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) +{ +#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID) + unsigned long v; + + /* + * @skcd->val is 64bit but the following is safe on 32bit too as we + * just need the lower ulong to be written and read atomically. + */ + v = READ_ONCE(skcd->val); + + if (v & 3) + return &cgrp_dfl_root.cgrp; + + return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; +#else + return (struct cgroup *)(unsigned long)skcd->val; +#endif +} + +#else /* CONFIG_CGROUP_DATA */ + +static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} +static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {} +static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} + +#endif /* CONFIG_CGROUP_DATA */ + +struct cgroup_namespace { + atomic_t count; + struct ns_common ns; + struct user_namespace *user_ns; + struct css_set *root_cset; +}; + +extern struct cgroup_namespace init_cgroup_ns; + +#ifdef CONFIG_CGROUPS + +void free_cgroup_ns(struct cgroup_namespace *ns); + +struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, + struct user_namespace *user_ns, + struct cgroup_namespace *old_ns); + +int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, + struct cgroup_namespace *ns); + +#else /* !CONFIG_CGROUPS */ + +static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } +static inline struct cgroup_namespace * +copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, + struct cgroup_namespace *old_ns) +{ + return old_ns; +} + +#endif /* !CONFIG_CGROUPS */ + +static inline void get_cgroup_ns(struct cgroup_namespace *ns) +{ + if (ns) + atomic_inc(&ns->count); +} + +static inline void put_cgroup_ns(struct cgroup_namespace *ns) +{ + if (ns && atomic_dec_and_test(&ns->count)) + free_cgroup_ns(ns); +} + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index e133705d794a..7f4a2a5a2a77 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -6,14 +6,8 @@ /* * This file *must* be included with SUBSYS() defined. - * SUBSYS_TAG() is a noop if undefined. */ -#ifndef SUBSYS_TAG -#define __TMP_SUBSYS_TAG -#define SUBSYS_TAG(_x) -#endif - #if IS_ENABLED(CONFIG_CPUSETS) SUBSYS(cpuset) #endif @@ -62,17 +56,10 @@ SUBSYS(net_prio) SUBSYS(hugetlb) #endif -/* - * Subsystems that implement the can_fork() family of callbacks. - */ -SUBSYS_TAG(CANFORK_START) - #if IS_ENABLED(CONFIG_CGROUP_PIDS) SUBSYS(pids) #endif -SUBSYS_TAG(CANFORK_END) - /* * The following subsystems are not supported on the default hierarchy. */ @@ -80,11 +67,6 @@ SUBSYS_TAG(CANFORK_END) SUBSYS(debug) #endif -#ifdef __TMP_SUBSYS_TAG -#undef __TMP_SUBSYS_TAG -#undef SUBSYS_TAG -#endif - /* * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ diff --git a/include/linux/compat.h b/include/linux/compat.h index a76c9172b2eb..c4bdce045492 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -5,6 +5,8 @@ * syscall compatibility layer. */ +#include <linux/types.h> + #ifdef CONFIG_COMPAT #include <linux/stat.h> @@ -306,8 +308,6 @@ struct compat_kexec_segment; struct compat_mq_attr; struct compat_msgbuf; -extern void compat_exit_robust_list(struct task_struct *curr); - asmlinkage long compat_sys_set_robust_list(struct compat_robust_list_head __user *head, compat_size_t len); @@ -713,9 +713,22 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); + +/* + * For most but not all architectures, "am I in a compat syscall?" and + * "am I a compat task?" are the same question. For architectures on which + * they aren't the same question, arch code can override in_compat_syscall. + */ + +#ifndef in_compat_syscall +static inline bool in_compat_syscall(void) { return is_compat_task(); } +#endif + #else #define is_compat_task() (0) +static inline bool in_compat_syscall(void) { return false; } #endif /* CONFIG_COMPAT */ + #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index de179993e039..6851c4214ac6 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -15,3 +15,17 @@ * with any version that can compile the kernel */ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +/* + * Not all versions of clang implement the the type-generic versions + * of the builtin overflow checkers. Fortunately, clang implements + * __has_builtin allowing us to avoid awkward version + * checks. Unfortunately, we don't know which version of gcc clang + * pretends to be, so the macro may or may not be defined. + */ +#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW +#if __has_builtin(__builtin_mul_overflow) && \ + __has_builtin(__builtin_add_overflow) && \ + __has_builtin(__builtin_sub_overflow) +#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 +#endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index af8b4a879934..bce8a8f5b562 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -145,6 +145,12 @@ #if GCC_VERSION < 30200 # error Sorry, your compiler is too old - please upgrade it. +#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100 && !defined(__clang__) +/* + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 + * https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk + */ +# error Sorry, your version of GCC is too old - please use 5.1 or newer. #endif #if GCC_VERSION < 30300 @@ -315,3 +321,7 @@ * code */ #define uninitialized_var(x) x = x + +#if GCC_VERSION >= 50100 +#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 +#endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index d4c71132d07f..8c9897b1b953 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -43,3 +43,7 @@ #define __builtin_bswap16 _bswap16 #endif +/* + * icc defines __GNUC__, but does not implement the builtin overflow checkers. + */ +#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 5508011cc0c7..759fb028acc8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -201,6 +201,29 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define unreachable() do { } while (1) #endif +/* + * KENTRY - kernel entry point + * This can be used to annotate symbols (functions or data) that are used + * without their linker symbol being referenced explicitly. For example, + * interrupt vector handlers, or functions in the kernel image that are found + * programatically. + * + * Not required for symbols exported with EXPORT_SYMBOL, or initcalls. Those + * are handled in their own way (with KEEP() in linker scripts). + * + * KENTRY can be avoided if the symbols in question are marked as KEEP() in the + * linker script. For example an architecture could KEEP() its entire + * boot/exception vector code rather than annotate each function and data. + */ +#ifndef KENTRY +# define KENTRY(sym) \ + extern typeof(sym) sym; \ + static const unsigned long __kentry_##sym \ + __used \ + __attribute__((section("___kentry" "+" #sym ), used)) \ + = (unsigned long)&sym; +#endif + #ifndef RELOC_HIDE # define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ @@ -208,6 +231,8 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); (typeof(ptr)) (__ptr + (off)); }) #endif +#define absolute_pointer(val) RELOC_HIDE((void *)(val), 0) + #ifndef OPTIMIZER_HIDE_VAR #define OPTIMIZER_HIDE_VAR(var) barrier() #endif @@ -241,23 +266,21 @@ void __read_once_size(const volatile void *p, void *res, int size) #ifdef CONFIG_KASAN /* - * This function is not 'inline' because __no_sanitize_address confilcts + * We can't declare function 'inline' because __no_sanitize_address confilcts * with inlining. Attempt to inline it may cause a build failure. * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 * '__maybe_unused' allows us to avoid defined-but-not-used warnings. */ -static __no_sanitize_address __maybe_unused -void __read_once_size_nocheck(const volatile void *p, void *res, int size) -{ - __READ_ONCE_SIZE; -} +# define __no_kasan_or_inline __no_sanitize_address __maybe_unused #else -static __always_inline +# define __no_kasan_or_inline __always_inline +#endif + +static __no_kasan_or_inline void __read_once_size_nocheck(const volatile void *p, void *res, int size) { __READ_ONCE_SIZE; } -#endif static __always_inline void __write_once_size(volatile void *p, void *res, int size) { @@ -294,6 +317,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * with an explicit memory barrier or atomic instruction that provides the * required ordering. */ +#include <linux/kasan-checks.h> #define __READ_ONCE(x, check) \ ({ \ @@ -312,6 +336,13 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s */ #define READ_ONCE_NOCHECK(x) __READ_ONCE(x, 0) +static __no_kasan_or_inline +unsigned long read_word_at_a_time(const void *addr) +{ + kasan_check_read(addr, 1); + return *(unsigned long *)addr; +} + #define WRITE_ONCE(x, val) \ ({ \ union { typeof(x) __val; char __c[1]; } __u = \ @@ -502,7 +533,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * compiler has support to do so. */ #define compiletime_assert(condition, msg) \ - _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) #define compiletime_assert_atomic_type(t) \ compiletime_assert(__native_word(t), \ @@ -556,4 +587,11 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s # define __kprobes # define nokprobe_inline inline #endif + +/* + * This is needed in functions which generate the stack canary, see + * arch/x86/kernel/smpboot.c::start_secondary() for an example. + */ +#define prevent_tail_call_optimization() mb() + #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index e329ee2667e1..3a7fd222845e 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -29,6 +29,7 @@ struct vc_data { unsigned int vc_rows; unsigned int vc_size_row; /* Bytes per row */ unsigned int vc_scan_lines; /* # of scan lines */ + unsigned int vc_cell_height; /* CRTC character cell height */ unsigned long vc_origin; /* [!] Start of real screen */ unsigned long vc_scr_end; /* [!] End of real screen */ unsigned long vc_visible_origin; /* [!] Top of visible window */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 0c84c69d54f8..8e9d08dfbd18 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -100,10 +100,6 @@ struct cpufreq_policy { * - Any routine that will write to the policy structure and/or may take away * the policy altogether (eg. CPU hotplug), will hold this lock in write * mode before doing so. - * - * Additional rules: - * - Lock should not be held across - * __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); */ struct rw_semaphore rwsem; @@ -127,6 +123,9 @@ struct cpufreq_policy { unsigned int up_transition_delay_us; unsigned int down_transition_delay_us; + /* Boost switch for tasks with p->in_iowait set */ + bool iowait_boost_enable; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; int cached_resolved_idx; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index e0bfecde4e7b..50f16bd149c0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -174,6 +174,8 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node) for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #define for_each_cpu_not(cpu, mask) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) #define for_each_cpu_and(cpu, mask, and) \ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) #else @@ -246,6 +248,23 @@ unsigned int cpumask_local_spread(unsigned int i, int node); (cpu) = cpumask_next_zero((cpu), (mask)), \ (cpu) < nr_cpu_ids;) +extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); + +/** + * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask poiter + * @start: the start location + * + * The implementation does not assume any bit in @mask is set (including @start). + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ + (cpu) < nr_cpumask_bits; \ + (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) + /** * for_each_cpu_and - iterate over every cpu in both masks * @cpu: the (optionally unsigned) integer iterator diff --git a/include/linux/cred.h b/include/linux/cred.h index d2db1da3036c..ee2b36cdb80d 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -243,7 +243,7 @@ static inline struct cred *get_new_cred(struct cred *cred) * @cred: The credentials to reference * * Get a reference on the specified set of credentials. The caller must - * release the reference. + * release the reference. If %NULL is passed, it is returned with no action. * * This is used to deal with a committed set of credentials. Although the * pointer is const, this will temporarily discard the const and increment the @@ -254,6 +254,8 @@ static inline struct cred *get_new_cred(struct cred *cred) static inline const struct cred *get_cred(const struct cred *cred) { struct cred *nonconst_cred = (struct cred *) cred; + if (!cred) + return cred; validate_creds(cred); nonconst_cred->non_rcu = 0; return get_new_cred(nonconst_cred); @@ -264,7 +266,7 @@ static inline const struct cred *get_cred(const struct cred *cred) * @cred: The credentials to release * * Release a reference to a set of credentials, deleting them when the last ref - * is released. + * is released. If %NULL is passed, nothing is done. * * This takes a const pointer to a set of credentials because the credentials * on task_struct are attached by const pointers to prevent accidental @@ -274,9 +276,11 @@ static inline void put_cred(const struct cred *_cred) { struct cred *cred = (struct cred *) _cred; - validate_creds(cred); - if (atomic_dec_and_test(&(cred)->usage)) - __put_cred(cred); + if (cred) { + validate_creds(cred); + if (atomic_dec_and_test(&(cred)->usage)) + __put_cred(cred); + } } /** diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 19c066dce1da..51362ff607fc 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -60,9 +60,10 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *dest); +typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), + debugfs_automount_t f, void *data); void debugfs_remove(struct dentry *dentry); diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index 7adf6cc4b305..633346b84cae 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -53,7 +53,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); #else /* !CONFIG_DEVFREQ_THERMAL */ -struct thermal_cooling_device * +static inline struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power) { diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index b393ab66073a..71a205c5c125 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -556,8 +556,6 @@ extern struct ratelimit_state dm_ratelimit_state; #define DMEMIT(x...) sz += ((sz >= maxlen) ? \ 0 : scnprintf(result + sz, maxlen - sz, x)) -#define SECTOR_SHIFT 9 - /* * Definitions of return values from target end_io function. */ diff --git a/include/linux/device.h b/include/linux/device.h index 4d67d85dd5e7..9bb803cf54fc 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -677,7 +677,8 @@ extern unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); extern void devm_free_pages(struct device *dev, unsigned long addr); -void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res); +void __iomem *devm_ioremap_resource(struct device *dev, + const struct resource *res); /* allows to add/remove a custom action to devres stack */ int devm_add_action(struct device *dev, void (*action)(void *), void *data); @@ -805,6 +806,7 @@ struct device { struct dev_pin_info *pins; #endif #ifdef CONFIG_GENERIC_MSI_IRQ + raw_spinlock_t msi_lock; struct list_head msi_list; #endif diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 3f6e858d42f1..1d4cb72f7fd7 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -189,8 +189,7 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline unsigned int dma_set_max_seg_size(struct device *dev, - unsigned int size) +static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) { if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index a16d1851cfb1..f1bd19c69fec 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1207,8 +1207,11 @@ static inline int dma_get_slave_caps(struct dma_chan *chan, static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx) { struct dma_slave_caps caps; + int ret; - dma_get_slave_caps(tx->chan, &caps); + ret = dma_get_slave_caps(tx->chan, &caps); + if (ret) + return ret; if (caps.descriptor_reuse) { tx->flags |= DMA_CTRL_REUSE; diff --git a/include/linux/ecryptfs.h b/include/linux/ecryptfs.h index 489b99e37128..0f0b66491940 100644 --- a/include/linux/ecryptfs.h +++ b/include/linux/ecryptfs.h @@ -158,7 +158,7 @@ static inline int ecryptfs_register_to_events( return 1; /* dummy handle */ } -static int ecryptfs_unregister_from_events(int user_handle) +static inline int ecryptfs_unregister_from_events(int user_handle) { return 0; } @@ -189,13 +189,14 @@ static inline bool ecryptfs_cipher_match(const void *ecrytpfs_data, return false; } -bool ecryptfs_is_page_in_metadata(const void *ecrytpfs_data, pgoff_t offset) +static inline bool ecryptfs_is_page_in_metadata(const void *ecrytpfs_data, + pgoff_t offset) { return false; } -bool ecryptfs_is_data_equal(const void *ecrytpfs_data1, - const void *ecrytpfs_data2) +static inline bool ecryptfs_is_data_equal(const void *ecrytpfs_data1, + const void *ecrytpfs_data2) { return false; } diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 698d51a0eea3..a65dadad65bf 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -55,6 +55,7 @@ static inline int elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregse } #endif +#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -69,5 +70,26 @@ elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct coredump_params *cprm); extern size_t elf_core_extra_data_size(void); +#else +static inline Elf_Half elf_core_extra_phdrs(void) +{ + return 0; +} + +static inline int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + return 1; +} + +static inline int elf_core_write_extra_data(struct coredump_params *cprm) +{ + return 1; +} + +static inline size_t elf_core_extra_data_size(void) +{ + return 0; +} +#endif #endif /* _LINUX_ELFCORE_H */ diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h index 278e3ef05336..56c6d9031663 100644 --- a/include/linux/elfnote.h +++ b/include/linux/elfnote.h @@ -53,7 +53,7 @@ .popsection ; #define ELFNOTE(name, type, desc) \ - ELFNOTE_START(name, type, "") \ + ELFNOTE_START(name, type, "a") \ desc ; \ ELFNOTE_END diff --git a/include/linux/export.h b/include/linux/export.h index 96e45ea463e7..a84c76fdebd8 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -1,5 +1,6 @@ #ifndef _LINUX_EXPORT_H #define _LINUX_EXPORT_H + /* * Export symbols from the kernel to modules. Forked from module.h * to reduce the amount of pointless cruft we feed to gcc when only @@ -42,11 +43,11 @@ extern struct module __this_module; #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to * generate a checksums for some symbols */ -#define __CRC_SYMBOL(sym, sec) \ - extern __visible void *__crc_##sym __attribute__((weak)); \ - static const unsigned long __kcrctab_##sym \ - __used \ - __attribute__((section("___kcrctab" sec "+" #sym), unused)) \ +#define __CRC_SYMBOL(sym, sec) \ + extern __visible void *__crc_##sym __attribute__((weak)); \ + static const unsigned long __kcrctab_##sym \ + __used \ + __attribute__((section("___kcrctab" sec "+" #sym), used)) \ = (unsigned long) &__crc_##sym; #else #define __CRC_SYMBOL(sym, sec) @@ -59,8 +60,7 @@ extern struct module __this_module; static const char __kstrtab_##sym[] \ __attribute__((section("__ksymtab_strings"), aligned(1))) \ = VMLINUX_SYMBOL_STR(sym); \ - extern const struct kernel_symbol __ksymtab_##sym; \ - __visible const struct kernel_symbol __ksymtab_##sym \ + static const struct kernel_symbol __ksymtab_##sym \ __used \ __attribute__((section("___ksymtab" sec "+" #sym), unused)) \ = { (unsigned long)&sym, __kstrtab_##sym } diff --git a/include/linux/file.h b/include/linux/file.h index f87d30882a24..67f0888abdde 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -12,6 +12,7 @@ struct file; extern void fput(struct file *); +extern void fput_many(struct file *, unsigned int); struct file_operations; struct vfsmount; @@ -40,6 +41,7 @@ static inline void fdput(struct fd fd) } extern struct file *fget(unsigned int fd); +extern struct file *fget_many(unsigned int fd, unsigned int refs); extern struct file *fget_raw(unsigned int fd); extern unsigned long __fdget(unsigned int fd); extern unsigned long __fdget_raw(unsigned int fd); diff --git a/include/linux/filter.h b/include/linux/filter.h index 677fa3b42194..22e9bd1cf047 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -13,6 +13,8 @@ #include <linux/printk.h> #include <linux/workqueue.h> #include <linux/sched.h> +#include <linux/capability.h> + #include <net/sch_generic.h> #include <asm/cacheflush.h> @@ -42,6 +44,18 @@ struct bpf_prog_aux; #define BPF_REG_X BPF_REG_7 #define BPF_REG_TMP BPF_REG_8 +/* Kernel hidden auxiliary/helper register for hardening step. + * Only used by eBPF JITs. It's nothing more than a temporary + * register that JITs use internally, only that here it's part + * of eBPF instructions that have been rewritten for blinding + * constants. See JIT pre-step in bpf_jit_blind_constants(). + */ +#define BPF_REG_AX MAX_BPF_REG +#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) + +/* unused opcode to mark special call to bpf_tail_call() helper */ +#define BPF_TAIL_CALL 0xf0 + /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 @@ -303,6 +317,70 @@ struct bpf_prog_aux; bpf_size; \ }) +#define BPF_SIZEOF(type) \ + ({ \ + const int __size = bytes_to_bpf_size(sizeof(type)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + +#define BPF_FIELD_SIZEOF(type, field) \ + ({ \ + const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + +#define __BPF_MAP_0(m, v, ...) v +#define __BPF_MAP_1(m, v, t, a, ...) m(t, a) +#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) +#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__) +#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__) +#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__) + +#define __BPF_REG_0(...) __BPF_PAD(5) +#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4) +#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3) +#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2) +#define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1) +#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__) + +#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__) +#define __BPF_REG(n, ...) __BPF_REG_##n(__VA_ARGS__) + +#define __BPF_CAST(t, a) \ + (__force t) \ + (__force \ + typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long), \ + (unsigned long)0, (t)0))) a +#define __BPF_V void +#define __BPF_N + +#define __BPF_DECL_ARGS(t, a) t a +#define __BPF_DECL_REGS(t, a) u64 a + +#define __BPF_PAD(n) \ + __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ + u64, __ur_3, u64, __ur_4, u64, __ur_5) + +#define BPF_CALL_x(x, name, ...) \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + { \ + return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ + } \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) + +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { @@ -316,7 +394,12 @@ struct sock_fprog_kern { struct sock_filter *filter; }; +#define BPF_BINARY_HEADER_MAGIC 0x05de0e82 + struct bpf_binary_header { +#ifdef CONFIG_CFI_CLANG + u32 magic; +#endif unsigned int pages; u8 image[]; }; @@ -348,27 +431,119 @@ struct sk_filter { struct bpf_prog *prog; }; -#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +#if IS_ENABLED(CONFIG_BPF_JIT) && IS_ENABLED(CONFIG_CFI_CLANG) +/* + * With JIT, the kernel makes an indirect call to dynamically generated + * code. Use bpf_call_func to perform additional validation of the call + * target to narrow down attack surface. Architectures implementing BPF + * JIT can override arch_bpf_jit_check_func for arch-specific checking. + */ +extern bool arch_bpf_jit_check_func(const struct bpf_prog *prog); + +static inline unsigned int __bpf_call_func(const struct bpf_prog *prog, + const void *ctx) +{ + /* Call interpreter with CFI checking. */ + return prog->bpf_func(ctx, prog->insnsi); +} + +static inline unsigned int __nocfi bpf_call_func(const struct bpf_prog *prog, + const void *ctx) +{ + unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK; + const struct bpf_binary_header *hdr = (void *)addr; + + if (!IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) && !prog->jited) + return __bpf_call_func(prog, ctx); + + /* + * We are about to call dynamically generated code. Check that the + * page has bpf_binary_header with a valid magic to limit possible + * call targets. + */ + BUG_ON(hdr->magic != BPF_BINARY_HEADER_MAGIC || + !arch_bpf_jit_check_func(prog)); + + /* Call jited function without CFI checking. */ + return prog->bpf_func(ctx, prog->insnsi); +} + +static inline void bpf_jit_set_header_magic(struct bpf_binary_header *hdr) +{ + hdr->magic = BPF_BINARY_HEADER_MAGIC; +} +#else +static inline unsigned int bpf_call_func(const struct bpf_prog *prog, + const void *ctx) +{ + return prog->bpf_func(ctx, prog->insnsi); +} + +static inline void bpf_jit_set_header_magic(struct bpf_binary_header *hdr) +{ +} +#endif + +#define BPF_PROG_RUN(filter, ctx) bpf_call_func(filter, ctx) + +#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN + +struct bpf_skb_data_end { + struct qdisc_skb_cb qdisc_cb; + void *data_end; +}; + +struct xdp_buff { + void *data; + void *data_end; +}; + +/* compute the linear packet data range [data, data_end) which + * will be accessed by cls_bpf and act_bpf programs + */ +static inline void bpf_compute_data_end(struct sk_buff *skb) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); + cb->data_end = skb->data + skb_headlen(skb); +} + +static inline u8 *bpf_skb_cb(struct sk_buff *skb) +{ + /* eBPF programs may read/write skb->cb[] area to transfer meta + * data between tail calls. Since this also needs to work with + * tc, that scratch memory is mapped to qdisc_skb_cb's data area. + * + * In some socket filter cases, the cb unfortunately needs to be + * saved/restored so that protocol specific skb->cb[] data won't + * be lost. In any case, due to unpriviledged eBPF programs + * attached to sockets, we need to clear the bpf_skb_cb() area + * to not leak previous contents to user space. + */ + BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN); + BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != + FIELD_SIZEOF(struct qdisc_skb_cb, data)); + + return qdisc_skb_cb(skb)->data; +} static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, struct sk_buff *skb) { - u8 *cb_data = qdisc_skb_cb(skb)->data; - u8 saved_cb[QDISC_CB_PRIV_LEN]; + u8 *cb_data = bpf_skb_cb(skb); + u8 cb_saved[BPF_SKB_CB_LEN]; u32 res; - BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != - QDISC_CB_PRIV_LEN); - if (unlikely(prog->cb_access)) { - memcpy(saved_cb, cb_data, sizeof(saved_cb)); - memset(cb_data, 0, sizeof(saved_cb)); + memcpy(cb_saved, cb_data, sizeof(cb_saved)); + memset(cb_data, 0, sizeof(cb_saved)); } res = BPF_PROG_RUN(prog, skb); if (unlikely(prog->cb_access)) - memcpy(cb_data, saved_cb, sizeof(saved_cb)); + memcpy(cb_data, cb_saved, sizeof(cb_saved)); return res; } @@ -376,13 +551,26 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { - u8 *cb_data = qdisc_skb_cb(skb)->data; + u8 *cb_data = bpf_skb_cb(skb); if (unlikely(prog->cb_access)) - memset(cb_data, 0, QDISC_CB_PRIV_LEN); + memset(cb_data, 0, BPF_SKB_CB_LEN); + return BPF_PROG_RUN(prog, skb); } +static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, + struct xdp_buff *xdp) +{ + u32 ret; + + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, (void *)xdp); + rcu_read_unlock(); + + return ret; +} + static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), @@ -427,7 +615,7 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb) return sk_filter_trim_cap(sk, skb, 1); } -int bpf_prog_select_runtime(struct bpf_prog *fp); +struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); void bpf_prog_free(struct bpf_prog *fp); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); @@ -450,12 +638,10 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); -int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, - bool locked); int sk_attach_bpf(u32 ufd, struct sock *sk); +int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); -int __sk_detach_filter(struct sock *sk, bool locked); - int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); @@ -463,13 +649,21 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_int_jit_compile(struct bpf_prog *fp); + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); +void bpf_jit_compile(struct bpf_prog *prog); bool bpf_helper_changes_skb_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT +extern int bpf_jit_enable; +extern int bpf_jit_harden; +extern long bpf_jit_limit; +extern long bpf_jit_limit_max; + typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); struct bpf_binary_header * @@ -478,9 +672,11 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); -void bpf_jit_compile(struct bpf_prog *fp); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); +void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); + static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { @@ -491,11 +687,34 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 16, 1, image, proglen, false); } -#else -static inline void bpf_jit_compile(struct bpf_prog *fp) + +static inline bool bpf_jit_is_ebpf(void) { +# ifdef CONFIG_HAVE_EBPF_JIT + return true; +# else + return false; +# endif } +static inline bool bpf_jit_blinding_enabled(void) +{ + /* These are the prerequisites, should someone ever have the + * idea to call blinding outside of them, we make sure to + * bail out. + */ + if (!bpf_jit_is_ebpf()) + return false; + if (!bpf_jit_enable) + return false; + if (!bpf_jit_harden) + return false; + if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) + return false; + + return true; +} +#else static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); diff --git a/include/linux/font.h b/include/linux/font.h index d6821769dd1e..f85e70bd4793 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -57,4 +57,17 @@ extern const struct font_desc *get_default_font(int xres, int yres, /* Max. length for the name of a predefined font */ #define MAX_FONT_NAME 32 +/* Extra word getters */ +#define REFCOUNT(fd) (((int *)(fd))[-1]) +#define FNTSIZE(fd) (((int *)(fd))[-2]) +#define FNTCHARCNT(fd) (((int *)(fd))[-3]) +#define FNTSUM(fd) (((int *)(fd))[-4]) + +#define FONT_EXTRA_WORDS 4 + +struct font_data { + unsigned int extra[FONT_EXTRA_WORDS]; + const unsigned char data[]; +} __packed; + #endif /* _VIDEO_FONT_H */ diff --git a/include/linux/frame.h b/include/linux/frame.h new file mode 100644 index 000000000000..d772c61c31da --- /dev/null +++ b/include/linux/frame.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_FRAME_H +#define _LINUX_FRAME_H + +#ifdef CONFIG_STACK_VALIDATION +/* + * This macro marks the given function's stack frame as "non-standard", which + * tells objtool to ignore the function when doing stack metadata validation. + * It should only be used in special cases where you're 100% sure it won't + * affect the reliability of frame pointers and kernel stack traces. + * + * For more information, see tools/objtool/Documentation/stack-validation.txt. + */ +#define STACK_FRAME_NON_STANDARD(func) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ + *__func_stack_frame_non_standard_##func = func + +#else /* !CONFIG_STACK_VALIDATION */ + +#define STACK_FRAME_NON_STANDARD(func) + +#endif /* CONFIG_STACK_VALIDATION */ + +#endif /* _LINUX_FRAME_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index cb599dd90018..5105b5be5e68 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -953,7 +953,7 @@ struct file_handle { __u32 handle_bytes; int handle_type; /* file identifier */ - unsigned char f_handle[0]; + unsigned char f_handle[]; }; static inline struct file *get_file(struct file *f) @@ -961,7 +961,9 @@ static inline struct file *get_file(struct file *f) atomic_long_inc(&f->f_count); return f; } -#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count) +#define get_file_rcu_many(x, cnt) \ + atomic_long_add_unless(&(x)->f_count, (cnt), 0) +#define get_file_rcu(x) get_file_rcu_many((x), 1) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) @@ -1441,6 +1443,13 @@ struct super_block { struct hlist_head s_pins; /* + * Owning user namespace and default context in which to + * interpret filesystem uids, gids, quotas, device nodes, + * xattrs and security labels. + */ + struct user_namespace *s_user_ns; + + /* * Keep the lru lists last in the structure so they always sit on their * own individual cachelines. */ @@ -1936,6 +1945,10 @@ struct super_operations { * wb stat updates to grab mapping->tree_lock. See * inode_switch_wb_work_fn() for details. * + * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. + * Used to detect that mark_inode_dirty() should not move + * inode between dirty lists. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -1953,9 +1966,9 @@ struct super_operations { #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define __I_DIRTY_TIME_EXPIRED 12 -#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) +#define I_DIRTY_TIME_EXPIRED (1 << 12) #define I_WB_SWITCH (1 << 13) +#define I_SYNC_QUEUED (1 << 17) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) @@ -2053,8 +2066,9 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, - void *data, int (*fill_super)(struct super_block *, void *, int)); +extern struct dentry *mount_ns(struct file_system_type *fs_type, + int flags, void *data, void *ns, struct user_namespace *user_ns, + int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2074,6 +2088,11 @@ void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); int get_anon_bdev(dev_t *); void free_anon_bdev(dev_t); +struct super_block *sget_userns(struct file_system_type *type, + int (*test)(struct super_block *,void *), + int (*set)(struct super_block *,void *), + int flags, struct user_namespace *user_ns, + void *data); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), @@ -2911,7 +2930,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); -extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +extern int simple_rename(struct inode *, struct dentry *, + struct inode *, struct dentry *, unsigned int); extern int noop_fsync(struct file *, loff_t, loff_t, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 312a4ef093e3..c0b08bbe85b2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -244,8 +244,16 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) return *this_cpu_ptr(ops->disabled); } +#ifdef CONFIG_CFI_CLANG +/* Use a C stub with the correct type for CFI */ +static inline void ftrace_stub(unsigned long a0, unsigned long a1, + struct ftrace_ops *op, struct pt_regs *regs) +{ +} +#else extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct pt_regs *regs); +#endif #else /* !CONFIG_FUNCTION_TRACER */ /* @@ -748,7 +756,9 @@ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* for init task */ -#define INIT_FTRACE_GRAPH .ret_stack = NULL, +#define INIT_FTRACE_GRAPH \ + .ret_stack = NULL, \ + .tracing_graph_pause = ATOMIC_INIT(0), /* * Stack of return addresses for functions diff --git a/include/linux/futex.h b/include/linux/futex.h index c015fa91e7cc..0f294ae63c78 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -1,6 +1,8 @@ #ifndef _LINUX_FUTEX_H #define _LINUX_FUTEX_H +#include <linux/sched.h> + #include <uapi/linux/futex.h> struct inode; @@ -11,9 +13,6 @@ union ktime; long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -extern int -handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); - /* * Futexes are matched on equal values of this key. * The key type depends on whether it's a shared or private mapping. @@ -56,19 +55,34 @@ union futex_key { #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } #ifdef CONFIG_FUTEX -extern void exit_robust_list(struct task_struct *curr); -extern void exit_pi_state_list(struct task_struct *curr); -#ifdef CONFIG_HAVE_FUTEX_CMPXCHG -#define futex_cmpxchg_enabled 1 -#else -extern int futex_cmpxchg_enabled; -#endif -#else -static inline void exit_robust_list(struct task_struct *curr) -{ -} -static inline void exit_pi_state_list(struct task_struct *curr) +enum { + FUTEX_STATE_OK, + FUTEX_STATE_EXITING, + FUTEX_STATE_DEAD, +}; + +static inline void futex_init_task(struct task_struct *tsk) { + tsk->robust_list = NULL; +#ifdef CONFIG_COMPAT + tsk->compat_robust_list = NULL; +#endif + INIT_LIST_HEAD(&tsk->pi_state_list); + tsk->pi_state_cache = NULL; + tsk->futex_state = FUTEX_STATE_OK; + mutex_init(&tsk->futex_exit_mutex); } + +void futex_exit_recursive(struct task_struct *tsk); +void futex_exit_release(struct task_struct *tsk); +void futex_exec_release(struct task_struct *tsk); + +long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); +#else +static inline void futex_init_task(struct task_struct *tsk) { } +static inline void futex_exit_recursive(struct task_struct *tsk) { } +static inline void futex_exit_release(struct task_struct *tsk) { } +static inline void futex_exec_release(struct task_struct *tsk) { } #endif #endif diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5012fcdb4c9e..a27d0aef07f6 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -727,9 +727,11 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part) static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) { #if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) + preempt_disable(); write_seqcount_begin(&part->nr_sects_seq); part->nr_sects = size; write_seqcount_end(&part->nr_sects_seq); + preempt_enable(); #elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) preempt_disable(); part->nr_sects = size; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index a5f251d76018..858031d58899 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -535,7 +535,7 @@ extern void free_kmem_pages(unsigned long addr, unsigned int order); void page_alloc_init(void); void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); -void drain_local_pages(struct zone *zone); +void drain_local_pages(void *zone); #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT void page_alloc_init_late(void); diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index fb0fde686cb1..4a9838feb086 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -398,7 +398,7 @@ static inline int gpiod_to_irq(const struct gpio_desc *desc) static inline struct gpio_desc *gpio_to_desc(unsigned gpio) { - return ERR_PTR(-EINVAL); + return NULL; } static inline int desc_to_gpio(const struct gpio_desc *desc) diff --git a/include/linux/hid.h b/include/linux/hid.h index 2a6dc0ab9b88..e709a4cf8999 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -246,6 +246,8 @@ struct hid_item { #define HID_CP_SELECTION 0x000c0080 #define HID_CP_MEDIASELECTION 0x000c0087 #define HID_CP_SELECTDISC 0x000c00ba +#define HID_CP_VOLUMEUP 0x000c00e9 +#define HID_CP_VOLUMEDOWN 0x000c00ea #define HID_CP_PLAYBACKSPEED 0x000c00f1 #define HID_CP_PROXIMITY 0x000c0109 #define HID_CP_SPEAKERSYSTEM 0x000c0160 @@ -385,7 +387,6 @@ struct hid_local { unsigned usage_minimum; unsigned delimiter_depth; unsigned delimiter_branch; - unsigned int usage_page_preceding; }; /* @@ -459,7 +460,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */ +#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 @@ -529,10 +530,12 @@ struct hid_device { /* device report descriptor */ * battery is non-NULL. */ struct power_supply *battery; + __s32 battery_capacity; __s32 battery_min; __s32 battery_max; __s32 battery_report_type; __s32 battery_report_id; + bool battery_reported; #endif unsigned int status; /* see STAT flags above */ @@ -734,6 +737,7 @@ struct hid_driver { * @raw_request: send raw report request to device (e.g. feature report) * @output_report: send output report to device * @idle: send idle request to device + * @max_buffer_size: over-ride maximum data buffer size (default: HID_MAX_BUFFER_SIZE) */ struct hid_ll_driver { int (*start)(struct hid_device *hdev); @@ -758,8 +762,26 @@ struct hid_ll_driver { int (*output_report) (struct hid_device *hdev, __u8 *buf, size_t len); int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype); + + unsigned int max_buffer_size; }; +extern struct hid_ll_driver i2c_hid_ll_driver; +extern struct hid_ll_driver hidp_hid_driver; +extern struct hid_ll_driver uhid_hid_driver; +extern struct hid_ll_driver usb_hid_driver; + +static inline bool hid_is_using_ll_driver(struct hid_device *hdev, + struct hid_ll_driver *driver) +{ + return hdev->ll_driver == driver; +} + +static inline bool hid_is_usb(struct hid_device *hdev) +{ + return hid_is_using_ll_driver(hdev, &usb_hid_driver); +} + #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 @@ -874,34 +896,49 @@ static inline void hid_device_io_stop(struct hid_device *hid) { * @max: maximal valid usage->code to consider later (out parameter) * @type: input event type (EV_KEY, EV_REL, ...) * @c: code which corresponds to this usage and type + * + * The value pointed to by @bit will be set to NULL if either @type is + * an unhandled event type, or if @c is out of range for @type. This + * can be used as an error condition. */ static inline void hid_map_usage(struct hid_input *hidinput, struct hid_usage *usage, unsigned long **bit, int *max, - __u8 type, __u16 c) + __u8 type, unsigned int c) { struct input_dev *input = hidinput->input; - - usage->type = type; - usage->code = c; + unsigned long *bmap = NULL; + unsigned int limit = 0; switch (type) { case EV_ABS: - *bit = input->absbit; - *max = ABS_MAX; + bmap = input->absbit; + limit = ABS_MAX; break; case EV_REL: - *bit = input->relbit; - *max = REL_MAX; + bmap = input->relbit; + limit = REL_MAX; break; case EV_KEY: - *bit = input->keybit; - *max = KEY_MAX; + bmap = input->keybit; + limit = KEY_MAX; break; case EV_LED: - *bit = input->ledbit; - *max = LED_MAX; + bmap = input->ledbit; + limit = LED_MAX; break; } + + if (unlikely(c > limit || !bmap)) { + pr_warn_ratelimited("%s: Invalid code %d type %d\n", + input->name, c, type); + *bit = NULL; + return; + } + + usage->type = type; + usage->code = c; + *max = limit; + *bit = bmap; } /** @@ -915,7 +952,8 @@ static inline void hid_map_usage_clear(struct hid_input *hidinput, __u8 type, __u16 c) { hid_map_usage(hidinput, usage, bit, max, type, c); - clear_bit(c, *bit); + if (*bit) + clear_bit(usage->code, *bit); } /** @@ -1109,8 +1147,7 @@ static inline void hid_hw_wait(struct hid_device *hdev) */ static inline u32 hid_report_len(struct hid_report *report) { - /* equivalent to DIV_ROUND_UP(report->size, 8) + !!(report->id > 0) */ - return ((report->size - 1) >> 3) + 1 + (report->id > 0); + return DIV_ROUND_UP(report->size, 8) + (report->id > 0); } int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, diff --git a/include/linux/hil_mlc.h b/include/linux/hil_mlc.h index 394a8405dd74..e0521a1d9325 100644 --- a/include/linux/hil_mlc.h +++ b/include/linux/hil_mlc.h @@ -103,7 +103,7 @@ struct hilse_node { /* Methods for back-end drivers, e.g. hp_sdc_mlc */ typedef int (hil_mlc_cts) (hil_mlc *mlc); -typedef void (hil_mlc_out) (hil_mlc *mlc); +typedef int (hil_mlc_out) (hil_mlc *mlc); typedef int (hil_mlc_in) (hil_mlc *mlc, suseconds_t timeout); struct hil_mlc_devinfo { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b71d142a93f3..151ff61419dd 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -92,7 +92,7 @@ void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode, bool restore_reserve); extern struct mutex *hugetlb_fault_mutex_table; u32 hugetlb_fault_mutex_hash(struct hstate *h, struct address_space *mapping, - pgoff_t idx, unsigned long address); + pgoff_t idx); pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); @@ -504,6 +504,9 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) { atomic_long_sub(l, &mm->hugetlb_usage); } + +void set_page_huge_active(struct page *page); + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; #define alloc_huge_page(v, a, r) NULL diff --git a/include/linux/i2c-algo-pca.h b/include/linux/i2c-algo-pca.h index a3c3ecd59f08..7a43afd27365 100644 --- a/include/linux/i2c-algo-pca.h +++ b/include/linux/i2c-algo-pca.h @@ -52,6 +52,20 @@ #define I2C_PCA_CON_SI 0x08 /* Serial Interrupt */ #define I2C_PCA_CON_CR 0x07 /* Clock Rate (MASK) */ +/** + * struct pca_i2c_bus_settings - The configured PCA i2c bus settings + * @mode: Configured i2c bus mode + * @tlow: Configured SCL LOW period + * @thi: Configured SCL HIGH period + * @clock_freq: The configured clock frequency + */ +struct pca_i2c_bus_settings { + int mode; + int tlow; + int thi; + int clock_freq; +}; + struct i2c_algo_pca_data { void *data; /* private low level data */ void (*write_byte) (void *data, int reg, int val); @@ -63,6 +77,7 @@ struct i2c_algo_pca_data { * For PCA9665, use the frequency you want here. */ unsigned int i2c_clock; unsigned int chip; + struct pca_i2c_bus_settings bus_settings; }; int i2c_pca_add_bus(struct i2c_adapter *); diff --git a/include/linux/i2c/i2c-msm-v2.h b/include/linux/i2c/i2c-msm-v2.h index 26cd52644f8d..cf695beaa4a3 100644 --- a/include/linux/i2c/i2c-msm-v2.h +++ b/include/linux/i2c/i2c-msm-v2.h @@ -37,6 +37,7 @@ enum msm_i2_debug_level { #define MASK_IS_SET_BOOL(val, mask) (MASK_IS_SET(val, mask) ? 1 : 0) #define KHz(freq) (1000 * freq) #define I2C_MSM_CLK_FAST_PLUS_FREQ (1000000) +#define I2C_MSM_MAX_RETRIES 5 /* QUP Registers */ enum { diff --git a/include/linux/ide.h b/include/linux/ide.h index a633898f36ac..eb2ac48c99db 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -128,7 +128,6 @@ struct ide_io_ports { */ #define PARTN_BITS 6 /* number of minor dev bits for partitions */ #define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */ -#define SECTOR_SIZE 512 /* * Timeouts for various operations: diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index c2f46cffaab4..450ea5e38564 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -607,6 +607,15 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) } /** + * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee80211_is_any_nullfunc(__le16 fc) +{ + return (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)); +} + +/** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @fc: frame control field in little-endian byteorder */ @@ -629,6 +638,16 @@ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0; } +/** + * ieee80211_is_frag - check if a frame is a fragment + * @hdr: 802.11 header of the frame + */ +static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) +{ + return ieee80211_has_morefrags(hdr->frame_control) || + hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG); +} + struct ieee80211s_hdr { u8 flags; u8 ttl; diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index d5569734f672..676cf8d0acca 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -28,6 +28,14 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb_mac_header(skb); } +/* Prefer this version in TX path, instead of + * skb_reset_mac_header() + eth_hdr() + */ +static inline struct ethhdr *skb_eth_hdr(const struct sk_buff *skb) +{ + return (struct ethhdr *)skb->data; +} + int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index a4ccc3122f93..cfcbc49f4ddf 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -70,13 +70,14 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, if (likely(success)) { struct vlan_pcpu_stats *pcpu_stats; - pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); + pcpu_stats = get_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); pcpu_stats->rx_packets++; pcpu_stats->rx_bytes += len; if (multicast) pcpu_stats->rx_multicast++; u64_stats_update_end(&pcpu_stats->syncp); + put_cpu_ptr(vlan->pcpu_stats); } else { this_cpu_inc(vlan->pcpu_stats->rx_errors); } diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index dd676ba758ee..6c4f2bd8aaa7 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -30,6 +30,8 @@ #define VLAN_ETH_DATA_LEN 1500 /* Max. octets in payload */ #define VLAN_ETH_FRAME_LEN 1518 /* Max. octets in frame sans FCS */ +#define VLAN_MAX_DEPTH 8 /* Max. number of nested VLAN tags parsed */ + /* * struct vlan_hdr - vlan header * @h_vlan_TCI: priority and VLAN ID @@ -73,7 +75,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -static inline bool is_vlan_dev(struct net_device *dev) +static inline bool is_vlan_dev(const struct net_device *dev) { return dev->priv_flags & IFF_802_1Q_VLAN; } @@ -478,10 +480,10 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, +static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, int *depth) { - unsigned int vlan_depth = skb->mac_len; + unsigned int vlan_depth = skb->mac_len, parse_depth = VLAN_MAX_DEPTH; /* if type is 802.1Q/AD then the header should already be * present at mac_len - VLAN_HLEN (if mac_len > 0), or at @@ -496,13 +498,12 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, vlan_depth = ETH_HLEN; } do { - struct vlan_hdr *vh; + struct vlan_hdr vhdr, *vh; - if (unlikely(!pskb_may_pull(skb, - vlan_depth + VLAN_HLEN))) + vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr); + if (unlikely(!vh || !--parse_depth)) return 0; - vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } while (type == htons(ETH_P_8021Q) || @@ -522,11 +523,25 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 vlan_get_protocol(struct sk_buff *skb) +static inline __be16 vlan_get_protocol(const struct sk_buff *skb) { return __vlan_get_protocol(skb, skb->protocol, NULL); } +/* A getter for the SKB protocol field which will handle VLAN tags consistently + * whether VLAN acceleration is enabled or not. + */ +static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) +{ + if (!skip_vlan) + /* VLAN acceleration strips the VLAN header from the skb and + * moves it to skb->vlan_proto + */ + return skb_vlan_tag_present(skb) ? skb->vlan_proto : skb->protocol; + + return vlan_get_protocol(skb); +} + static inline void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) { diff --git a/include/linux/init.h b/include/linux/init.h index 3561ea30ed8c..9ae494724b1b 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -163,24 +163,8 @@ extern bool initcall_debug; #ifndef __ASSEMBLY__ -#ifdef CONFIG_LTO -/* Work around a LTO gcc problem: when there is no reference to a variable - * in a module it will be moved to the end of the program. This causes - * reordering of initcalls which the kernel does not like. - * Add a dummy reference function to avoid this. The function is - * deleted by the linker. - */ -#define LTO_REFERENCE_INITCALL(x) \ - ; /* yes this is needed */ \ - static __used __exit void *reference_##x(void) \ - { \ - return &x; \ - } -#else -#define LTO_REFERENCE_INITCALL(x) -#endif - -/* initcalls are now grouped by functionality into separate +/* + * initcalls are now grouped by functionality into separate * subsections. Ordering inside the subsections is determined * by link order. * For backwards compatibility, initcall() puts the call in @@ -188,12 +172,16 @@ extern bool initcall_debug; * * The `id' arg to __define_initcall() is needed so that multiple initcalls * can point at the same handler without causing duplicate-symbol build errors. + * + * Initcalls are run by placing pointers in initcall sections that the + * kernel iterates at runtime. The linker can do dead code / data elimination + * and remove that completely, so the initcall sections have to be marked + * as KEEP() in the linker script. */ #define __define_initcall(fn, id) \ static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(".initcall" #id ".init"))) = fn; \ - LTO_REFERENCE_INITCALL(__initcall_##fn##id) + __attribute__((__section__(".initcall" #id ".init"))) = fn; /* * Early initcalls run before initializing SMP. @@ -229,15 +217,15 @@ extern bool initcall_debug; #define __initcall(fn) device_initcall(fn) -#define __exitcall(fn) \ +#define __exitcall(fn) \ static exitcall_t __exitcall_##fn __exit_call = fn -#define console_initcall(fn) \ - static initcall_t __initcall_##fn \ +#define console_initcall(fn) \ + static initcall_t __initcall_##fn \ __used __section(.con_initcall.init) = fn -#define security_initcall(fn) \ - static initcall_t __initcall_##fn \ +#define security_initcall(fn) \ + static initcall_t __initcall_##fn \ __used __section(.security_initcall.init) = fn struct obs_kernel_param { diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 021b1e9ff6cd..8aed56931361 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -208,6 +208,7 @@ extern struct task_group root_task_group; .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ .nr_cpus_allowed= NR_CPUS, \ + .cpus_requested = CPU_MASK_ALL, \ .mm = NULL, \ .active_mm = &init_mm, \ .restart_block = { \ diff --git a/include/linux/input.h b/include/linux/input.h index 1e967694e9a5..b365d7c31e58 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -529,6 +529,7 @@ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code, int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file); int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file); +int input_ff_flush(struct input_dev *dev, struct file *file); int input_ff_create_memless(struct input_dev *dev, void *data, int (*play_effect)(struct input_dev *, void *, struct ff_effect *)); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 27dbab59f034..188bd1768971 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -317,8 +317,8 @@ enum { #define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK) #define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) -#define QI_DEV_EIOTLB_GLOB(g) ((u64)g) -#define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) +#define QI_DEV_EIOTLB_GLOB(g) ((u64)(g) & 0x1) +#define QI_DEV_EIOTLB_PASID(p) ((u64)((p) & 0xfffff) << 32) #define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) #define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) #define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ @@ -447,6 +447,8 @@ struct intel_iommu { struct device *iommu_dev; /* IOMMU-sysfs device */ int node; u32 flags; /* Software defined flags */ + + struct dmar_drhd_unit *drhd; }; static inline void __iommu_flush_cache( diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 1eee6bcfcf76..cf1a1c126e89 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -123,6 +123,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + if (ns) { + if (atomic_inc_not_zero(&ns->count)) + return ns; + } + + return NULL; +} + extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, @@ -139,6 +149,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + return ns; +} + static inline void put_ipc_ns(struct ipc_namespace *ns) { } diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7631503cd33f..454fa09de1c7 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -53,11 +53,13 @@ struct ipv6_devconf { __s32 mc_forwarding; #endif __s32 disable_ipv6; + __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; __s32 ndisc_notify; __s32 suppress_frag_ndisc; __s32 accept_ra_mtu; + __s32 drop_unsolicited_na; struct ipv6_stable_secret { bool initialized; struct in6_addr secret; diff --git a/include/linux/irq.h b/include/linux/irq.h index f653225a896d..0e57f41bde84 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -136,6 +136,9 @@ struct irq_domain; * @node: node index useful for balancing * @handler_data: per-IRQ data for the irq_chip methods * @affinity: IRQ affinity on SMP + * @effective_affinity: The effective IRQ affinity on SMP as some irq + * chips do not allow multi CPU destinations. + * A subset of @affinity. * @msi_desc: MSI descriptor */ struct irq_common_data { @@ -146,6 +149,9 @@ struct irq_common_data { void *handler_data; struct msi_desc *msi_desc; cpumask_var_t affinity; +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + cpumask_var_t effective_affinity; +#endif }; /** @@ -473,15 +479,15 @@ static inline int irq_set_parent(int irq, int parent_irq) * Built-in IRQ handlers for various IRQ types, * callable via desc->handle_irq() */ -extern void handle_level_irq(struct irq_desc *desc); -extern void handle_fasteoi_irq(struct irq_desc *desc); -extern void handle_edge_irq(struct irq_desc *desc); -extern void handle_edge_eoi_irq(struct irq_desc *desc); -extern void handle_simple_irq(struct irq_desc *desc); -extern void handle_percpu_irq(struct irq_desc *desc); -extern void handle_percpu_devid_irq(struct irq_desc *desc); -extern void handle_bad_irq(struct irq_desc *desc); -extern void handle_nested_irq(unsigned int irq); +extern bool handle_level_irq(struct irq_desc *desc); +extern bool handle_fasteoi_irq(struct irq_desc *desc); +extern bool handle_edge_irq(struct irq_desc *desc); +extern bool handle_edge_eoi_irq(struct irq_desc *desc); +extern bool handle_simple_irq(struct irq_desc *desc); +extern bool handle_percpu_irq(struct irq_desc *desc); +extern bool handle_percpu_devid_irq(struct irq_desc *desc); +extern bool handle_bad_irq(struct irq_desc *desc); +extern bool handle_nested_irq(unsigned int irq); extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY @@ -690,6 +696,29 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) return d->common->affinity; } +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK +static inline +struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +{ + return d->common->effective_affinity; +} +static inline void irq_data_update_effective_affinity(struct irq_data *d, + const struct cpumask *m) +{ + cpumask_copy(d->common->effective_affinity, m); +} +#else +static inline void irq_data_update_effective_affinity(struct irq_data *d, + const struct cpumask *m) +{ +} +static inline +struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d) +{ + return d->common->affinity; +} +#endif + unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index a587a33363c7..50b55bdfe0bd 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -135,9 +135,9 @@ static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) * Architectures call this to let the generic IRQ layer * handle an interrupt. */ -static inline void generic_handle_irq_desc(struct irq_desc *desc) +static inline bool generic_handle_irq_desc(struct irq_desc *desc) { - desc->handle_irq(desc); + return desc->handle_irq(desc); } int generic_handle_irq(unsigned int irq); diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index 661bed0ed1f3..b31ab4b59c16 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -8,7 +8,7 @@ struct irq_desc; struct irq_data; -typedef void (*irq_flow_handler_t)(struct irq_desc *desc); +typedef bool (*irq_flow_handler_t)(struct irq_desc *desc); typedef void (*irq_preflow_handler_t)(struct irq_data *data); #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 65407f6c9120..00108208759d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1440,7 +1440,7 @@ static inline int jbd2_space_needed(journal_t *journal) static inline unsigned long jbd2_log_space_left(journal_t *journal) { /* Allow for rounding errors */ - unsigned long free = journal->j_free - 32; + long free = journal->j_free - 32; if (journal->j_committing_transaction) { unsigned long committing = atomic_read(&journal-> @@ -1449,7 +1449,7 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) /* Transaction + control blocks */ free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT); } - return free; + return max_t(long, free, 0); } /* diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 05b63a1e9f84..3098b65177a3 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -605,6 +605,9 @@ do { \ * let gcc optimize the rest. */ +#ifdef CONFIG_DISABLE_TRACE_PRINTK +#define trace_printk pr_debug +#else #define trace_printk(fmt, ...) \ do { \ char _______STR[] = __stringify((__VA_ARGS__)); \ @@ -627,6 +630,7 @@ do { \ else \ __trace_printk(_THIS_IP_, fmt, ##args); \ } while (0) +#endif extern __printf(2, 3) int __trace_bprintk(unsigned long ip, const char *fmt, ...); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5d4e9c4b821d..4389b3157168 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -24,6 +24,7 @@ struct seq_file; struct vm_area_struct; struct super_block; struct file_system_type; +struct poll_table_struct; struct kernfs_open_node; struct kernfs_iattrs; @@ -46,6 +47,7 @@ enum kernfs_node_flag { KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, KERNFS_EMPTY_DIR = 0x1000, + KERNFS_HAS_RELEASE = 0x2000, }; /* @flags for kernfs_create_root() */ @@ -152,6 +154,8 @@ struct kernfs_syscall_ops { int (*rmdir)(struct kernfs_node *kn); int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name); + int (*show_path)(struct seq_file *sf, struct kernfs_node *kn, + struct kernfs_root *root); }; struct kernfs_root { @@ -173,6 +177,7 @@ struct kernfs_open_file { /* published fields */ struct kernfs_node *kn; struct file *file; + struct seq_file *seq_file; void *priv; /* private fields, do not use outside kernfs proper */ @@ -183,11 +188,19 @@ struct kernfs_open_file { size_t atomic_write_len; bool mmapped; + bool released:1; const struct vm_operations_struct *vm_ops; }; struct kernfs_ops { /* + * Optional open/release methods. Both are called with + * @of->seq_file populated. + */ + int (*open)(struct kernfs_open_file *of); + void (*release)(struct kernfs_open_file *of); + + /* * Read is handled by either seq_file or raw_read(). * * If seq_show() is present, seq_file path is active. Other seq @@ -225,6 +238,9 @@ struct kernfs_ops { ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes, loff_t off); + unsigned int (*poll)(struct kernfs_open_file *of, + struct poll_table_struct *pt); + int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma); #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -267,13 +283,15 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); size_t kernfs_path_len(struct kernfs_node *kn); -char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, - size_t buflen); +int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, + char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn); struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns); +struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, + const char *path, const void *ns); void kernfs_get(struct kernfs_node *kn); void kernfs_put(struct kernfs_node *kn); @@ -281,6 +299,8 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry); struct kernfs_root *kernfs_root_from_sb(struct super_block *sb); struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn); +struct dentry *kernfs_node_dentry(struct kernfs_node *kn, + struct super_block *sb); struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); @@ -309,6 +329,8 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns); int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr); +unsigned int kernfs_generic_poll(struct kernfs_open_file *of, + struct poll_table_struct *pt); void kernfs_notify(struct kernfs_node *kn); const void *kernfs_super_ns(struct super_block *sb); @@ -336,9 +358,10 @@ static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) static inline size_t kernfs_path_len(struct kernfs_node *kn) { return 0; } -static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, - size_t buflen) -{ return NULL; } +static inline int kernfs_path_from_node(struct kernfs_node *root_kn, + struct kernfs_node *kn, + char *buf, size_t buflen) +{ return -ENOSYS; } static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { } static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { } @@ -350,6 +373,10 @@ static inline struct kernfs_node * kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name, const void *ns) { return NULL; } +static inline struct kernfs_node * +kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path, + const void *ns) +{ return NULL; } static inline void kernfs_get(struct kernfs_node *kn) { } static inline void kernfs_put(struct kernfs_node *kn) { } @@ -424,6 +451,22 @@ static inline void kernfs_init(void) { } #endif /* CONFIG_KERNFS */ +/** + * kernfs_path - build full path of a given node + * @kn: kernfs_node of interest + * @buf: buffer to copy @kn's name into + * @buflen: size of @buf + * + * Builds and returns the full path of @kn in @buf of @buflen bytes. The + * path is built from the end of @buf so the returned pointer usually + * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated + * and %NULL is returned. + */ +static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) +{ + return kernfs_path_from_node(kn, NULL, buf, buflen); +} + static inline struct kernfs_node * kernfs_find_and_get(struct kernfs_node *kn, const char *name) { @@ -431,6 +474,12 @@ kernfs_find_and_get(struct kernfs_node *kn, const char *name) } static inline struct kernfs_node * +kernfs_walk_and_get(struct kernfs_node *kn, const char *path) +{ + return kernfs_walk_and_get_ns(kn, path, NULL); +} + +static inline struct kernfs_node * kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, void *priv) { diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index e465bb15912d..6be5545d3584 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -317,7 +317,7 @@ extern void gdbstub_exit(int status); extern int kgdb_single_step; extern atomic_t kgdb_active; #define in_dbg_master() \ - (raw_smp_processor_id() == atomic_read(&kgdb_active)) + (irqs_disabled() && (smp_processor_id() == atomic_read(&kgdb_active))) extern bool dbg_is_early; extern void __init dbg_late_init(void); #else /* ! CONFIG_KGDB */ diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 41ba63c9a36e..fe8f6408ec5b 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -108,6 +108,8 @@ extern int __must_check kobject_rename(struct kobject *, const char *new_name); extern int __must_check kobject_move(struct kobject *, struct kobject *); extern struct kobject *kobject_get(struct kobject *kobj); +extern struct kobject * __must_check kobject_get_unless_zero( + struct kobject *kobj); extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index cb527c78de9f..006ef813959b 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -192,6 +192,8 @@ struct kretprobe { raw_spinlock_t lock; }; +#define KRETPROBE_MAX_DATA_SIZE 4096 + struct kretprobe_instance { struct hlist_node hlist; struct kretprobe *rp; @@ -366,6 +368,10 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } +extern struct kprobe kprobe_busy; +void kprobe_busy_begin(void); +void kprobe_busy_end(void); + int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); diff --git a/include/linux/kref.h b/include/linux/kref.h index 484604d184be..171f68a43d1d 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -25,6 +25,8 @@ struct kref { atomic_t refcount; }; +#define KREF_INIT(n) { .refcount = ATOMIC_INIT(n), } + /** * kref_init - initialize object. * @kref: object in question. @@ -34,6 +36,11 @@ static inline void kref_init(struct kref *kref) atomic_set(&kref->refcount, 1); } +static inline int kref_read(const struct kref *kref) +{ + return atomic_read(&kref->refcount); +} + /** * kref_get - increment refcount for object. * @kref: object. diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d7ce4e3280db..1e62865821d9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -25,6 +25,7 @@ #include <linux/irqflags.h> #include <linux/context_tracking.h> #include <linux/irqbypass.h> +#include <linux/nospec.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -934,7 +935,7 @@ search_memslots(struct kvm_memslots *slots, gfn_t gfn) start = slot + 1; } - if (gfn >= memslots[start].base_gfn && + if (start < slots->used_slots && gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { atomic_set(&slots->lru_slot, start); return &memslots[start]; @@ -952,7 +953,15 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) static inline unsigned long __gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { - return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; + /* + * The index was checked originally in search_memslots. To avoid + * that a malicious guest builds a Spectre gadget out of e.g. page + * table walks, do not let the processor speculate loads outside + * the guest's registered memslots. + */ + unsigned long offset = gfn - slot->base_gfn; + offset = array_index_nospec(offset, slot->npages); + return slot->userspace_addr + offset * PAGE_SIZE; } static inline int memslot_id(struct kvm *kvm, gfn_t gfn) diff --git a/include/linux/libata.h b/include/linux/libata.h index 6428ac4746de..b8c06ef83331 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -38,6 +38,7 @@ #include <linux/acpi.h> #include <linux/cdrom.h> #include <linux/sched.h> +#include <linux/async.h> /* * Define if arch has non-standard setup. This is a _PCI_ standard @@ -405,7 +406,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ @@ -436,6 +437,7 @@ enum { ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ + ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ @@ -499,6 +501,7 @@ enum hsm_task_states { }; enum ata_completion_errors { + AC_ERR_OK = 0, /* no error */ AC_ERR_DEV = (1 << 0), /* device reported error */ AC_ERR_HSM = (1 << 1), /* host state machine violation */ AC_ERR_TIMEOUT = (1 << 2), /* timeout */ @@ -872,6 +875,8 @@ struct ata_port { struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; + async_cookie_t cookie; + int em_message_type; void *private_data; @@ -893,9 +898,9 @@ struct ata_port_operations { /* * Command execution */ - int (*qc_defer)(struct ata_queued_cmd *qc); - int (*check_atapi_dma)(struct ata_queued_cmd *qc); - void (*qc_prep)(struct ata_queued_cmd *qc); + int (*qc_defer)(struct ata_queued_cmd *qc); + int (*check_atapi_dma)(struct ata_queued_cmd *qc); + enum ata_completion_errors (*qc_prep)(struct ata_queued_cmd *qc); unsigned int (*qc_issue)(struct ata_queued_cmd *qc); bool (*qc_fill_rtf)(struct ata_queued_cmd *qc); @@ -1187,7 +1192,7 @@ extern int ata_xfer_mode2shift(unsigned long xfer_mode); extern const char *ata_mode_string(unsigned long xfer_mask); extern unsigned long ata_id_xfermask(const u16 *id); extern int ata_std_qc_defer(struct ata_queued_cmd *qc); -extern void ata_noop_qc_prep(struct ata_queued_cmd *qc); +extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, unsigned int n_elem); extern unsigned int ata_dev_classify(const struct ata_taskfile *tf); @@ -1878,9 +1883,9 @@ extern const struct ata_port_operations ata_bmdma_port_ops; .sg_tablesize = LIBATA_MAX_PRD, \ .dma_boundary = ATA_DMA_BOUNDARY -extern void ata_bmdma_qc_prep(struct ata_queued_cmd *qc); +extern enum ata_completion_errors ata_bmdma_qc_prep(struct ata_queued_cmd *qc); extern unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc); -extern void ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc); +extern enum ata_completion_errors ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc); extern unsigned int ata_bmdma_port_intr(struct ata_port *ap, struct ata_queued_cmd *qc); extern irqreturn_t ata_bmdma_interrupt(int irq, void *dev_instance); diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h index 8850e243c940..bd0a55821177 100644 --- a/include/linux/libfdt_env.h +++ b/include/linux/libfdt_env.h @@ -6,6 +6,9 @@ #include <asm/byteorder.h> +#define INT32_MAX S32_MAX +#define UINT32_MAX U32_MAX + typedef __be16 fdt16_t; typedef __be32 fdt32_t; typedef __be64 fdt64_t; diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 444d2b1313bd..302999e75470 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -29,6 +29,11 @@ struct hlist_nulls_node { ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_nulls_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \ + }) /** * ptr_is_a_nulls - Test if a ptr is a nulls * @ptr: ptr to be tested @@ -66,10 +71,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); h->first = n; if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } static inline void __hlist_nulls_del(struct hlist_nulls_node *n) @@ -79,13 +84,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) - next->pprev = pprev; + WRITE_ONCE(next->pprev, pprev); } static inline void hlist_nulls_del(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** diff --git a/include/linux/log2.h b/include/linux/log2.h index c373295f359f..cca606609e1b 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -159,7 +159,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define roundup_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n == 1) ? 1 : \ + ((n) == 1) ? 1 : \ (1UL << (ilog2((n) - 1) + 1)) \ ) : \ __roundup_pow_of_two(n) \ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index cf4832db2b29..a46f138a8a8d 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -939,7 +939,7 @@ * @xfrm_state_pol_flow_match: * @x contains the state to match. * @xp contains the policy to check for a match. - * @fl contains the flow to check for a match. + * @flic contains the flowi_common struct to check for a match. * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. @@ -1121,22 +1121,22 @@ * * @binder_set_context_mgr * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. + * @mgr contains the struct cred for the current binder process. * Return 0 if permission is granted. * @binder_transaction * Check whether @from is allowed to invoke a binder transaction call * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. - * @binder_transfer_binder + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. + * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. - * @binder_transfer_file + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. + * @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. + * @from contains the struct cred for the sending process. * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. + * @to contains the struct cred for the receiving process. * * @ptrace_access_check: * Check permission before allowing the current process to trace the @@ -1261,6 +1261,10 @@ * audit_rule_init. * @rule contains the allocated rule * + * @inode_invalidate_secctx: + * Notify the security module that it must revalidate the security context + * of an inode. + * * @inode_notifysecctx: * Notify the security module of what the security context of an inode * should be. Initializes the incore security context managed by the @@ -1297,17 +1301,50 @@ * @inode we wish to get the security context of. * @ctx is a pointer in which to place the allocated security context. * @ctxlen points to the place to put the length of @ctx. - * This is the main security structure. + * + * Security hooks for using the eBPF maps and programs functionalities through + * eBPF syscalls. + * + * @bpf: + * Do a initial check for all bpf syscalls after the attribute is copied + * into the kernel. The actual security module can implement their own + * rules to check the specific cmd they need. + * + * @bpf_map: + * Do a check when the kernel generate and return a file descriptor for + * eBPF maps. + * + * @map: bpf map that we want to access + * @mask: the access flags + * + * @bpf_prog: + * Do a check when the kernel generate and return a file descriptor for + * eBPF programs. + * + * @prog: bpf prog that userspace want to use. + * + * @bpf_map_alloc_security: + * Initialize the security field inside bpf map. + * + * @bpf_map_free_security: + * Clean up the security information stored inside bpf map. + * + * @bpf_prog_alloc_security: + * Initialize the security field inside bpf program. + * + * @bpf_prog_free_security: + * Clean up the security information stored inside bpf prog. + * */ union security_list_options { - int (*binder_set_context_mgr)(struct task_struct *mgr); - int (*binder_transaction)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_binder)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_file)(struct task_struct *from, - struct task_struct *to, + int (*binder_set_context_mgr)(const struct cred *mgr); + int (*binder_transaction)(const struct cred *from, + const struct cred *to); + int (*binder_transfer_binder)(const struct cred *from, + const struct cred *to); + int (*binder_transfer_file)(const struct cred *from, + const struct cred *to, struct file *file); int (*ptrace_access_check)(struct task_struct *child, @@ -1415,14 +1452,14 @@ union security_list_options { int (*inode_removexattr)(struct dentry *dentry, const char *name); int (*inode_need_killpriv)(struct dentry *dentry); int (*inode_killpriv)(struct dentry *dentry); - int (*inode_getsecurity)(const struct inode *inode, const char *name, + int (*inode_getsecurity)(struct inode *inode, const char *name, void **buffer, bool alloc); int (*inode_setsecurity)(struct inode *inode, const char *name, const void *value, size_t size, int flags); int (*inode_listsecurity)(struct inode *inode, char *buffer, size_t buffer_size); - void (*inode_getsecid)(const struct inode *inode, u32 *secid); + void (*inode_getsecid)(struct inode *inode, u32 *secid); int (*file_permission)(struct file *file, int mask); int (*file_alloc_security)(struct file *file); @@ -1519,6 +1556,7 @@ union security_list_options { int (*secctx_to_secid)(const char *secdata, u32 seclen, u32 *secid); void (*release_secctx)(char *secdata, u32 seclen); + void (*inode_invalidate_secctx)(struct inode *inode); int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen); int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen); int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen); @@ -1566,7 +1604,7 @@ union security_list_options { void (*secmark_refcount_inc)(void); void (*secmark_refcount_dec)(void); void (*req_classify_flow)(const struct request_sock *req, - struct flowi *fl); + struct flowi_common *flic); int (*tun_dev_alloc_security)(void **security); void (*tun_dev_free_security)(void *security); int (*tun_dev_create)(void); @@ -1594,7 +1632,7 @@ union security_list_options { u8 dir); int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl); + const flowi_common *flic); int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -1616,6 +1654,17 @@ union security_list_options { struct audit_context *actx); void (*audit_rule_free)(void *lsmrule); #endif /* CONFIG_AUDIT */ + +#ifdef CONFIG_BPF_SYSCALL + int (*bpf)(int cmd, union bpf_attr *attr, + unsigned int size); + int (*bpf_map)(struct bpf_map *map, fmode_t fmode); + int (*bpf_prog)(struct bpf_prog *prog); + int (*bpf_map_alloc_security)(struct bpf_map *map); + void (*bpf_map_free_security)(struct bpf_map *map); + int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); + void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); +#endif /* CONFIG_BPF_SYSCALL */ }; struct security_hook_heads { @@ -1762,6 +1811,7 @@ struct security_hook_heads { struct list_head secid_to_secctx; struct list_head secctx_to_secid; struct list_head release_secctx; + struct list_head inode_invalidate_secctx; struct list_head inode_notifysecctx; struct list_head inode_setsecctx; struct list_head inode_getsecctx; @@ -1829,6 +1879,15 @@ struct security_hook_heads { struct list_head audit_rule_match; struct list_head audit_rule_free; #endif /* CONFIG_AUDIT */ +#ifdef CONFIG_BPF_SYSCALL + struct list_head bpf; + struct list_head bpf_map; + struct list_head bpf_prog; + struct list_head bpf_map_alloc_security; + struct list_head bpf_map_free_security; + struct list_head bpf_prog_alloc_security; + struct list_head bpf_prog_free_security; +#endif /* CONFIG_BPF_SYSCALL */ }; /* diff --git a/include/linux/mfd/abx500/ux500_chargalg.h b/include/linux/mfd/abx500/ux500_chargalg.h index 67703f23e7ba..821a3b9bc16e 100644 --- a/include/linux/mfd/abx500/ux500_chargalg.h +++ b/include/linux/mfd/abx500/ux500_chargalg.h @@ -15,7 +15,7 @@ * - POWER_SUPPLY_TYPE_USB, * because only them store as drv_data pointer to struct ux500_charger. */ -#define psy_to_ux500_charger(x) power_supply_get_drvdata(psy) +#define psy_to_ux500_charger(x) power_supply_get_drvdata(x) /* Forward declaration */ struct ux500_charger; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 3ab3cede28ea..93c14e9df630 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -50,9 +50,11 @@ enum { EC_MSG_TX_TRAILER_BYTES, EC_MSG_RX_PROTO_BYTES = 3, - /* Max length of messages */ - EC_MSG_BYTES = EC_PROTO2_MAX_PARAM_SIZE + + /* Max length of messages for proto 2*/ + EC_PROTO2_MSG_BYTES = EC_PROTO2_MAX_PARAM_SIZE + EC_MSG_TX_PROTO_BYTES, + + EC_MAX_MSG_BYTES = 64 * 1024, }; /* diff --git a/include/linux/mfd/rt5033-private.h b/include/linux/mfd/rt5033-private.h index 1b63fc2f42d1..52d53d134f72 100644 --- a/include/linux/mfd/rt5033-private.h +++ b/include/linux/mfd/rt5033-private.h @@ -203,13 +203,13 @@ enum rt5033_reg { #define RT5033_REGULATOR_BUCK_VOLTAGE_MIN 1000000U #define RT5033_REGULATOR_BUCK_VOLTAGE_MAX 3000000U #define RT5033_REGULATOR_BUCK_VOLTAGE_STEP 100000U -#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 32 +#define RT5033_REGULATOR_BUCK_VOLTAGE_STEP_NUM 21 /* RT5033 regulator LDO output voltage uV */ #define RT5033_REGULATOR_LDO_VOLTAGE_MIN 1200000U #define RT5033_REGULATOR_LDO_VOLTAGE_MAX 3000000U #define RT5033_REGULATOR_LDO_VOLTAGE_STEP 100000U -#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 32 +#define RT5033_REGULATOR_LDO_VOLTAGE_STEP_NUM 19 /* RT5033 regulator SAFE LDO output voltage uV */ #define RT5033_REGULATOR_SAFE_LDO_VOLTAGE 4900000U diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 412aa988c6ad..626139de5917 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -54,7 +54,7 @@ enum { /* one minute for the sake of bringup. Generally, commands must always * complete and we may need to increase this timeout value */ - MLX5_CMD_TIMEOUT_MSEC = 7200 * 1000, + MLX5_CMD_TIMEOUT_MSEC = 60 * 1000, MLX5_CMD_WQ_MAX_NAME = 32, }; @@ -566,8 +566,10 @@ struct mlx5_cmd_work_ent { void *uout; int uout_size; mlx5_cmd_cbk_t callback; + struct delayed_work cb_timeout_work; void *context; int idx; + struct completion handling; struct completion done; struct mlx5_cmd *cmd; struct work_struct work; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index a8786d27ab81..489fc317746a 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -539,6 +539,7 @@ struct mlx5_modify_qp_mbox_in { __be32 optparam; u8 rsvd1[4]; struct mlx5_qp_context ctx; + u8 rsvd2[16]; }; struct mlx5_modify_qp_mbox_out { diff --git a/include/linux/mm.h b/include/linux/mm.h index ecdabf3597ae..af0595bef9c8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -462,7 +462,6 @@ static inline void page_mapcount_reset(struct page *page) static inline int page_mapcount(struct page *page) { - VM_BUG_ON_PAGE(PageSlab(page), page); return atomic_read(&page->_mapcount) + 1; } @@ -504,6 +503,15 @@ static inline void get_huge_page_tail(struct page *page) extern bool __get_page_tail(struct page *page); +static inline int page_ref_count(struct page *page) +{ + return atomic_read(&page->_count); +} + +/* 127: arbitrary random number, small enough to assemble well */ +#define page_ref_zero_or_close_to_overflow(page) \ + ((unsigned int) atomic_read(&page->_count) + 127u <= 127u) + static inline void get_page(struct page *page) { if (unlikely(PageTail(page))) @@ -513,10 +521,22 @@ static inline void get_page(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_count. */ - VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); + VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page); atomic_inc(&page->_count); } +static inline __must_check bool try_get_page(struct page *page) +{ + if (unlikely(PageTail(page))) + if (likely(__get_page_tail(page))) + return true; + + if (WARN_ON_ONCE(atomic_read(&page->_count) <= 0)) + return false; + atomic_inc(&page->_count); + return true; +} + static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); @@ -1120,6 +1140,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, * followed by taking the mmap_sem for writing before modifying the * vmas or anything the coredump pretends not to change from under it. * + * It also has to be called when mmgrab() is used in the context of + * the process, but then the mm_count refcount is transferred outside + * the context of the process to run down_write() on that pinned mm. + * * NOTE: find_extend_vma() called from GUP context is the only place * that can modify the "mm" (notably the vm_start/end) under mmap_sem * for reading and outside the context of the process, so it is also @@ -1819,6 +1843,7 @@ extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); extern void show_mem(unsigned int flags); +extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); @@ -2008,7 +2033,7 @@ int write_one_page(struct page *page, int wait); void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ -#define VM_MAX_READAHEAD 512 /* kbytes */ +#define VM_MAX_READAHEAD CONFIG_VM_MAX_READAHEAD /* kbytes */ #define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 29c17fae9bbf..1019e8d3c88f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -442,6 +442,8 @@ struct mm_struct { unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ unsigned long stack_vm; /* VM_GROWSUP/DOWN */ unsigned long def_flags; + + spinlock_t arg_lock; /* protect the below fields */ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 66595321dfe3..6ed48944262c 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -522,9 +522,9 @@ struct platform_device_id { #define MDIO_MODULE_PREFIX "mdio:" -#define MDIO_ID_FMT "%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d" +#define MDIO_ID_FMT "%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u" #define MDIO_ID_ARGS(_id) \ - (_id)>>31, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \ + ((_id)>>31) & 1, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \ ((_id)>>27) & 1, ((_id)>>26) & 1, ((_id)>>25) & 1, ((_id)>>24) & 1, \ ((_id)>>23) & 1, ((_id)>>22) & 1, ((_id)>>21) & 1, ((_id)>>20) & 1, \ ((_id)>>19) & 1, ((_id)>>18) & 1, ((_id)>>17) & 1, ((_id)>>16) & 1, \ @@ -592,6 +592,10 @@ struct mips_cdmm_device_id { /* * MODULE_DEVICE_TABLE expects this struct to be called x86cpu_device_id. * Although gcc seems to ignore this error, clang fails without this define. + * + * Note: The ordering of the struct is different from upstream because the + * static initializers in kernels < 5.7 still use C89 style while upstream + * has been converted to proper C99 initializers. */ #define x86cpu_device_id x86_cpu_id struct x86_cpu_id { @@ -600,6 +604,7 @@ struct x86_cpu_id { __u16 model; __u16 feature; /* bit index */ kernel_ulong_t driver_data; + __u16 steppings; }; #define X86_FEATURE_MATCH(x) \ @@ -608,6 +613,7 @@ struct x86_cpu_id { #define X86_VENDOR_ANY 0xffff #define X86_FAMILY_ANY 0 #define X86_MODEL_ANY 0 +#define X86_STEPPING_ANY 0 #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ /* diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 52666d90ca94..060db5c327be 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -225,19 +225,11 @@ struct kparam_array VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } /* Obsolete - use module_param_cb() */ -#define module_param_call(name, set, get, arg, perm) \ - static const struct kernel_param_ops __param_ops_##name = \ - { .flags = 0, (void *)set, (void *)get }; \ +#define module_param_call(name, _set, _get, arg, perm) \ + static const struct kernel_param_ops __param_ops_##name = \ + { .flags = 0, .set = _set, .get = _get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ - name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, -1, 0) - -/* We don't get oldget: it's often a new-style param_get_uint, etc. */ -static inline int -__check_old_set_param(int (*oldset)(const char *, struct kernel_param *)) -{ - return 0; -} + name, &__param_ops_##name, arg, perm, -1, 0) #ifdef CONFIG_SYSFS extern void kernel_param_lock(struct module *mod); diff --git a/include/linux/mount.h b/include/linux/mount.h index b606d8f57adf..bbe48613cc80 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -90,6 +90,9 @@ struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); +extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, + struct file_system_type *type, + const char *name, void *data); extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); diff --git a/include/linux/msi.h b/include/linux/msi.h index d0d50cf00b4d..037f47fe76e6 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -128,7 +128,7 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag); -u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag); +void __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag); void pci_msi_mask_irq(struct irq_data *data); void pci_msi_unmask_irq(struct irq_data *data); diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 676d3d2a1a0a..d8bae7cb86f3 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -307,7 +307,7 @@ void map_destroy(struct mtd_info *mtd); ({ \ int i, ret = 1; \ for (i = 0; i < map_words(map); i++) { \ - if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) { \ + if (((val1).x[i] & (val2).x[i]) != (val3).x[i]) { \ ret = 0; \ break; \ } \ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 5a9d1d4c2487..1a066faf7b80 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -38,7 +38,7 @@ extern int nand_scan_ident(struct mtd_info *mtd, int max_chips, struct nand_flash_dev *table); extern int nand_scan_tail(struct mtd_info *mtd); -/* Free resources held by the NAND device */ +/* Unregister the MTD device and free resources held by the NAND device */ extern void nand_release(struct mtd_info *mtd); /* Internal helper for board drivers which need to override command function */ @@ -276,7 +276,7 @@ struct nand_onfi_params { __le16 t_r; __le16 t_ccs; __le16 src_sync_timing_mode; - __le16 src_ssync_features; + u8 src_ssync_features; __le16 clk_pin_capacitance_typ; __le16 io_pin_capacitance_typ; __le16 input_pin_capacitance_typ; @@ -284,7 +284,7 @@ struct nand_onfi_params { u8 driver_strength_support; __le16 t_int_r; __le16 t_ald; - u8 reserved4[7]; + u8 reserved4[8]; /* vendor */ __le16 vendor_revision; @@ -1029,4 +1029,8 @@ int nand_check_erased_ecc_chunk(void *data, int datalen, void *ecc, int ecclen, void *extraoob, int extraooblen, int threshold); + +/* Free resources held by the NAND device */ +void nand_cleanup(struct nand_chip *chip); + #endif /* __LINUX_MTD_NAND_H */ diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h index 42ff7ff09bf5..09404fb36b34 100644 --- a/include/linux/mtd/pfow.h +++ b/include/linux/mtd/pfow.h @@ -127,7 +127,7 @@ static inline void print_drs_error(unsigned dsr) if (!(dsr & DSR_AVAILABLE)) printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); - if (prog_status & 0x03) + if ((prog_status & 0x03) == 0x03) printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " "half with 41h command\n"); else if (prog_status & 0x02) diff --git a/include/linux/net.h b/include/linux/net.h index c00b8d182226..6de18ead3dfe 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -291,6 +291,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); +/* Routine returns the IP overhead imposed by a (caller-protected) socket. */ +u32 kernel_sock_ip_overhead(struct sock *sk); + #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 43c998c83207..18d0a5beac0c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -60,6 +60,7 @@ struct wireless_dev; /* 802.15.4 specific */ struct wpan_dev; struct mpls_dev; +struct bpf_prog; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -777,6 +778,34 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); +/* These structures hold the attributes of xdp state that are being passed + * to the netdevice through the xdp op. + */ +enum xdp_netdev_command { + /* Set or clear a bpf program used in the earliest stages of packet + * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee + * is responsible for calling bpf_prog_put on any old progs that are + * stored. In case of error, the callee need not release the new prog + * reference, but on success it takes ownership and must bpf_prog_put + * when it is no longer used. + */ + XDP_SETUP_PROG, + /* Check if a bpf program is set on the device. The callee should + * return true if a program is currently attached and running. + */ + XDP_QUERY_PROG, +}; + +struct netdev_xdp { + enum xdp_netdev_command command; + union { + /* XDP_SETUP_PROG */ + struct bpf_prog *prog; + /* XDP_QUERY_PROG */ + bool prog_attached; + }; +}; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1059,6 +1088,15 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * This function is used to get egress tunnel information for given skb. * This is useful for retrieving outer tunnel header parameters while * sampling packet. + * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); + * This function is used to specify the headroom that the skb must + * consider when allocation skb during packet reception. Setting + * appropriate rx headroom value allows avoiding skb head copy on + * forward. Setting a negative value reset the rx headroom to the + * default value. + * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + * This function is used to set or query state related to XDP on the + * netdevice. See definition of enum xdp_netdev_command for details. * */ struct net_device_ops { @@ -1236,6 +1274,10 @@ struct net_device_ops { bool proto_down); int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); + void (*ndo_set_rx_headroom)(struct net_device *dev, + int needed_headroom); + int (*ndo_xdp)(struct net_device *dev, + struct netdev_xdp *xdp); }; /** @@ -1271,6 +1313,10 @@ struct net_device_ops { * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device + * @IFF_TEAM: device is a team device + * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured + * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external + * entity (i.e. the master device for bridged veth) */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1297,6 +1343,9 @@ enum netdev_priv_flags { IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, IFF_L3MDEV_SLAVE = 1<<23, + IFF_TEAM = 1<<24, + IFF_RXFH_CONFIGURED = 1<<25, + IFF_PHONY_HEADROOM = 1<<26, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -1323,6 +1372,8 @@ enum netdev_priv_flags { #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE +#define IFF_TEAM IFF_TEAM +#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED /** * struct net_device - The DEVICE structure. @@ -1617,6 +1668,11 @@ struct net_device { unsigned char if_port; unsigned char dma; + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ unsigned int mtu; unsigned short type; unsigned short hard_header_len; @@ -1885,6 +1941,26 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, void *accel_priv); +/* returns the headroom that the master device needs to take in account + * when forwarding to this dev + */ +static inline unsigned netdev_get_fwd_headroom(struct net_device *dev) +{ + return dev->priv_flags & IFF_PHONY_HEADROOM ? 0 : dev->needed_headroom; +} + +static inline void netdev_set_rx_headroom(struct net_device *dev, int new_hr) +{ + if (dev->netdev_ops->ndo_set_rx_headroom) + dev->netdev_ops->ndo_set_rx_headroom(dev, new_hr); +} + +/* set the device rx headroom to the dev's default */ +static inline void netdev_reset_rx_headroom(struct net_device *dev) +{ + netdev_set_rx_headroom(dev, -1); +} + /* * Net namespace inlines */ @@ -2008,7 +2084,10 @@ struct napi_gro_cb { /* Number of gro_receive callbacks this packet already went through */ u8 recursion_counter:4; - /* 3 bit hole */ + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* 2 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2047,6 +2126,7 @@ struct packet_type { struct net_device *); bool (*id_match)(struct packet_type *ptype, struct sock *sk); + struct net *af_packet_net; void *af_packet_priv; struct list_head list; }; @@ -2296,6 +2376,8 @@ void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); DECLARE_PER_CPU(int, xmit_recursion); +#define XMIT_RECURSION_LIMIT 10 + static inline int dev_recursion_level(void) { return this_cpu_read(xmit_recursion); @@ -3120,6 +3202,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); +int dev_change_xdp_fd(struct net_device *dev, int fd); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); @@ -3127,6 +3210,21 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); +static __always_inline int ____dev_forward_skb(struct net_device *dev, + struct sk_buff *skb) +{ + if (skb_orphan_frags(skb, GFP_ATOMIC) || + unlikely(!is_skb_forwardable(dev, skb))) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + + skb_scrub_packet(skb, true); + skb->priority = 0; + return 0; +} + extern int netdev_budget; /* Called by rtnetlink.c:rtnl_unlock() */ @@ -3302,7 +3400,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) if (debug_value == 0) /* no output */ return 0; /* set low N bits */ - return (1 << debug_value) - 1; + return (1U << debug_value) - 1; } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) @@ -3424,6 +3522,7 @@ static inline void netif_tx_disable(struct net_device *dev) local_bh_disable(); cpu = smp_processor_id(); + spin_lock(&dev->tx_global_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); @@ -3431,6 +3530,7 @@ static inline void netif_tx_disable(struct net_device *dev) netif_tx_stop_queue(txq); __netif_tx_unlock(txq); } + spin_unlock(&dev->tx_global_lock); local_bh_enable(); } @@ -3621,7 +3721,6 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; -extern int bpf_jit_enable; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, @@ -3690,7 +3789,7 @@ extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(struct net_device *dev)); + bool (*type_check)(const struct net_device *dev)); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); @@ -3887,32 +3986,32 @@ static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, skb->mac_len = mac_len; } -static inline bool netif_is_macvlan(struct net_device *dev) +static inline bool netif_is_macvlan(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; } -static inline bool netif_is_macvlan_port(struct net_device *dev) +static inline bool netif_is_macvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN_PORT; } -static inline bool netif_is_ipvlan(struct net_device *dev) +static inline bool netif_is_ipvlan(const struct net_device *dev) { return dev->priv_flags & IFF_IPVLAN_SLAVE; } -static inline bool netif_is_ipvlan_port(struct net_device *dev) +static inline bool netif_is_ipvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_IPVLAN_MASTER; } -static inline bool netif_is_bond_master(struct net_device *dev) +static inline bool netif_is_bond_master(const struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; } -static inline bool netif_is_bond_slave(struct net_device *dev) +static inline bool netif_is_bond_slave(const struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } @@ -3947,6 +4046,31 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } +static inline bool netif_is_team_master(const struct net_device *dev) +{ + return dev->priv_flags & IFF_TEAM; +} + +static inline bool netif_is_team_port(const struct net_device *dev) +{ + return dev->priv_flags & IFF_TEAM_PORT; +} + +static inline bool netif_is_lag_master(const struct net_device *dev) +{ + return netif_is_bond_master(dev) || netif_is_team_master(dev); +} + +static inline bool netif_is_lag_port(const struct net_device *dev) +{ + return netif_is_bond_slave(dev) || netif_is_team_port(dev); +} + +static inline bool netif_is_rxfh_configured(const struct net_device *dev) +{ + return dev->priv_flags & IFF_RXFH_CONFIGURED; +} + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 0e1f433cc4b7..378c234ba3e4 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -530,13 +530,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr) sizeof(*addr)); } -/* Calculate the bytes required to store the inclusive range of a-b */ -static inline int -bitmap_bytes(u32 a, u32 b) -{ - return 4 * ((((b - a + 8) / 8) + 3) / 4); -} - #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_comment.h> diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index 2ab2830316b7..aca42a2e79cf 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -4,7 +4,7 @@ #include <linux/netfilter/nf_conntrack_common.h> -extern const char *const pptp_msg_name[]; +const char *pptp_msg_name(u_int16_t msg); /* state of the control session */ enum pptp_ctrlsess_state { diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6923e4049de3..417e3a53dab8 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -135,6 +135,7 @@ struct xt_match { const char *table; unsigned int matchsize; + unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif @@ -175,6 +176,7 @@ struct xt_target { const char *table; unsigned int targetsize; + unsigned int usersize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif @@ -254,6 +256,13 @@ int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); +int xt_match_to_user(const struct xt_entry_match *m, + struct xt_entry_match __user *u); +int xt_target_to_user(const struct xt_entry_target *t, + struct xt_entry_target __user *u); +int xt_data_to_user(void __user *dst, const void *src, + int usersize, int size); + void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); @@ -327,7 +336,7 @@ static inline unsigned int xt_write_recseq_begin(void) * since addend is most likely 1 */ __this_cpu_add(xt_recseq.sequence, addend); - smp_wmb(); + smp_mb(); return addend; } diff --git a/include/linux/netfilter/xt_qtaguid.h b/include/linux/netfilter/xt_qtaguid.h deleted file mode 100644 index 1c671552ec37..000000000000 --- a/include/linux/netfilter/xt_qtaguid.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _XT_QTAGUID_MATCH_H -#define _XT_QTAGUID_MATCH_H - -/* For now we just replace the xt_owner. - * FIXME: make iptables aware of qtaguid. */ -#include <linux/netfilter/xt_owner.h> - -#define XT_QTAGUID_UID XT_OWNER_UID -#define XT_QTAGUID_GID XT_OWNER_GID -#define XT_QTAGUID_SOCKET XT_OWNER_SOCKET -#define xt_qtaguid_match_info xt_owner_match_info - -int qtaguid_untag(struct socket *sock, bool kernel); -#endif /* _XT_QTAGUID_MATCH_H */ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 35fa08fd7739..ac0d65bef5d0 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -8,6 +8,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; +struct cgroup_namespace; struct fs_struct; /* @@ -33,6 +34,7 @@ struct nsproxy { struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns_for_children; struct net *net_ns; + struct cgroup_namespace *cgroup_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/of.h b/include/linux/of.h index 2772f027f88f..31964ddc2442 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -941,6 +941,7 @@ static inline int of_get_available_child_count(const struct device_node *np) #define _OF_DECLARE(table, name, compat, fn, fn_type) \ static const struct of_device_id __of_table_##name \ __used __section(__##table##_of_table) \ + __aligned(__alignof__(struct of_device_id)) \ = { .compatible = compat, \ .data = (fn == (fn_type)NULL) ? fn : fn } #else diff --git a/include/linux/padata.h b/include/linux/padata.h index 438694650471..547a8d1e4a3b 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -24,7 +24,6 @@ #include <linux/workqueue.h> #include <linux/spinlock.h> #include <linux/list.h> -#include <linux/timer.h> #include <linux/notifier.h> #include <linux/kobject.h> @@ -37,6 +36,7 @@ * @list: List entry, to attach to the padata lists. * @pd: Pointer to the internal control structure. * @cb_cpu: Callback cpu for serializatioon. + * @cpu: Cpu for parallelization. * @seq_nr: Sequence number of the parallelized data object. * @info: Used to pass information from the parallel to the serial function. * @parallel: Parallel execution function. @@ -46,6 +46,7 @@ struct padata_priv { struct list_head list; struct parallel_data *pd; int cb_cpu; + int cpu; int info; void (*parallel)(struct padata_priv *padata); void (*serial)(struct padata_priv *padata); @@ -83,7 +84,6 @@ struct padata_serial_queue { * @serial: List to wait for serialization after reordering. * @pwork: work struct for parallelization. * @swork: work struct for serialization. - * @pd: Backpointer to the internal control structure. * @work: work struct for parallelization. * @num_obj: Number of objects that are processed by this cpu. * @cpu_index: Index of the cpu. @@ -91,7 +91,6 @@ struct padata_serial_queue { struct padata_parallel_queue { struct padata_list parallel; struct padata_list reorder; - struct parallel_data *pd; struct work_struct work; atomic_t num_obj; int cpu_index; @@ -118,10 +117,10 @@ struct padata_cpumask { * @reorder_objects: Number of objects waiting in the reorder queues. * @refcnt: Number of objects holding a reference on this parallel_data. * @max_seq_nr: Maximal used sequence number. + * @cpu: Next CPU to be processed. * @cpumask: The cpumasks in use for parallel and serial workers. + * @reorder_work: work struct for reordering. * @lock: Reorder lock. - * @processed: Number of already processed objects. - * @timer: Reorder timer. */ struct parallel_data { struct padata_instance *pinst; @@ -130,10 +129,10 @@ struct parallel_data { atomic_t reorder_objects; atomic_t refcnt; atomic_t seq_nr; + int cpu; struct padata_cpumask cpumask; + struct work_struct reorder_work; spinlock_t lock ____cacheline_aligned; - unsigned int processed; - struct timer_list timer; }; /** diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d2f4a732b3e8..ca7b7cbcb23a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -243,7 +243,7 @@ static inline struct page *page_cache_alloc_readahead(struct address_space *x) __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN); } -typedef int filler_t(void *, struct page *); +typedef int filler_t(struct file *, struct page *); pgoff_t page_cache_next_hole(struct address_space *mapping, pgoff_t index, unsigned long max_scan); @@ -392,7 +392,7 @@ extern int read_cache_pages(struct address_space *mapping, static inline struct page *read_mapping_page(struct address_space *mapping, pgoff_t index, void *data) { - filler_t *filler = (filler_t *)mapping->a_ops->readpage; + filler_t *filler = mapping->a_ops->readpage; return read_cache_page(mapping, index, filler, data); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 5f37614f2451..c871b19cc915 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1442,8 +1442,9 @@ static inline int pci_set_dma_seg_boundary(struct pci_dev *dev, { return -EIO; } static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY; } -static inline int __pci_register_driver(struct pci_driver *drv, - struct module *owner) +static inline int __must_check __pci_register_driver(struct pci_driver *drv, + struct module *owner, + const char *mod_name) { return 0; } static inline int pci_register_driver(struct pci_driver *drv) { return 0; } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1af616138d1d..5547f1a0f83b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2496,6 +2496,8 @@ #define PCI_DEVICE_ID_KORENIX_JETCARDF2 0x1700 #define PCI_DEVICE_ID_KORENIX_JETCARDF3 0x17ff +#define PCI_VENDOR_ID_HUAWEI 0x19e5 + #define PCI_VENDOR_ID_NETRONOME 0x19ee #define PCI_DEVICE_ID_NETRONOME_NFP3200 0x3200 #define PCI_DEVICE_ID_NETRONOME_NFP3240 0x3240 diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 84a109449610..b6332cb761a4 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -76,9 +76,9 @@ static inline s64 percpu_counter_read(struct percpu_counter *fbc) */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { - s64 ret = fbc->count; + /* Prevent reloads of fbc->count */ + s64 ret = READ_ONCE(fbc->count); - barrier(); /* Prevent reloads of fbc->count */ if (ret >= 0) return ret; return 0; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index efce166a9f17..a08674a92aff 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -58,12 +58,30 @@ struct perf_guest_info_callbacks { struct perf_callchain_entry { __u64 nr; - __u64 ip[PERF_MAX_STACK_DEPTH]; + __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */ }; +struct perf_callchain_entry_ctx { + struct perf_callchain_entry *entry; + u32 max_stack; +}; + +typedef unsigned long (*perf_copy_f)(void *dst, const void *src, + unsigned long off, unsigned long len); + +struct perf_raw_frag { + union { + struct perf_raw_frag *next; + unsigned long pad; + }; + perf_copy_f copy; + void *data; + u32 size; +} __packed; + struct perf_raw_record { + struct perf_raw_frag frag; u32 size; - void *data; }; /* @@ -586,6 +604,10 @@ struct perf_event { u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; +#ifdef CONFIG_BPF_SYSCALL + perf_overflow_handler_t orig_overflow_handler; + struct bpf_prog *prog; +#endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; @@ -693,6 +715,11 @@ struct perf_output_handle { int page; }; +struct bpf_perf_event_data_kern { + struct pt_regs *regs; + struct perf_sample_data *data; +}; + #ifdef CONFIG_CGROUP_PERF /* @@ -747,7 +774,7 @@ extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task); -extern struct perf_event *perf_event_get(unsigned int fd); +extern struct file *perf_event_get(unsigned int fd); extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); @@ -853,6 +880,12 @@ extern void perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); +static inline bool +is_default_overflow_handler(struct perf_event *event) +{ + return (event->overflow_handler == perf_event_output); +} + extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, @@ -984,13 +1017,25 @@ extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); -extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs); -extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs); +extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); +extern struct perf_callchain_entry * +get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, + u32 max_stack, bool crosstask, bool add_mark); +extern int get_callchain_buffers(int max_stack); +extern void put_callchain_buffers(void); -static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +extern int sysctl_perf_event_max_stack; + +static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip) { - if (entry->nr < PERF_MAX_STACK_DEPTH) + struct perf_callchain_entry *entry = ctx->entry; + if (entry->nr < ctx->max_stack) { entry->ip[entry->nr++] = ip; + return 0; + } else { + return -1; /* no more room, stop walking the stack */ + } } extern int sysctl_perf_event_paranoid; @@ -1007,6 +1052,8 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +int perf_event_max_stack_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); static inline bool perf_paranoid_any(void) { @@ -1029,7 +1076,7 @@ static inline bool perf_paranoid_kernel(void) } extern void perf_event_init(void); -extern void perf_tp_event(u64 addr, u64 count, void *record, +extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, struct task_struct *task); @@ -1094,7 +1141,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { } -static inline struct perf_event *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); } +static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); } static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event) { return ERR_PTR(-EINVAL); @@ -1147,6 +1194,11 @@ extern void perf_restore_debug_store(void); static inline void perf_restore_debug_store(void) { } #endif +static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) +{ + return frag->pad < sizeof(u64); +} + #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) /* diff --git a/include/linux/pid.h b/include/linux/pid.h index 97b745ddece5..1d847d3de245 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -2,6 +2,7 @@ #define _LINUX_PID_H #include <linux/rcupdate.h> +#include <linux/wait.h> enum pid_type { @@ -62,6 +63,8 @@ struct pid unsigned int level; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; + /* wait queue for pidfd notifications */ + wait_queue_head_t wait_pidfd; struct rcu_head rcu; struct upid numbers[1]; }; @@ -74,6 +77,8 @@ struct pid_link struct pid *pid; }; +extern const struct file_operations pidfd_fops; + static inline struct pid *get_pid(struct pid *pid) { if (pid) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 24f5470d3944..0b28b65c12fb 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -112,9 +112,22 @@ struct pipe_buf_operations { /* * Get a reference to the pipe buffer. */ - void (*get)(struct pipe_inode_info *, struct pipe_buffer *); + bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; +/** + * pipe_buf_get - get a reference to a pipe_buffer + * @pipe: the pipe that the buffer belongs to + * @buf: the buffer to get a reference to + * + * Return: %true if the reference was successfully obtained. + */ +static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return buf->ops->get(pipe, buf); +} + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE @@ -137,7 +150,7 @@ struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ -void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); +bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); diff --git a/include/linux/platform_data/dma-imx-sdma.h b/include/linux/platform_data/dma-imx-sdma.h index 2d08816720f6..5bb0a119f39a 100644 --- a/include/linux/platform_data/dma-imx-sdma.h +++ b/include/linux/platform_data/dma-imx-sdma.h @@ -50,7 +50,10 @@ struct sdma_script_start_addrs { /* End of v2 array */ s32 zcanfd_2_mcu_addr; s32 zqspi_2_mcu_addr; + s32 mcu_2_ecspi_addr; /* End of v3 array */ + s32 mcu_2_zqspi_addr; + /* End of v4 array */ }; /** diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 5df733b8f704..c03a368b5911 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -219,10 +219,8 @@ struct pnp_card { #define global_to_pnp_card(n) list_entry(n, struct pnp_card, global_list) #define protocol_to_pnp_card(n) list_entry(n, struct pnp_card, protocol_list) #define to_pnp_card(n) container_of(n, struct pnp_card, dev) -#define pnp_for_each_card(card) \ - for((card) = global_to_pnp_card(pnp_cards.next); \ - (card) != global_to_pnp_card(&pnp_cards); \ - (card) = global_to_pnp_card((card)->global_list.next)) +#define pnp_for_each_card(card) \ + list_for_each_entry(card, &pnp_cards, global_list) struct pnp_card_link { struct pnp_card *card; @@ -275,14 +273,9 @@ struct pnp_dev { #define card_to_pnp_dev(n) list_entry(n, struct pnp_dev, card_list) #define protocol_to_pnp_dev(n) list_entry(n, struct pnp_dev, protocol_list) #define to_pnp_dev(n) container_of(n, struct pnp_dev, dev) -#define pnp_for_each_dev(dev) \ - for((dev) = global_to_pnp_dev(pnp_global.next); \ - (dev) != global_to_pnp_dev(&pnp_global); \ - (dev) = global_to_pnp_dev((dev)->global_list.next)) -#define card_for_each_dev(card,dev) \ - for((dev) = card_to_pnp_dev((card)->devices.next); \ - (dev) != card_to_pnp_dev(&(card)->devices); \ - (dev) = card_to_pnp_dev((dev)->card_list.next)) +#define pnp_for_each_dev(dev) list_for_each_entry(dev, &pnp_global, global_list) +#define card_for_each_dev(card, dev) \ + list_for_each_entry(dev, &(card)->devices, card_list) #define pnp_dev_name(dev) (dev)->name static inline void *pnp_get_drvdata(struct pnp_dev *pdev) @@ -434,14 +427,10 @@ struct pnp_protocol { }; #define to_pnp_protocol(n) list_entry(n, struct pnp_protocol, protocol_list) -#define protocol_for_each_card(protocol,card) \ - for((card) = protocol_to_pnp_card((protocol)->cards.next); \ - (card) != protocol_to_pnp_card(&(protocol)->cards); \ - (card) = protocol_to_pnp_card((card)->protocol_list.next)) -#define protocol_for_each_dev(protocol,dev) \ - for((dev) = protocol_to_pnp_dev((protocol)->devices.next); \ - (dev) != protocol_to_pnp_dev(&(protocol)->devices); \ - (dev) = protocol_to_pnp_dev((dev)->protocol_list.next)) +#define protocol_for_each_card(protocol, card) \ + list_for_each_entry(card, &(protocol)->cards, protocol_list) +#define protocol_for_each_dev(protocol, dev) \ + list_for_each_entry(dev, &(protocol)->devices, protocol_list) extern struct bus_type pnp_bus_type; diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 83b22ae9ae12..b39420a0321c 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -104,29 +104,32 @@ struct posix_clock_operations { * * @ops: Functional interface to the clock * @cdev: Character device instance for this clock - * @kref: Reference count. + * @dev: Pointer to the clock's device. * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. - * @release: A function to free the structure when the reference count reaches - * zero. May be NULL if structure is statically allocated. * * Drivers should embed their struct posix_clock within a private * structure, obtaining a reference to it during callbacks using * container_of(). + * + * Drivers should supply an initialized but not exposed struct device + * to posix_clock_register(). It is used to manage lifetime of the + * driver's private structure. It's 'release' field should be set to + * a release function for this private structure. */ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; - struct kref kref; + struct device *dev; struct rw_semaphore rwsem; bool zombie; - void (*release)(struct posix_clock *clk); }; /** * posix_clock_register() - register a new clock - * @clk: Pointer to the clock. Caller must provide 'ops' and 'release' - * @devid: Allocated device id + * @clk: Pointer to the clock. Caller must provide 'ops' field + * @dev: Pointer to the initialized device. Caller must provide + * 'release' field * * A clock driver calls this function to register itself with the * clock device subsystem. If 'clk' points to dynamically allocated @@ -135,7 +138,7 @@ struct posix_clock { * * Returns zero on success, non-zero otherwise. */ -int posix_clock_register(struct posix_clock *clk, dev_t devid); +int posix_clock_register(struct posix_clock *clk, struct device *dev); /** * posix_clock_unregister() - unregister a clock diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index 522757ac9cd4..890f53881fad 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -75,7 +75,7 @@ enum max17042_register { MAX17042_RelaxCFG = 0x2A, MAX17042_MiscCFG = 0x2B, MAX17042_TGAIN = 0x2C, - MAx17042_TOFF = 0x2D, + MAX17042_TOFF = 0x2D, MAX17042_CGAIN = 0x2E, MAX17042_COFF = 0x2F, diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c3764d2a2934..4d2e20415071 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -115,6 +115,11 @@ enum { POWER_SUPPLY_PL_USBMID_USBMID, }; +enum { + POWER_SUPPLY_PL_STACKED_BATFET, + POWER_SUPPLY_PL_NON_STACKED_BATFET, +}; + enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, @@ -267,6 +272,10 @@ enum power_supply_property { POWER_SUPPLY_PROP_ENABLE_JEITA_DETECTION, POWER_SUPPLY_PROP_ALLOW_HVDCP3, POWER_SUPPLY_PROP_MAX_PULSE_ALLOWED, + POWER_SUPPLY_PROP_PARALLEL_BATFET_MODE, + POWER_SUPPLY_PROP_PARALLEL_FCC_MAX, + POWER_SUPPLY_PROP_MIN_ICL, + POWER_SUPPLY_PROP_FG_RESET_CLOCK, /* Local extensions of type int64_t */ POWER_SUPPLY_PROP_CHARGE_COUNTER_EXT, /* Properties of type `const char *' */ diff --git a/include/linux/prandom.h b/include/linux/prandom.h new file mode 100644 index 000000000000..e20339c78a84 --- /dev/null +++ b/include/linux/prandom.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/prandom.h + * + * Include file for the fast pseudo-random 32-bit + * generation. + */ +#ifndef _LINUX_PRANDOM_H +#define _LINUX_PRANDOM_H + +#include <linux/types.h> +#include <linux/percpu.h> + +u32 prandom_u32(void); +void prandom_bytes(void *buf, size_t nbytes); +void prandom_seed(u32 seed); +void prandom_reseed_late(void); + +#if BITS_PER_LONG == 64 +/* + * The core SipHash round function. Each line can be executed in + * parallel given enough CPU resources. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ + v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ + v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ + v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ +) + +#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) +#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) + +#elif BITS_PER_LONG == 32 +/* + * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. + * This is weaker, but 32-bit machines are not used for high-traffic + * applications, so there is less output for an attacker to analyze. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ + v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ + v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ + v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ +) +#define PRND_K0 0x6c796765 +#define PRND_K1 0x74656462 + +#else +#error Unsupported BITS_PER_LONG +#endif + +struct rnd_state { + __u32 s1, s2, s3, s4; +}; + +u32 prandom_u32_state(struct rnd_state *state); +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); +void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); + +#define prandom_init_once(pcpu_state) \ + DO_ONCE(prandom_seed_full_state, (pcpu_state)) + +/** + * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) + * @ep_ro: right open interval endpoint + * + * Returns a pseudo-random number that is in interval [0, ep_ro). Note + * that the result depends on PRNG being well distributed in [0, ~0U] + * u32 space. Here we use maximally equidistributed combined Tausworthe + * generator, that is, prandom_u32(). This is useful when requesting a + * random index of an array containing ep_ro elements, for example. + * + * Returns: pseudo-random number in interval [0, ep_ro) + */ +static inline u32 prandom_u32_max(u32 ep_ro) +{ + return (u32)(((u64) prandom_u32() * ep_ro) >> 32); +} + +/* + * Handle minimum values for seeds + */ +static inline u32 __seed(u32 x, u32 m) +{ + return (x < m) ? x + m : x; +} + +/** + * prandom_seed_state - set seed for prandom_u32_state(). + * @state: pointer to state structure to receive the seed. + * @seed: arbitrary 64-bit value to use as a seed. + */ +static inline void prandom_seed_state(struct rnd_state *state, u64 seed) +{ + u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL; + + state->s1 = __seed(i, 2U); + state->s2 = __seed(i, 8U); + state->s3 = __seed(i, 16U); + state->s4 = __seed(i, 128U); +} + +/* Pseudo random number generator from numerical recipes. */ +static inline u32 next_pseudo_random32(u32 seed) +{ + return seed * 1664525 + 1013904223; +} + +#endif diff --git a/include/linux/printk.h b/include/linux/printk.h index 9729565c25ff..9ccbdf2c1453 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -106,13 +106,13 @@ struct va_format { /* * Dummy printk for disabled debugging statements to use whilst maintaining - * gcc's format and side-effect checking. + * gcc's format checking. */ -static inline __printf(1, 2) -int no_printk(const char *fmt, ...) -{ - return 0; -} +#define no_printk(fmt, ...) \ +do { \ + if (0) \ + printk(fmt, ##__VA_ARGS__); \ +} while (0) #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index b326d0a0cace..de3f442b7ab6 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -41,6 +41,7 @@ extern void *proc_get_parent_data(const struct inode *); extern void proc_remove(struct proc_dir_entry *); extern void remove_proc_entry(const char *, struct proc_dir_entry *); extern int remove_proc_subtree(const char *, struct proc_dir_entry *); +extern struct pid *tgid_pidfd_to_pid(const struct file *file); #else /* CONFIG_PROC_FS */ @@ -72,6 +73,11 @@ static inline void proc_remove(struct proc_dir_entry *de) {} #define remove_proc_entry(name, parent) do {} while (0) static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; } +static inline struct pid *tgid_pidfd_to_pid(const struct file *file) +{ + return ERR_PTR(-EBADF); +} + #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_PROC_UID diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 42dfc615dbf8..ca85a4348ffc 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -9,6 +9,8 @@ struct pid_namespace; struct nsproxy; struct path; +struct task_struct; +struct inode; struct proc_ns_operations { const char *name; @@ -16,6 +18,7 @@ struct proc_ns_operations { struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); + struct user_namespace *(*owner)(struct ns_common *ns); }; extern const struct proc_ns_operations netns_operations; @@ -24,6 +27,7 @@ extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; extern const struct proc_ns_operations userns_operations; extern const struct proc_ns_operations mntns_operations; +extern const struct proc_ns_operations cgroupns_operations; /* * We always define these enumerators @@ -34,6 +38,7 @@ enum { PROC_UTS_INIT_INO = 0xEFFFFFFEU, PROC_USER_INIT_INO = 0xEFFFFFFDU, PROC_PID_INIT_INO = 0xEFFFFFFCU, + PROC_CGROUP_INIT_INO = 0xEFFFFFFBU, }; #ifdef CONFIG_PROC_FS diff --git a/include/linux/qpnp/pwm.h b/include/linux/qpnp/pwm.h index 020f18b2cc4b..782b7849da4a 100644 --- a/include/linux/qpnp/pwm.h +++ b/include/linux/qpnp/pwm.h @@ -155,6 +155,11 @@ int pwm_lut_config(struct pwm_device *pwm, int period_us, int pwm_config_us(struct pwm_device *pwm, int duty_us, int period_us); +/* + * synchronized enable of multiple pwm instances + */ +int pwm_enable_synchronized(struct pwm_device **pwms, size_t num); + #else static inline int pwm_config_period(struct pwm_device *pwm, struct pwm_period_config *pwm_p) @@ -183,6 +188,11 @@ static inline int pwm_config_us(struct pwm_device *pwm, { return -EINVAL; } + +static inline int pwm_enable_synchronized(struct pwm_device **pwms, size_t num) +{ + return -EINVAL; +} #endif /* Standard APIs supported */ diff --git a/include/linux/quota.h b/include/linux/quota.h index b2505acfd3c0..b34412df1542 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -253,7 +253,7 @@ enum { }; struct dqstats { - int stat[_DQST_DQSTAT_LAST]; + unsigned long stat[_DQST_DQSTAT_LAST]; struct percpu_counter counter[_DQST_DQSTAT_LAST]; }; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 1f350238445c..a946c4c1a77e 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -54,6 +54,16 @@ static inline struct dquot *dqgrab(struct dquot *dquot) atomic_inc(&dquot->dq_count); return dquot; } + +static inline bool dquot_is_busy(struct dquot *dquot) +{ + if (test_bit(DQ_MOD_B, &dquot->dq_flags)) + return true; + if (atomic_read(&dquot->dq_count) > 1) + return true; + return false; +} + void dqput(struct dquot *dquot); int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), diff --git a/include/linux/random.h b/include/linux/random.h index 9c29122037f9..2fa4207fd067 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -37,61 +37,12 @@ unsigned int get_random_int(void); unsigned long get_random_long(void); unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); -u32 prandom_u32(void); -void prandom_bytes(void *buf, size_t nbytes); -void prandom_seed(u32 seed); -void prandom_reseed_late(void); - -struct rnd_state { - __u32 s1, s2, s3, s4; -}; - -u32 prandom_u32_state(struct rnd_state *state); -void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); -void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); - -#define prandom_init_once(pcpu_state) \ - DO_ONCE(prandom_seed_full_state, (pcpu_state)) - -/** - * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) - * @ep_ro: right open interval endpoint - * - * Returns a pseudo-random number that is in interval [0, ep_ro). Note - * that the result depends on PRNG being well distributed in [0, ~0U] - * u32 space. Here we use maximally equidistributed combined Tausworthe - * generator, that is, prandom_u32(). This is useful when requesting a - * random index of an array containing ep_ro elements, for example. - * - * Returns: pseudo-random number in interval [0, ep_ro) - */ -static inline u32 prandom_u32_max(u32 ep_ro) -{ - return (u32)(((u64) prandom_u32() * ep_ro) >> 32); -} - /* - * Handle minimum values for seeds + * This is designed to be standalone for just prandom + * users, but for now we include it from <linux/random.h> + * for legacy reasons. */ -static inline u32 __seed(u32 x, u32 m) -{ - return (x < m) ? x + m : x; -} - -/** - * prandom_seed_state - set seed for prandom_u32_state(). - * @state: pointer to state structure to receive the seed. - * @seed: arbitrary 64-bit value to use as a seed. - */ -static inline void prandom_seed_state(struct rnd_state *state, u64 seed) -{ - u32 i = (seed >> 32) ^ (seed << 10) ^ seed; - - state->s1 = __seed(i, 2U); - state->s2 = __seed(i, 8U); - state->s3 = __seed(i, 16U); - state->s4 = __seed(i, 128U); -} +#include <linux/prandom.h> #ifdef CONFIG_ARCH_RANDOM # include <asm/archrandom.h> @@ -122,10 +73,4 @@ static inline int arch_has_random_seed(void) } #endif -/* Pseudo random number generator from numerical recipes. */ -static inline u32 next_pseudo_random32(u32 seed) -{ - return seed * 1664525 + 1013904223; -} - #endif /* _LINUX_RANDOM_H */ diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 1c33dd7da4a7..8020dca6c61f 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -33,7 +33,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) { if (!hlist_nulls_unhashed(n)) { __hlist_nulls_del(n); - n->pprev = NULL; + WRITE_ONCE(n->pprev, NULL); } } @@ -65,7 +65,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** @@ -93,11 +93,12 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. @@ -117,5 +118,19 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) +/** + * hlist_nulls_for_each_entry_safe - + * iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_nulls_node within the struct. + */ +#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ + for (({barrier();}), \ + pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \ + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });) #endif #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0a93e9d1708e..3072e9c93ae6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -880,9 +880,7 @@ static __always_inline void rcu_read_lock(void) * Unfortunately, this function acquires the scheduler's runqueue and * priority-inheritance spinlocks. This means that deadlock could result * if the caller of rcu_read_unlock() already holds one of these locks or - * any lock that is ever acquired while holding them; or any lock which - * can be taken from interrupt context because rcu_boost()->rt_mutex_lock() - * does not disable irqs while taking ->wait_lock. + * any lock that is ever acquired while holding them. * * That said, RCU readers are never priority boosted unless they were * preempted. Therefore, one way to avoid deadlock is to make sure diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index d8ecefaf63ca..6b8ec40af2c4 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -44,8 +44,6 @@ enum ab8505_regulator_id { AB8505_LDO_ANAMIC2, AB8505_LDO_AUX8, AB8505_LDO_ANA, - AB8505_SYSCLKREQ_2, - AB8505_SYSCLKREQ_4, AB8505_NUM_REGULATORS, }; diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 734ae72d3c16..a4f06dcfc85c 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -496,7 +496,7 @@ static inline unsigned int regulator_get_mode(struct regulator *regulator) static inline int regulator_set_load(struct regulator *regulator, int load_uA) { - return REGULATOR_MODE_NORMAL; + return 0; } static inline int regulator_allow_bypass(struct regulator *regulator, diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index ce6b962ffed4..842f70fcfc48 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -6,7 +6,7 @@ struct reset_controller_dev; /** - * struct reset_control_ops + * struct reset_control_ops - reset controller driver callbacks * * @reset: for self-deasserting resets, does all necessary * things to reset the device diff --git a/include/linux/sched.h b/include/linux/sched.h index a05e1e82ecf3..50f7ad30d9c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -170,6 +170,7 @@ extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); +extern bool cpu_has_rt_task(int cpu); extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); @@ -1668,6 +1669,15 @@ struct task_struct { const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; +#ifdef CONFIG_SCHED_WALT + struct ravg ravg; + /* + * 'init_load_pct' represents the initial task load assigned to children + * of this task + */ + u32 init_load_pct; + u64 last_sleep_ts; +#endif #ifdef CONFIG_SCHED_HMP struct ravg ravg; /* @@ -1700,6 +1710,7 @@ struct task_struct { unsigned int policy; int nr_cpus_allowed; cpumask_t cpus_allowed; + cpumask_t cpus_requested; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; @@ -1878,8 +1889,8 @@ struct task_struct { struct seccomp seccomp; /* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; + u64 parent_exec_id; + u64 self_exec_id; /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, * mempolicy */ spinlock_t alloc_lock; @@ -1973,6 +1984,8 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; + struct mutex futex_exit_mutex; + unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; @@ -2379,7 +2392,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, */ #define PF_WAKE_UP_IDLE 0x00000002 /* try to wake up on an idle CPU */ #define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ @@ -3042,8 +3054,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task); * succeeds. */ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exit() */ +extern void exit_mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exec() */ +extern void exec_mm_release(struct task_struct *, struct mm_struct *); #ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, @@ -3191,7 +3205,7 @@ static inline int thread_group_empty(struct task_struct *p) * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. + * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), @@ -3719,6 +3733,8 @@ static inline unsigned long rlimit_max(unsigned int limit) #define SCHED_CPUFREQ_RT (1U << 0) #define SCHED_CPUFREQ_DL (1U << 1) #define SCHED_CPUFREQ_IOWAIT (1U << 2) +#define SCHED_CPUFREQ_INTERCLUSTER_MIG (1U << 3) +#define SCHED_CPUFREQ_WALT (1U << 4) #ifdef CONFIG_CPU_FREQ struct update_util_data { diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index e54065331f72..c85fe9872d07 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -41,6 +41,12 @@ extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_sync_hint_enable; extern unsigned int sysctl_sched_cstate_aware; +#ifdef CONFIG_SCHED_WALT +extern unsigned int sysctl_sched_use_walt_cpu_util; +extern unsigned int sysctl_sched_use_walt_task_util; +extern unsigned int sysctl_sched_walt_init_task_load_pct; +extern unsigned int sysctl_sched_walt_cpu_high_irqload; +#endif #ifdef CONFIG_SCHED_HMP @@ -58,7 +64,7 @@ extern unsigned int sysctl_sched_window_stats_policy; extern unsigned int sysctl_sched_ravg_hist_size; extern unsigned int sysctl_sched_cpu_high_irqload; extern unsigned int sysctl_sched_init_task_load_pct; -extern unsigned int sysctl_sched_spill_nr_run; +extern __read_mostly unsigned int sysctl_sched_spill_nr_run; extern unsigned int sysctl_sched_spill_load_pct; extern unsigned int sysctl_sched_upmigrate_pct; extern unsigned int sysctl_sched_downmigrate_pct; @@ -160,6 +166,7 @@ static inline unsigned int get_sysctl_sched_cfs_boost(void) extern unsigned int sysctl_sched_autogroup_enabled; #endif +extern int sysctl_sched_rr_timeslice; extern int sched_rr_timeslice; extern int sched_rr_handler(struct ctl_table *table, int write, diff --git a/include/linux/security.h b/include/linux/security.h index 7caf520e6233..2800ef53c424 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -29,6 +29,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/bio.h> +#include <linux/bpf.h> struct linux_binprm; struct cred; @@ -101,7 +102,7 @@ struct sk_buff; struct sock; struct sockaddr; struct socket; -struct flowi; +struct flowi_common; struct dst_entry; struct xfrm_selector; struct xfrm_policy; @@ -183,13 +184,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) extern int security_init(void); /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr); -int security_binder_transaction(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file); +int security_binder_set_context_mgr(const struct cred *mgr); +int security_binder_transaction(const struct cred *from, + const struct cred *to); +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to); +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -272,10 +273,10 @@ int security_inode_listxattr(struct dentry *dentry); int security_inode_removexattr(struct dentry *dentry, const char *name); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct dentry *dentry); -int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc); +int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc); int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags); int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size); -void security_inode_getsecid(const struct inode *inode, u32 *secid); +void security_inode_getsecid(struct inode *inode, u32 *secid); int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); @@ -355,6 +356,7 @@ int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); void security_release_secctx(char *secdata, u32 seclen); +void security_inode_invalidate_secctx(struct inode *inode); int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); @@ -380,25 +382,25 @@ static inline int security_init(void) return 0; } -static inline int security_binder_set_context_mgr(struct task_struct *mgr) +static inline int security_binder_set_context_mgr(const struct cred *mgr) { return 0; } -static inline int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transaction(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static inline int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return 0; @@ -728,7 +730,7 @@ static inline int security_inode_killpriv(struct dentry *dentry) return cap_inode_killpriv(dentry); } -static inline int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc) +static inline int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc) { return -EOPNOTSUPP; } @@ -743,7 +745,7 @@ static inline int security_inode_listsecurity(struct inode *inode, char *buffer, return 0; } -static inline void security_inode_getsecid(const struct inode *inode, u32 *secid) +static inline void security_inode_getsecid(struct inode *inode, u32 *secid) { *secid = 0; } @@ -1102,6 +1104,10 @@ static inline void security_release_secctx(char *secdata, u32 seclen) { } +static inline void security_inode_invalidate_secctx(struct inode *inode) +{ +} + static inline int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen) { return -EOPNOTSUPP; @@ -1142,8 +1148,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u int security_sk_alloc(struct sock *sk, int family, gfp_t priority); void security_sk_free(struct sock *sk); void security_sk_clone(const struct sock *sk, struct sock *newsk); -void security_sk_classify_flow(struct sock *sk, struct flowi *fl); -void security_req_classify_flow(const struct request_sock *req, struct flowi *fl); +void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic); +void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic); void security_sock_graft(struct sock*sk, struct socket *parent); int security_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -1283,11 +1290,13 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) +static inline void security_sk_classify_flow(struct sock *sk, + struct flowi_common *flic) { } -static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +static inline void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic) { } @@ -1369,9 +1378,9 @@ void security_xfrm_state_free(struct xfrm_state *x); int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl); + const struct flowi_common *flic); int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid); -void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); +void security_skb_classify_flow(struct sk_buff *skb, struct flowi_common *flic); #else /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -1423,7 +1432,8 @@ static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_s } static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl) + struct xfrm_policy *xp, + const struct flowi_common *flic) { return 1; } @@ -1433,7 +1443,8 @@ static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) return 0; } -static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +static inline void security_skb_classify_flow(struct sk_buff *skb, + struct flowi_common *flic) { } @@ -1623,6 +1634,50 @@ static inline void securityfs_remove(struct dentry *dentry) #endif +#ifdef CONFIG_BPF_SYSCALL +#ifdef CONFIG_SECURITY +extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); +extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); +extern int security_bpf_prog(struct bpf_prog *prog); +extern int security_bpf_map_alloc(struct bpf_map *map); +extern void security_bpf_map_free(struct bpf_map *map); +extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); +extern void security_bpf_prog_free(struct bpf_prog_aux *aux); +#else +static inline int security_bpf(int cmd, union bpf_attr *attr, + unsigned int size) +{ + return 0; +} + +static inline int security_bpf_map(struct bpf_map *map, fmode_t fmode) +{ + return 0; +} + +static inline int security_bpf_prog(struct bpf_prog *prog) +{ + return 0; +} + +static inline int security_bpf_map_alloc(struct bpf_map *map) +{ + return 0; +} + +static inline void security_bpf_map_free(struct bpf_map *map) +{ } + +static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +{ + return 0; +} + +static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) +{ } +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_BPF_SYSCALL */ + #ifdef CONFIG_SECURITY static inline char *alloc_secdata(void) diff --git a/include/linux/selection.h b/include/linux/selection.h index 85193aa8c1e3..ca3d9513b5ac 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -12,8 +12,8 @@ struct tty_struct; -extern struct vc_data *sel_cons; struct tty_struct; +struct vc_data; extern void clear_selection(void); extern int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty); @@ -22,6 +22,8 @@ extern int sel_loadlut(char __user *p); extern int mouse_reporting(void); extern void mouse_report(struct tty_struct * tty, int butt, int mrx, int mry); +bool vc_is_sel(struct vc_data *vc); + extern int console_blanked; extern unsigned char color_table[]; diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index fb7eb9ccb1cd..d4c3c9bab582 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -29,7 +29,7 @@ static inline void seq_buf_clear(struct seq_buf *s) } static inline void -seq_buf_init(struct seq_buf *s, unsigned char *buf, unsigned int size) +seq_buf_init(struct seq_buf *s, char *buf, unsigned int size) { s->buffer = buf; s->size = size; diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index f3d45dd42695..e305b66a9fb9 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -117,9 +117,9 @@ __printf(2, 3) void seq_printf(struct seq_file *m, const char *fmt, ...); void seq_putc(struct seq_file *m, char c); void seq_puts(struct seq_file *m, const char *s); -void seq_put_decimal_ull(struct seq_file *m, char delimiter, +void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num); -void seq_put_decimal_ll(struct seq_file *m, char delimiter, long long num); +void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num); void seq_escape(struct seq_file *m, const char *s, const char *esc); void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e0582106ef4f..a10f36378417 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -242,6 +242,13 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * usual consistency guarantee. It is one wmb cheaper, because we can * collapse the two back-to-back wmb()s. * + * Note that, writes surrounding the barrier should be declared atomic (e.g. + * via WRITE_ONCE): a) to ensure the writes become visible to other threads + * atomically, avoiding compiler optimizations; b) to document which writes are + * meant to propagate to the reader critical section. This is necessary because + * neither writes before and after the barrier are enclosed in a seq-writer + * critical section that would ensure readers are aware of ongoing writes. + * * seqcount_t seq; * bool X = true, Y = false; * @@ -261,11 +268,11 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * * void write(void) * { - * Y = true; + * WRITE_ONCE(Y, true); * * raw_write_seqcount_barrier(seq); * - * X = false; + * WRITE_ONCE(X, false); * } */ static inline void raw_write_seqcount_barrier(seqcount_t *s) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index f276869a945e..ca15d91bdb36 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -160,6 +160,7 @@ struct uart_port { struct console *cons; /* struct console, if any */ #if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(SUPPORT_SYSRQ) unsigned long sysrq; /* sysrq timeout */ + unsigned int sysrq_ch; /* char for sysrq */ #endif /* flags must be updated while holding port mutex */ @@ -445,8 +446,42 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) } return 0; } +static inline int +uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (port->sysrq) { + if (ch && time_before(jiffies, port->sysrq)) { + port->sysrq_ch = ch; + port->sysrq = 0; + return 1; + } + port->sysrq = 0; + } + return 0; +} +static inline void +uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + int sysrq_ch; + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, irqflags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} #else -#define uart_handle_sysrq_char(port,ch) ({ (void)port; 0; }) +static inline int +uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } +static inline int +uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } +static inline void +uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + spin_unlock_irqrestore(&port->lock, irqflags); +} #endif /* diff --git a/include/linux/shm.h b/include/linux/shm.h index 6fb801686ad6..fbb74824f0df 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -19,9 +19,18 @@ struct shmid_kernel /* private to the kernel */ pid_t shm_lprid; struct user_struct *mlock_user; - /* The task created the shm object. NULL if the task is dead. */ + /* + * The task created the shm object, for + * task_lock(shp->shm_creator) + */ struct task_struct *shm_creator; - struct list_head shm_clist; /* list by creator */ + + /* + * List by creator. task_lock(->shm_creator) required for read/write. + * If list_empty(), then the creator is dead already. + */ + struct list_head shm_clist; + struct ipc_namespace *ns; }; /* shm_mode upper byte flags */ diff --git a/include/linux/signal.h b/include/linux/signal.h index bcc094cb697c..649cd9fc63ca 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -313,6 +313,9 @@ extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); +#define SIG_KTHREAD ((__force __sighandler_t)2) +#define SIG_KTHREAD_KERNEL ((__force __sighandler_t)3) + static inline void allow_signal(int sig) { /* @@ -320,7 +323,17 @@ static inline void allow_signal(int sig) * know it'll be handled, so that they don't get converted to * SIGKILL or just silently dropped. */ - kernel_sigaction(sig, (__force __sighandler_t)2); + kernel_sigaction(sig, SIG_KTHREAD); +} + +static inline void allow_kernel_signal(int sig) +{ + /* + * Kernel threads handle their own signals. Let the signal code + * know signals sent by the kernel will be handled, so that they + * don't get silently dropped. + */ + kernel_sigaction(sig, SIG_KTHREAD_KERNEL); } static inline void disallow_signal(int sig) diff --git a/include/linux/siphash.h b/include/linux/siphash.h index bf21591a9e5e..0cda61855d90 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key) } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); -#endif u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); @@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); -#endif return ___siphash_aligned(data, len, key); } @@ -96,10 +93,8 @@ typedef struct { u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); -#endif u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); @@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); -#endif return ___hsiphash_aligned(data, len, key); } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 735ff1525f48..0c62b1d98ea0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #include <net/flow_dissector.h> #include <linux/splice.h> #include <linux/in6.h> +#include <linux/if_packet.h> #include <net/flow.h> /* A. Checksumming of received packets by device. @@ -221,6 +222,11 @@ struct sk_buff; #endif extern int sysctl_max_skb_frags; +/* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to + * segment using its current segmentation instead. + */ +#define GSO_BY_FRAGS 0xFFFF + typedef struct skb_frag_struct skb_frag_t; struct skb_frag_struct { @@ -595,6 +601,16 @@ struct sk_buff { */ kmemcheck_bitfield_begin(flags1); __u16 queue_mapping; + +/* if you move cloned around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define CLONED_MASK (1 << 7) +#else +#define CLONED_MASK 1 +#endif +#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + + __u8 __cloned_offset[0]; __u8 cloned:1, nohdr:1, fclone:2, @@ -799,6 +815,15 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb) return (struct rtable *)skb_dst(skb); } +/* For mangling skb->pkt_type from user space side from applications + * such as nft, tc, etc, we only allow a conservative subset of + * possible pkt_types to be set. +*/ +static inline bool skb_pkt_type_ok(u32 ptype) +{ + return ptype <= PACKET_OTHERHOST; +} + void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_tx_error(struct sk_buff *skb); @@ -1122,6 +1147,11 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } +/* Iterate through singly-linked GSO fragments of an skb. */ +#define skb_list_walk_safe(first, skb, next_skb) \ + for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \ + (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL) + /** * skb_queue_empty - check if a queue is empty * @list: queue head @@ -1439,6 +1469,18 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) } /** + * skb_queue_len_lockless - get queue length + * @list_: list to measure + * + * Return the length of an &sk_buff queue. + * This variant can be used in lockless contexts. + */ +static inline __u32 skb_queue_len_lockless(const struct sk_buff_head *list_) +{ + return READ_ONCE(list_->qlen); +} + +/** * __skb_queue_head_init - initialize non-spinlock portions of sk_buff_head * @list: queue to initialize * @@ -1490,7 +1532,7 @@ static inline void __skb_insert(struct sk_buff *newsk, newsk->next = next; newsk->prev = prev; next->prev = prev->next = newsk; - list->qlen++; + WRITE_ONCE(list->qlen, list->qlen + 1); } static inline void __skb_queue_splice(const struct sk_buff_head *list, @@ -1641,7 +1683,7 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { struct sk_buff *next, *prev; - list->qlen--; + WRITE_ONCE(list->qlen, list->qlen - 1); next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; @@ -2188,7 +2230,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) int ___pskb_trim(struct sk_buff *skb, unsigned int len); -static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) { if (unlikely(skb_is_nonlinear(skb))) { WARN_ON(1); @@ -2198,6 +2240,11 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len) skb_set_tail_pointer(skb, len); } +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +{ + __skb_set_length(skb, len); +} + void skb_trim(struct sk_buff *skb, unsigned int len); static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) @@ -2228,6 +2275,20 @@ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) BUG_ON(err); } +static inline int __skb_grow(struct sk_buff *skb, unsigned int len) +{ + unsigned int diff = len - skb->len; + + if (skb_tailroom(skb) < diff) { + int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb), + GFP_ATOMIC); + if (ret) + return ret; + } + __skb_set_length(skb, len); + return 0; +} + /** * skb_orphan - orphan a buffer * @skb: buffer to orphan @@ -2651,7 +2712,7 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ -static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len) { unsigned int size = skb->len; @@ -2738,6 +2799,18 @@ static inline int skb_linearize_cow(struct sk_buff *skb) __skb_linearize(skb) : 0; } +static __always_inline void +__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, + unsigned int off) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_block_sub(skb->csum, + csum_partial(start, len, 0), off); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) < 0) + skb->ip_summed = CHECKSUM_NONE; +} + /** * skb_postpull_rcsum - update checksum for received skb after pull * @skb: buffer to update @@ -2748,36 +2821,38 @@ static inline int skb_linearize_cow(struct sk_buff *skb) * update the CHECKSUM_COMPLETE checksum, or set ip_summed to * CHECKSUM_NONE so that it can be recomputed from scratch. */ - static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); - else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) < 0) - skb->ip_summed = CHECKSUM_NONE; + __skb_postpull_rcsum(skb, start, len, 0); } -unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +static __always_inline void +__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len, + unsigned int off) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_block_add(skb->csum, + csum_partial(start, len, 0), off); +} +/** + * skb_postpush_rcsum - update checksum for received skb after push + * @skb: buffer to update + * @start: start of data after push + * @len: length of data pushed + * + * After doing a push on a received packet, you need to call this to + * update the CHECKSUM_COMPLETE checksum. + */ static inline void skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - /* For performing the reverse operation to skb_postpull_rcsum(), - * we can instead of ... - * - * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); - * - * ... just use this equivalent version here to save a few - * instructions. Feeding csum of 0 in csum_partial() and later - * on adding skb->csum is equivalent to feed skb->csum in the - * first place. - */ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_partial(start, len, skb->csum); + __skb_postpush_rcsum(skb, start, len, 0); } +unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); + /** * skb_push_rcsum - push skb and update receive checksum * @skb: buffer to update @@ -2821,6 +2896,21 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) #define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) #define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) +static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + __skb_trim(skb, len); + return 0; +} + +static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + return __skb_grow(skb, len); +} + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ @@ -3589,6 +3679,13 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } +static inline void skb_gso_reset(struct sk_buff *skb) +{ + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_segs = 0; + skb_shinfo(skb)->gso_type = 0; +} + void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index a0596ca0e80a..a2f8109bb215 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -24,6 +24,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +u64 sock_gen_cookie(struct sock *sk); int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 01cf8b6ac61a..8a388b5e9ea9 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -427,6 +427,9 @@ struct spi_master { #define SPI_MASTER_MUST_RX BIT(3) /* requires rx */ #define SPI_MASTER_MUST_TX BIT(4) /* requires tx */ + /* flag indicating this is a non-devres managed controller */ + bool devm_allocated; + /* flag indicating this is an SPI slave controller */ bool slave; @@ -595,6 +598,9 @@ static inline struct spi_master *spi_alloc_slave(struct device *host, return __spi_alloc_controller(host, size, true); } +extern struct spi_master *devm_spi_alloc_master(struct device *dev, + unsigned int size); + extern int spi_register_master(struct spi_master *master); extern int devm_spi_register_master(struct device *dev, struct spi_master *master); diff --git a/include/linux/string.h b/include/linux/string.h index 870268d42ae7..9f745d7e9f3f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -28,6 +28,10 @@ size_t strlcpy(char *, const char *, size_t); #ifndef __HAVE_ARCH_STRSCPY ssize_t strscpy(char *, const char *, size_t); #endif + +/* Wraps calls to strscpy()/memset(), no arch specific code required */ +ssize_t strscpy_pad(char *dest, const char *src, size_t count); + #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); #endif @@ -98,6 +102,36 @@ extern __kernel_size_t strcspn(const char *,const char *); #ifndef __HAVE_ARCH_MEMSET extern void * memset(void *,int,__kernel_size_t); #endif + +#ifndef __HAVE_ARCH_MEMSET16 +extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); +#endif + +#ifndef __HAVE_ARCH_MEMSET32 +extern void *memset32(uint32_t *, uint32_t, __kernel_size_t); +#endif + +#ifndef __HAVE_ARCH_MEMSET64 +extern void *memset64(uint64_t *, uint64_t, __kernel_size_t); +#endif + +static inline void *memset_l(unsigned long *p, unsigned long v, + __kernel_size_t n) +{ + if (BITS_PER_LONG == 32) + return memset32((uint32_t *)p, v, n); + else + return memset64((uint64_t *)p, v, n); +} + +static inline void *memset_p(void **p, void *v, __kernel_size_t n) +{ + if (BITS_PER_LONG == 32) + return memset32((uint32_t *)p, (uintptr_t)v, n); + else + return memset64((uint64_t *)p, (uintptr_t)v, n); +} + #ifndef __HAVE_ARCH_MEMCPY extern void * memcpy(void *,const void *,__kernel_size_t); #endif @@ -170,4 +204,7 @@ static inline const char *kbasename(const char *path) return tail ? tail + 1 : path; } +void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, + int pad); + #endif /* _LINUX_STRING_H_ */ diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 1f911ccb2a75..4e8a8120f2a0 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -81,6 +81,7 @@ struct pf_desc { u32 service; char *name; char *auth_domain_name; + struct auth_domain *domain; }; /* Different mechanisms (e.g., krb5 or spkm3) may implement gss-api, and diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 807371357160..59cbf16eaeb5 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -158,9 +158,9 @@ typedef __be32 rpc_fraghdr; /* * Note that RFC 1833 does not put any size restrictions on the - * netid string, but all currently defined netid's fit in 4 bytes. + * netid string, but all currently defined netid's fit in 5 bytes. */ -#define RPCBIND_MAXNETIDLEN (4u) +#define RPCBIND_MAXNETIDLEN (5u) /* * Universal addresses are introduced in RFC 1833 and further spelled diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index 726aff1a5201..213fa12f56fc 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -20,7 +20,8 @@ int gss_svc_init(void); void gss_svc_shutdown(void); int gss_svc_init_net(struct net *net); void gss_svc_shutdown_net(struct net *net); -int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); +struct auth_domain *svcauth_gss_register_pseudoflavor(u32 pseudoflavor, + char *name); u32 svcauth_gss_flavor(struct auth_domain *dom); #endif /* __KERNEL__ */ diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 70c6b92e15a7..8def5e0a491f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -23,8 +23,7 @@ #define XDR_QUADLEN(l) (((l) + 3) >> 2) /* - * Generic opaque `network object.' At the kernel level, this type - * is used only by lockd. + * Generic opaque `network object.' */ #define XDR_MAX_NETOBJ 1024 struct xdr_netobj { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5d2779aa4bbe..cf569d676909 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -879,6 +879,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); asmlinkage long sys_setns(int fd, int nstype); +asmlinkage long sys_pidfd_open(pid_t pid, unsigned int flags); asmlinkage long sys_process_vm_readv(pid_t pid, const struct iovec __user *lvec, unsigned long liovcnt, @@ -909,4 +910,8 @@ asmlinkage long sys_membarrier(int cmd, int flags); asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); +asmlinkage long sys_pidfd_send_signal(int pidfd, int sig, + siginfo_t __user *info, + unsigned int flags); + #endif diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 2839d624d5ee..18fecaa860b7 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -300,6 +300,11 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) return kernfs_enable_ns(kn); } +__printf(2, 3) +int sysfs_emit(char *buf, const char *fmt, ...); +__printf(3, 4) +int sysfs_emit_at(char *buf, int at, const char *fmt, ...); + #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) @@ -506,6 +511,17 @@ static inline void sysfs_enable_ns(struct kernfs_node *kn) { } +__printf(2, 3) +static inline int sysfs_emit(char *buf, const char *fmt, ...) +{ + return 0; +} + +__printf(3, 4) +static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) +{ + return 0; +} #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, diff --git a/include/linux/syslog.h b/include/linux/syslog.h index c3a7f0cc3a27..e1c3632f4e81 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -49,13 +49,4 @@ int do_syslog(int type, char __user *buf, int count, int source); -#ifdef CONFIG_PRINTK -int check_syslog_permissions(int type, int source); -#else -static inline int check_syslog_permissions(int type, int source) -{ - return 0; -} -#endif - #endif /* _LINUX_SYSLOG_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 085da1707cea..b9e591582be9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -211,7 +211,8 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 unused; + u8 tlp_retrans:1, /* TLP is a retransmission */ + unused_1:7; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ @@ -225,7 +226,7 @@ struct tcp_sock { syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ - u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP */ /* RTT measurement */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ diff --git a/include/linux/time.h b/include/linux/time.h index 297f09f23896..cbb55e004342 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -9,6 +9,7 @@ extern struct timezone sys_tz; #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) +#define TIMER_LOCK_TIGHT_LOOP_DELAY_NS 350 static inline int timespec_equal(const struct timespec *a, const struct timespec *b) @@ -262,4 +263,16 @@ static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) a->tv_nsec = ns; } +/** + * time_between32 - check if a 32-bit timestamp is within a given time range + * @t: the time which may be within [l,h] + * @l: the lower bound of the range + * @h: the higher bound of the range + * + * time_before32(t, l, h) returns true if @l <= @t <= @h. All operands are + * treated as 32-bit integers. + * + * Equivalent to !(time_before32(@t, @l) || time_after32(@t, @h)). + */ +#define time_between32(t, l, h) ((u32)(h) - (u32)(l) >= (u32)(t) - (u32)(l)) #endif diff --git a/include/linux/time64.h b/include/linux/time64.h index 367d5af899e8..10239cffd70f 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -197,6 +197,10 @@ static inline bool timespec64_valid_strict(const struct timespec64 *ts) */ static inline s64 timespec64_to_ns(const struct timespec64 *ts) { + /* Prevent multiplication overflow */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return KTIME_MAX; + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; } diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2181ae5db42e..17fe42e836b4 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -302,14 +302,37 @@ struct trace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; - struct bpf_prog *prog; - struct perf_event *bpf_prog_owner; + struct bpf_prog_array __rcu *prog_array; int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; +#ifdef CONFIG_PERF_EVENTS +static inline bool bpf_prog_array_valid(struct trace_event_call *call) +{ + /* + * This inline function checks whether call->prog_array + * is valid or not. The function is called in various places, + * outside rcu_read_lock/unlock, as a heuristic to speed up execution. + * + * If this function returns true, and later call->prog_array + * becomes false inside rcu_read_lock/unlock region, + * we bail out then. If this function return false, + * there is a risk that we might miss a few events if the checking + * were delayed until inside rcu_read_lock/unlock region and + * call->prog_array happened to become non-NULL then. + * + * Here, READ_ONCE() is used instead of rcu_access_pointer(). + * rcu_access_pointer() requires the actual definition of + * "struct bpf_prog_array" while READ_ONCE() only needs + * a declaration of the same type. + */ + return !!READ_ONCE(call->prog_array); +} +#endif + static inline const char * trace_event_name(struct trace_event_call *call) { @@ -562,12 +585,23 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file, } #ifdef CONFIG_BPF_EVENTS -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); +int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); +void perf_event_detach_bpf_prog(struct perf_event *event); #else -static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { return 1; } + +static inline int +perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } + #endif enum { @@ -586,6 +620,7 @@ extern int trace_define_field(struct trace_event_call *call, const char *type, int is_signed, int filter_type); extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); +extern int trace_event_get_offsets(struct trace_event_call *call); #define is_signed_type(type) (((type)(-1)) < (type)1) @@ -622,16 +657,22 @@ extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); -extern void *perf_trace_buf_prepare(int size, unsigned short type, - struct pt_regs **regs, int *rctxp); +void perf_trace_buf_update(void *record, u16 type); +void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); + +void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, + struct trace_event_call *call, u64 count, + struct pt_regs *regs, struct hlist_head *head, + struct task_struct *task); static inline void -perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, +perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, struct task_struct *task) { - perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); } + #endif #endif /* _LINUX_TRACE_EVENT_H */ diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index cfaf5a1d4bad..f5be2716b01c 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -11,7 +11,7 @@ */ struct trace_seq { - unsigned char buffer[PAGE_SIZE]; + char buffer[PAGE_SIZE]; struct seq_buf seq; int full; }; @@ -50,7 +50,7 @@ static inline int trace_seq_used(struct trace_seq *s) * that is about to be written to and then return the result * of that write. */ -static inline unsigned char * +static inline char * trace_seq_buffer_ptr(struct trace_seq *s) { return s->buffer + seq_buf_used(&s->seq); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 27e32b2b602f..d7875d312f1f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -328,7 +328,7 @@ extern void syscall_unregfunc(void); static const char *___tp_str __tracepoint_string = str; \ ___tp_str; \ }) -#define __tracepoint_string __attribute__((section("__tracepoint_str"))) +#define __tracepoint_string __attribute__((section("__tracepoint_str"), used)) #else /* * tracepoint_string() is used to save the string address for userspace diff --git a/include/linux/tty.h b/include/linux/tty.h index 812cdd8cff22..68392a4ca28a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -12,7 +12,7 @@ #include <uapi/linux/tty.h> #include <linux/rwsem.h> #include <linux/llist.h> - +#include <linux/kthread.h> /* * Lock subclasses for tty locks @@ -64,7 +64,7 @@ struct tty_buffer { int read; int flags; /* Data points here */ - unsigned long data[0]; + unsigned long data[]; }; /* Values for .flags field of tty_buffer */ @@ -82,7 +82,7 @@ static inline char *flag_buf_ptr(struct tty_buffer *b, int ofs) struct tty_bufhead { struct tty_buffer *head; /* Queue head */ - struct work_struct work; + struct kthread_work work; struct mutex lock; atomic_t priority; struct tty_buffer sentinel; @@ -240,6 +240,8 @@ struct tty_port { based drain is needed else set to size of fifo */ struct kref kref; /* Ref counter */ + struct kthread_worker worker; /* worker thread */ + struct task_struct *worker_thread; /* worker thread */ }; /* @@ -280,6 +282,10 @@ struct tty_struct { struct termiox *termiox; /* May be NULL for unsupported */ char name[64]; struct pid *pgrp; /* Protected by ctrl lock */ + /* + * Writes protected by both ctrl lock and legacy mutex, readers must use + * at least one of them. + */ struct pid *session; unsigned long flags; int count; @@ -580,6 +586,8 @@ static inline int tty_port_users(struct tty_port *port) { return port->count + port->blocked_open; } +extern int tty_port_set_policy(struct tty_port *port, int policy, + int sched_priority); extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 161052477f77..6d8db1555260 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -235,7 +235,7 @@ * * Called when the device receives a TIOCGICOUNT ioctl. Passed a kernel * structure to complete. This method is optional and will only be called - * if provided (otherwise EINVAL will be returned). + * if provided (otherwise ENOTTY will be returned). */ #include <linux/export.h> diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h index d43837f2ce3a..7a97606ddb36 100644 --- a/include/linux/tty_flip.h +++ b/include/linux/tty_flip.h @@ -39,4 +39,7 @@ static inline int tty_insert_flip_string(struct tty_port *port, extern void tty_buffer_lock_exclusive(struct tty_port *port); extern void tty_buffer_unlock_exclusive(struct tty_port *port); +int tty_insert_flip_string_and_push_buffer(struct tty_port *port, + const unsigned char *chars, size_t cnt); + #endif /* _LINUX_TTY_FLIP_H */ diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index df89c9bcba7d..7b38288dc239 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -68,12 +68,13 @@ struct u64_stats_sync { }; +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) +#else static inline void u64_stats_init(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - seqcount_init(&syncp->seq); -#endif } +#endif static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 9442423979c1..cc5ba47062e8 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -91,6 +91,17 @@ extern long probe_kernel_read(void *dst, const void *src, size_t size); extern long __probe_kernel_read(void *dst, const void *src, size_t size); /* + * probe_user_read(): safely attempt to read from a location in user space + * @dst: pointer to the buffer that shall take the data + * @src: address to read from + * @size: size of the data chunk + * + * Safely read from address @src to the buffer at @dst. If a kernel fault + * happens, handle that and return -EFAULT. + */ +extern long probe_user_read(void *dst, const void __user *src, size_t size); + +/* * probe_kernel_write(): safely attempt to write to a location * @dst: address to write to * @src: pointer to the data that shall be written @@ -102,7 +113,22 @@ extern long __probe_kernel_read(void *dst, const void *src, size_t size); extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); +/* + * probe_user_write(): safely attempt to write to a location in user space + * @dst: address to write to + * @src: pointer to the data that shall be written + * @size: size of the data chunk + * + * Safely write to address @dst from the buffer at @src. If a kernel fault + * happens, handle that and return -EFAULT. + */ +extern long notrace probe_user_write(void __user *dst, const void *src, size_t size); +extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size); + extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); +extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, + long count); +extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); /** * probe_kernel_address(): safely attempt to read from a location diff --git a/include/linux/udp.h b/include/linux/udp.h index 87c094961bd5..32342754643a 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } -#define udp_portaddr_for_each_entry(__sk, node, list) \ - hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry(__sk, list) \ + hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) -#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ - hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry_rcu(__sk, list) \ + hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) diff --git a/include/linux/usb.h b/include/linux/usb.h index eab41e3be816..8ccb8ebb16aa 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -97,6 +97,41 @@ enum usb_interface_condition { USB_INTERFACE_UNBINDING, }; +int __must_check +usb_find_common_endpoints(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in, + struct usb_endpoint_descriptor **bulk_out, + struct usb_endpoint_descriptor **int_in, + struct usb_endpoint_descriptor **int_out); + +static inline int __must_check +usb_find_bulk_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in) +{ + return usb_find_common_endpoints(alt, bulk_in, NULL, NULL, NULL); +} + +static inline int __must_check +usb_find_bulk_out_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_out) +{ + return usb_find_common_endpoints(alt, NULL, bulk_out, NULL, NULL); +} + +static inline int __must_check +usb_find_int_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **int_in) +{ + return usb_find_common_endpoints(alt, NULL, NULL, int_in, NULL); +} + +static inline int __must_check +usb_find_int_out_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **int_out) +{ + return usb_find_common_endpoints(alt, NULL, NULL, NULL, int_out); +} + /** * struct usb_interface - what usb device drivers talk to * @altsetting: array of interface structures, one for each alternate @@ -506,6 +541,7 @@ struct usb3_lpm_parameters { * @level: number of USB hub ancestors * @can_submit: URBs may be submitted * @persist_enabled: USB_PERSIST enabled for this device + * @reset_in_progress: the device is being reset * @have_langid: whether string_langid is valid * @authorized: policy has said we can use it; * (user space) policy determines if we authorize this device to be @@ -585,6 +621,7 @@ struct usb_device { unsigned can_submit:1; unsigned persist_enabled:1; + unsigned reset_in_progress:1; unsigned have_langid:1; unsigned authorized:1; unsigned authenticated:1; @@ -1677,6 +1714,8 @@ static inline int usb_urb_dir_out(struct urb *urb) return (urb->transfer_flags & URB_DIR_MASK) == URB_DIR_OUT; } +int usb_urb_ep_type_check(const struct urb *urb); + void *usb_alloc_coherent(struct usb_device *dev, size_t size, gfp_t mem_flags, dma_addr_t *dma); void usb_free_coherent(struct usb_device *dev, size_t size, diff --git a/include/linux/usb/irda.h b/include/linux/usb/irda.h index e345ceaf72d6..9dc46010a067 100644 --- a/include/linux/usb/irda.h +++ b/include/linux/usb/irda.h @@ -118,11 +118,22 @@ struct usb_irda_cs_descriptor { * 6 - 115200 bps * 7 - 576000 bps * 8 - 1.152 Mbps - * 9 - 5 mbps + * 9 - 4 Mbps * 10..15 - Reserved */ #define USB_IRDA_STATUS_LINK_SPEED 0x0f +#define USB_IRDA_LS_NO_CHANGE 0 +#define USB_IRDA_LS_2400 1 +#define USB_IRDA_LS_9600 2 +#define USB_IRDA_LS_19200 3 +#define USB_IRDA_LS_38400 4 +#define USB_IRDA_LS_57600 5 +#define USB_IRDA_LS_115200 6 +#define USB_IRDA_LS_576000 7 +#define USB_IRDA_LS_1152000 8 +#define USB_IRDA_LS_4000000 9 + /* The following is a 4-bit value used only for * outbound header: * diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index ea4f81c2a6d5..602dff213bae 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -59,4 +59,7 @@ /* Device needs a pause after every control message. */ #define USB_QUIRK_DELAY_CTRL_MSG BIT(13) +/* device has blacklisted endpoints */ +#define USB_QUIRK_ENDPOINT_BLACKLIST BIT(15) + #endif /* __LINUX_USB_QUIRKS_H */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 8297e5b341d8..190cf0760815 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -72,6 +72,9 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); +extern bool current_in_userns(const struct user_namespace *target_ns); + +struct ns_common *ns_get_owner(struct ns_common *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -100,6 +103,16 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns) { return true; } + +static inline bool current_in_userns(const struct user_namespace *target_ns) +{ + return true; +} + +static inline struct ns_common *ns_get_owner(struct ns_common *ns) +{ + return ERR_PTR(-EPERM); +} #endif #endif /* _LINUX_USER_H */ diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 584f9a647ad4..6b6c1138f963 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -11,7 +11,8 @@ #define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) #define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL -#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) +#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE virtio_transport_max_vsock_pkt_buf_size +extern uint virtio_transport_max_vsock_pkt_buf_size; enum { VSOCK_VQ_RX = 0, /* for host to guest data */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index bad3ea4ded8e..4df88614319a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -96,8 +96,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); -void vmalloc_sync_all(void); - +void vmalloc_sync_mappings(void); +void vmalloc_sync_unmappings(void); + /* * Lowlevel-APIs (not for driver use!) */ diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 8d7634247fb4..c39d30db067c 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -142,7 +142,7 @@ static inline bool vt_force_oops_output(struct vc_data *vc) return false; } -extern char vt_dont_switch; +extern bool vt_dont_switch; extern int default_utf8; extern int global_cursor_default; diff --git a/include/linux/wait.h b/include/linux/wait.h index 513b36f04dfd..419b5b2bf547 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -151,6 +151,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); +void __wake_up_pollfree(wait_queue_head_t *wq_head); void __wake_up_bit(wait_queue_head_t *, void *, int); int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); @@ -185,6 +186,31 @@ wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up_interruptible_sync_poll(x, m) \ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m)) +/** + * wake_up_pollfree - signal that a polled waitqueue is going away + * @wq_head: the wait queue head + * + * In the very rare cases where a ->poll() implementation uses a waitqueue whose + * lifetime is tied to a task rather than to the 'struct file' being polled, + * this function must be called before the waitqueue is freed so that + * non-blocking polls (e.g. epoll) are notified that the queue is going away. + * + * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via + * an explicit synchronize_rcu() or call_rcu(), or via SLAB_DESTROY_BY_RCU. + */ +static inline void wake_up_pollfree(wait_queue_head_t *wq_head) +{ + /* + * For performance reasons, we don't always take the queue lock here. + * Therefore, we might race with someone removing the last entry from + * the queue, and proceed while they still hold the queue lock. + * However, rcu_read_lock() is required to be held in such cases, so we + * can safely proceed with an RCU-delayed free. + */ + if (waitqueue_active(wq_head)) + __wake_up_pollfree(wq_head); +} + #define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h index d84d8c301546..a015ce5c830f 100644 --- a/include/linux/wakeup_reason.h +++ b/include/linux/wakeup_reason.h @@ -18,15 +18,87 @@ #ifndef _LINUX_WAKEUP_REASON_H #define _LINUX_WAKEUP_REASON_H +#include <linux/types.h> +#include <linux/completion.h> + #define MAX_SUSPEND_ABORT_LEN 256 -void log_wakeup_reason(int irq); -int check_wakeup_reason(int irq); +struct wakeup_irq_node { + /* @leaf is a linked list of all leaf nodes in the interrupts trees. + */ + struct list_head next; + /* @irq: IRQ number of this node. + */ + int irq; + struct irq_desc *desc; + + /* @siblings contains the list of irq nodes at the same depth; at a + * depth of zero, this is the list of base wakeup interrupts. + */ + struct list_head siblings; + /* @parent: only one node in a siblings list has a pointer to the + * parent; that node is the head of the list of siblings. + */ + struct wakeup_irq_node *parent; + /* @child: any node can have one child + */ + struct wakeup_irq_node *child; + /* @handled: this flag is set to true when the interrupt handler (one of + * handle_.*_irq in kernel/irq/handle.c) for this node gets called; it is set + * to false otherwise. We use this flag to determine whether a subtree rooted + * at a node has been handled. When all trees rooted at + * base-wakeup-interrupt nodes have been handled, we stop logging + * potential wakeup interrupts, and construct the list of actual + * wakeups from the leaves of these trees. + */ + bool handled; +}; + +#ifdef CONFIG_DEDUCE_WAKEUP_REASONS + +/* Called in the resume path, with interrupts and nonboot cpus disabled; on + * need for a spinlock. + */ +static inline void start_logging_wakeup_reasons(void) +{ + extern bool log_wakeups; + extern struct completion wakeups_completion; + ACCESS_ONCE(log_wakeups) = true; + init_completion(&wakeups_completion); +} + +static inline bool logging_wakeup_reasons_nosync(void) +{ + extern bool log_wakeups; + return ACCESS_ONCE(log_wakeups); +} + +static inline bool logging_wakeup_reasons(void) +{ + smp_rmb(); + return logging_wakeup_reasons_nosync(); +} + +bool log_possible_wakeup_reason(int irq, + struct irq_desc *desc, + bool (*handler)(struct irq_desc *)); -#ifdef CONFIG_SUSPEND -void log_suspend_abort_reason(const char *fmt, ...); #else -static inline void log_suspend_abort_reason(const char *fmt, ...) { } + +static inline void start_logging_wakeup_reasons(void) {} +static inline bool logging_wakeup_reasons_nosync(void) { return false; } +static inline bool logging_wakeup_reasons(void) { return false; } +static inline bool log_possible_wakeup_reason(int irq, + struct irq_desc *desc, + bool (*handler)(struct irq_desc *)) { return true; } + #endif +const struct list_head* +get_wakeup_reasons(unsigned long timeout, struct list_head *unfinished); +void log_base_wakeup_reason(int irq); +void clear_wakeup_reasons(void); +void log_suspend_abort_reason(const char *fmt, ...); +int check_wakeup_reason(int irq); + #endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0703a632e340..58f3d8ed5727 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -443,6 +443,7 @@ extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); +extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 2219cce81ca4..4638dddc040b 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -36,7 +36,7 @@ enum zs_mapmode { struct zs_pool_stats { /* How many pages were migrated (freed) */ - unsigned long pages_compacted; + atomic_long_t pages_compacted; }; struct zs_pool; diff --git a/include/math-emu/soft-fp.h b/include/math-emu/soft-fp.h index 3f284bc03180..5650c1628383 100644 --- a/include/math-emu/soft-fp.h +++ b/include/math-emu/soft-fp.h @@ -138,7 +138,7 @@ do { \ _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND); \ } while (0) -#define _FP_ROUND_ZERO(wc, X) 0 +#define _FP_ROUND_ZERO(wc, X) (void)0 #define _FP_ROUND_PINF(wc, X) \ do { \ diff --git a/include/media/davinci/vpbe.h b/include/media/davinci/vpbe.h index 4376beeb28c2..5d8ceeddc797 100644 --- a/include/media/davinci/vpbe.h +++ b/include/media/davinci/vpbe.h @@ -96,7 +96,7 @@ struct vpbe_config { struct encoder_config_info *ext_encoders; /* amplifier information goes here */ struct amp_config_info *amp; - int num_outputs; + unsigned int num_outputs; /* Order is venc outputs followed by LCD and then external encoders */ struct vpbe_output *outputs; }; diff --git a/include/media/media-device.h b/include/media/media-device.h index 6e6db78f1ee2..00bbd679864a 100644 --- a/include/media/media-device.h +++ b/include/media/media-device.h @@ -60,7 +60,7 @@ struct device; struct media_device { /* dev->driver_data points to this struct. */ struct device *dev; - struct media_devnode devnode; + struct media_devnode *devnode; char model[32]; char serial[40]; @@ -84,9 +84,6 @@ struct media_device { #define MEDIA_DEV_NOTIFY_PRE_LINK_CH 0 #define MEDIA_DEV_NOTIFY_POST_LINK_CH 1 -/* media_devnode to media_device */ -#define to_media_device(node) container_of(node, struct media_device, devnode) - int __must_check __media_device_register(struct media_device *mdev, struct module *owner); #define media_device_register(mdev) __media_device_register(mdev, THIS_MODULE) diff --git a/include/media/media-devnode.h b/include/media/media-devnode.h index 17ddae32060d..d5ff95bf2d4b 100644 --- a/include/media/media-devnode.h +++ b/include/media/media-devnode.h @@ -33,6 +33,8 @@ #include <linux/device.h> #include <linux/cdev.h> +struct media_device; + /* * Flag to mark the media_devnode struct as registered. Drivers must not touch * this flag directly, it will be set and cleared by media_devnode_register and @@ -67,6 +69,8 @@ struct media_file_operations { * before registering the node. */ struct media_devnode { + struct media_device *media_dev; + /* device ops */ const struct media_file_operations *fops; @@ -80,24 +84,42 @@ struct media_devnode { unsigned long flags; /* Use bitops to access flags */ /* callbacks */ - void (*release)(struct media_devnode *mdev); + void (*release)(struct media_devnode *devnode); }; /* dev to media_devnode */ #define to_media_devnode(cd) container_of(cd, struct media_devnode, dev) -int __must_check media_devnode_register(struct media_devnode *mdev, +int __must_check media_devnode_register(struct media_device *mdev, + struct media_devnode *devnode, struct module *owner); -void media_devnode_unregister(struct media_devnode *mdev); + +/** + * media_devnode_unregister_prepare - clear the media device node register bit + * @devnode: the device node to prepare for unregister + * + * This clears the passed device register bit. Future open calls will be met + * with errors. Should be called before media_devnode_unregister() to avoid + * races with unregister and device file open calls. + * + * This function can safely be called if the device node has never been + * registered or has already been unregistered. + */ +void media_devnode_unregister_prepare(struct media_devnode *devnode); + +void media_devnode_unregister(struct media_devnode *devnode); static inline struct media_devnode *media_devnode_data(struct file *filp) { return filp->private_data; } -static inline int media_devnode_is_registered(struct media_devnode *mdev) +static inline int media_devnode_is_registered(struct media_devnode *devnode) { - return test_bit(MEDIA_FLAG_REGISTERED, &mdev->flags); + if (!devnode) + return false; + + return test_bit(MEDIA_FLAG_REGISTERED, &devnode->flags); } #endif /* _MEDIA_DEVNODE_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index c796f93aaa70..fa06e7418f41 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -90,7 +90,10 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2); +int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard); +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); @@ -193,8 +196,10 @@ struct ipv6_stub { const struct in6_addr *addr); int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex, const struct in6_addr *addr); - int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, - struct dst_entry **dst, struct flowi6 *fl6); + struct dst_entry *(*ipv6_dst_lookup_flow)(struct net *net, + const struct sock *sk, + struct flowi6 *fl6, + const struct in6_addr *final_dst); void (*udpv6_encap_enable)(void); void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, @@ -240,6 +245,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); +void __ipv6_sock_ac_close(struct sock *sk); void ipv6_sock_ac_close(struct sock *sk); int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr); diff --git a/include/net/af_unix.h b/include/net/af_unix.h index fd60eccb59a6..79f2e1ccfcfb 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -8,6 +8,7 @@ void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); +void unix_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7c0c83dfe86e..e1e181059324 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -498,6 +498,7 @@ struct hci_chan { struct sk_buff_head data_q; unsigned int sent; __u8 state; + bool amp; }; struct hci_conn_params { @@ -1012,6 +1013,7 @@ struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); +void hci_cleanup_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); @@ -1235,16 +1237,34 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status) conn->security_cfm_cb(conn, status); } -static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status, - __u8 encrypt) +static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status) { struct hci_cb *cb; + __u8 encrypt; + + if (conn->state == BT_CONFIG) { + if (!status) + conn->state = BT_CONNECTED; - if (conn->sec_level == BT_SECURITY_SDP) - conn->sec_level = BT_SECURITY_LOW; + hci_connect_cfm(conn, status); + hci_conn_drop(conn); + return; + } - if (conn->pending_sec_level > conn->sec_level) - conn->sec_level = conn->pending_sec_level; + if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) + encrypt = 0x00; + else if (test_bit(HCI_CONN_AES_CCM, &conn->flags)) + encrypt = 0x02; + else + encrypt = 0x01; + + if (!status) { + if (conn->sec_level == BT_SECURITY_SDP) + conn->sec_level = BT_SECURITY_LOW; + + if (conn->pending_sec_level > conn->sec_level) + conn->sec_level = conn->pending_sec_level; + } mutex_lock(&hci_cb_list_lock); list_for_each_entry(cb, &hci_cb_list, list) { diff --git a/include/net/bonding.h b/include/net/bonding.h index d5abd3a80896..1d85c5179fa8 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -34,6 +34,9 @@ #define BOND_DEFAULT_MIIMON 100 +#ifndef __long_aligned +#define __long_aligned __attribute__((aligned((sizeof(long))))) +#endif /* * Less bad way to call ioctl from within the kernel; this needs to be * done some other way to get the call out of interrupt context. @@ -138,7 +141,9 @@ struct bond_params { struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; - u8 ad_actor_system[ETH_ALEN]; + + /* 2 bytes of padding : see ether_addr_equal_64bits() */ + u8 ad_actor_system[ETH_ALEN + 2]; }; struct bond_parm_tbl { @@ -176,6 +181,11 @@ struct slave { struct rtnl_link_stats64 slave_stats; }; +static inline struct slave *to_slave(struct kobject *kobj) +{ + return container_of(kobj, struct slave, kobj); +} + struct bond_up_slave { unsigned int count; struct rcu_head rcu; @@ -662,6 +672,9 @@ extern struct bond_parm_tbl ad_select_tbl[]; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; +/* exported from bond_sysfs_slave.c */ +extern const struct sysfs_ops slave_sysfs_ops; + static inline void bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { atomic_long_inc(&dev->tx_dropped); diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h index 028b754ae9b1..0baf2e21a533 100644 --- a/include/net/caif/caif_dev.h +++ b/include/net/caif/caif_dev.h @@ -119,7 +119,7 @@ void caif_free_client(struct cflayer *adap_layer); * The link_support layer is used to add any Link Layer specific * framing. */ -void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, +int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, struct cflayer *link_support, int head_room, struct cflayer **layer, int (**rcv_func)( struct sk_buff *, struct net_device *, diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h index 70bfd017581f..219094ace893 100644 --- a/include/net/caif/cfcnfg.h +++ b/include/net/caif/cfcnfg.h @@ -62,7 +62,7 @@ void cfcnfg_remove(struct cfcnfg *cfg); * @fcs: Specify if checksum is used in CAIF Framing Layer. * @head_room: Head space needed by link specific protocol. */ -void +int cfcnfg_add_phy_layer(struct cfcnfg *cnfg, struct net_device *dev, struct cflayer *phy_layer, enum cfcnfg_phy_preference pref, diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h index b5b020f3c72e..bc3fae07a25f 100644 --- a/include/net/caif/cfserl.h +++ b/include/net/caif/cfserl.h @@ -9,4 +9,5 @@ #include <net/caif/caif_layer.h> struct cflayer *cfserl_create(int instance, bool use_stx); +void cfserl_release(struct cflayer *layer); #endif diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fed8f33ea41c..2e11af8cdb02 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -102,26 +102,6 @@ struct wiphy; */ /** - * enum ieee80211_band - supported frequency bands - * - * The bands are assigned this way because the supported - * bitrates differ in these bands. - * - * @IEEE80211_BAND_2GHZ: 2.4GHz ISM band - * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7) - * @IEEE80211_BAND_60GHZ: around 60 GHz band (58.32 - 64.80 GHz) - * @IEEE80211_NUM_BANDS: number of defined bands - */ -enum ieee80211_band { - IEEE80211_BAND_2GHZ = NL80211_BAND_2GHZ, - IEEE80211_BAND_5GHZ = NL80211_BAND_5GHZ, - IEEE80211_BAND_60GHZ = NL80211_BAND_60GHZ, - - /* keep last */ - IEEE80211_NUM_BANDS -}; - -/** * enum ieee80211_channel_flags - channel flags * * Channel flags set by the regulatory control code. @@ -201,7 +181,7 @@ enum ieee80211_channel_flags { * @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels. */ struct ieee80211_channel { - enum ieee80211_band band; + enum nl80211_band band; u16 center_freq; u16 hw_value; u32 flags; @@ -358,7 +338,7 @@ struct ieee80211_sta_vht_cap { struct ieee80211_supported_band { struct ieee80211_channel *channels; struct ieee80211_rate *bitrates; - enum ieee80211_band band; + enum nl80211_band band; int n_channels; int n_bitrates; struct ieee80211_sta_ht_cap ht_cap; @@ -733,7 +713,7 @@ struct cfg80211_bitrate_mask { u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; u16 vht_mcs[NL80211_VHT_NSS_MAX]; enum nl80211_txrate_gi gi; - } control[IEEE80211_NUM_BANDS]; + } control[NUM_NL80211_BANDS]; }; /** @@ -1454,7 +1434,7 @@ struct mesh_setup { bool user_mpm; u8 dtim_period; u16 beacon_interval; - int mcast_rate[IEEE80211_NUM_BANDS]; + int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; struct cfg80211_bitrate_mask beacon_rate; }; @@ -1553,7 +1533,7 @@ struct cfg80211_scan_request { size_t ie_len; u32 flags; - u32 rates[IEEE80211_NUM_BANDS]; + u32 rates[NUM_NL80211_BANDS]; struct wireless_dev *wdev; @@ -1981,7 +1961,7 @@ struct cfg80211_ibss_params { bool privacy; bool control_port; bool userspace_handles_dfs; - int mcast_rate[IEEE80211_NUM_BANDS]; + int mcast_rate[NUM_NL80211_BANDS]; struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; }; @@ -1997,7 +1977,7 @@ struct cfg80211_ibss_params { struct cfg80211_bss_selection { enum nl80211_bss_select_attr behaviour; union { - enum ieee80211_band band_pref; + enum nl80211_band band_pref; struct cfg80211_bss_select_adjust adjust; } param; }; @@ -2903,7 +2883,7 @@ struct cfg80211_ops { int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); int (*set_mcast_rate)(struct wiphy *wiphy, struct net_device *dev, - int rate[IEEE80211_NUM_BANDS]); + int rate[NUM_NL80211_BANDS]); int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); @@ -3615,7 +3595,7 @@ struct wiphy { * help determine whether you own this wiphy or not. */ const void *privid; - struct ieee80211_supported_band *bands[IEEE80211_NUM_BANDS]; + struct ieee80211_supported_band *bands[NUM_NL80211_BANDS]; /* Lets us get back the wiphy on the callback */ void (*reg_notifier)(struct wiphy *wiphy, @@ -3959,7 +3939,7 @@ static inline void *wdev_priv(struct wireless_dev *wdev) * @band: band, necessary due to channel number overlap * Return: The corresponding frequency (in MHz), or 0 if the conversion failed. */ -int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band); +int ieee80211_channel_to_frequency(int chan, enum nl80211_band band); /** * ieee80211_frequency_to_channel - convert frequency to channel number @@ -4228,6 +4208,17 @@ const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, const u8 *ies, int len); /** + * cfg80211_send_layer2_update - send layer 2 update frame + * + * @dev: network device + * @addr: STA MAC address + * + * Wireless drivers can use this function to update forwarding tables in bridge + * devices upon STA association. + */ +void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr); + +/** * DOC: Regulatory enforcement infrastructure * * TODO @@ -4741,7 +4732,8 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, * cfg80211 then sends a notification to userspace. */ void cfg80211_notify_new_peer_candidate(struct net_device *dev, - const u8 *macaddr, const u8 *ie, u8 ie_len, gfp_t gfp); + const u8 *macaddr, const u8 *ie, u8 ie_len, + int sig_dbm, gfp_t gfp); /** * DOC: RFkill integration @@ -5591,7 +5583,7 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, * Returns %true if the conversion was successful, %false otherwise. */ bool ieee80211_operating_class_to_band(u8 operating_class, - enum ieee80211_band *band); + enum nl80211_band *band); /** * ieee80211_chandef_to_operating_class - convert chandef to operation class @@ -5968,4 +5960,11 @@ void cfg80211_update_owe_info_event(struct net_device *netdev, struct cfg80211_update_owe_info *owe_info, gfp_t gfp); +/* Due to our tree having a backport of + * 57fbcce37be7c1d2622b56587c10ade00e96afa3, this allows QC to support 4.7+ + * kernels that use the newer NL80211_BAND_* and older kernels that use the + * older IEEE80211_BAND_* enums. + */ +#define CFG80211_REMOVE_IEEE80211_BACKPORT 1 + #endif /* __NET_CFG80211_H */ diff --git a/include/net/checksum.h b/include/net/checksum.h index 9fcaedf994ee..c5b95e8fcd16 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -120,6 +120,11 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) #define CSUM_MANGLED_0 ((__force __sum16)0xffff) +static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) +{ + *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); +} + static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index ccd6d8bffa4d..c0a92e2c286d 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -41,13 +41,12 @@ static inline u32 task_cls_classid(struct task_struct *p) return classid; } -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { u32 classid; classid = task_cls_classid(current); - if (classid != sk->sk_classid) - sk->sk_classid = classid; + sock_cgroup_set_classid(skcd, classid); } static inline u32 task_get_classid(const struct sk_buff *skb) @@ -64,17 +63,17 @@ static inline u32 task_get_classid(const struct sk_buff *skb) * softirqs always disables bh. */ if (in_serving_softirq()) { - /* If there is an sk_classid we'll use that. */ + /* If there is an sock_cgroup_classid we'll use that. */ if (!skb->sk) return 0; - classid = skb->sk->sk_classid; + classid = sock_cgroup_classid(&skb->sk->sk_cgrp_data); } return classid; } #else /* !CONFIG_CGROUP_NET_CLASSID */ -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { } diff --git a/include/net/dst.h b/include/net/dst.h index e4f450617919..1daa9e2a1b46 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -113,7 +113,7 @@ struct dst_entry { struct dst_metrics { u32 metrics[RTAX_MAX]; atomic_t refcnt; -}; +} __aligned(4); /* Low pointer bits contain DST_METRICS_FLAGS */ extern const struct dst_metrics dst_default_metrics; u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); @@ -404,6 +404,18 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, __skb_tunnel_rx(skb, dev, net); } +static inline u32 dst_tclassid(const struct sk_buff *skb) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + const struct dst_entry *dst; + + dst = skb_dst(skb); + if (dst) + return dst->tclassid; +#endif + return 0; +} + int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); static inline int dst_discard(struct sk_buff *skb) { @@ -470,7 +482,15 @@ static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, co static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, struct sk_buff *skb) { - struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL); + struct neighbour *n = NULL; + + /* The packets from tunnel devices (eg bareudp) may have only + * metadata in the dst pointer of skb. Hence a pointer check of + * neigh_lookup is needed. + */ + if (dst->ops->neigh_lookup) + n = dst->ops->neigh_lookup(dst, skb, NULL); + return IS_ERR(n) ? NULL : n; } diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 30a56ab2ccfb..b55d2e854bf3 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -31,7 +31,9 @@ static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) return &md_dst->u.tun_info; dst = skb_dst(skb); - if (dst && dst->lwtstate) + if (dst && dst->lwtstate && + (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || + dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; @@ -125,7 +127,7 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, ip_tunnel_key_init(&tun_dst->u.tun_info.key, iph->saddr, iph->daddr, iph->tos, iph->ttl, - 0, 0, tunnel_id, flags); + 0, 0, 0, tunnel_id, flags); return tun_dst; } @@ -151,8 +153,11 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, info->key.u.ipv6.src = ip6h->saddr; info->key.u.ipv6.dst = ip6h->daddr; + info->key.tos = ipv6_get_dsfield(ip6h); info->key.ttl = ip6h->hop_limit; + info->key.label = ip6_flowlabel(ip6h); + return tun_dst; } diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index bd2b5c007561..ec6f2d2010fd 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -91,6 +91,7 @@ struct fib_rules_ops { [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ + [FRA_TUN_ID] = { .type = NLA_U64 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ diff --git a/include/net/flow.h b/include/net/flow.h index 833080732dec..2dae98019fb2 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -191,11 +191,21 @@ static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) return container_of(fl4, struct flowi, u.ip4); } +static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4) +{ + return &(fl4->__fl_common); +} + static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6) { return container_of(fl6, struct flowi, u.ip6); } +static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) +{ + return &(fl6->__fl_common); +} + static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) { return container_of(fldn, struct flowi, u.dn); diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 62a462413081..a5a1a1650668 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/in6.h> #include <linux/siphash.h> +#include <linux/string.h> #include <uapi/linux/if_ether.h> /** @@ -185,4 +186,12 @@ static inline bool flow_keys_have_l4(struct flow_keys *keys) u32 flow_hash_from_keys(struct flow_keys *keys); +static inline void +flow_dissector_init_keys(struct flow_dissector_key_control *key_control, + struct flow_dissector_key_basic *key_basic) +{ + memset(key_control, 0, sizeof(*key_control)); + memset(key_basic, 0, sizeof(*key_basic)); +} + #endif diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 43c0e771f417..351766331519 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -33,12 +33,6 @@ struct genl_info; * do additional, common, filtering and return an error * @post_doit: called after an operation's doit callback, it may * undo operations done by pre_doit, for example release locks - * @mcast_bind: a socket bound to the given multicast group (which - * is given as the offset into the groups array) - * @mcast_unbind: a socket was unbound from the given multicast group. - * Note that unbind() will not be called symmetrically if the - * generic netlink family is removed while there are still open - * sockets. * @attrbuf: buffer to store parsed attributes * @family_list: family list * @mcgrps: multicast groups used by this family (private) @@ -61,8 +55,6 @@ struct genl_family { void (*post_doit)(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info); - int (*mcast_bind)(struct net *net, int group); - void (*mcast_unbind)(struct net *net, int group); struct nlattr ** attrbuf; /* private */ const struct genl_ops * ops; /* private */ const struct genl_multicast_group *mcgrps; /* private */ diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 7ff588ca6817..28332bdac333 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -53,6 +53,7 @@ struct sock *__inet6_lookup_established(struct net *net, struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, @@ -60,6 +61,7 @@ struct sock *inet6_lookup_listener(struct net *net, static inline struct sock *__inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, @@ -71,12 +73,12 @@ static inline struct sock *__inet6_lookup(struct net *net, if (sk) return sk; - return inet6_lookup_listener(net, hashinfo, saddr, sport, + return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, hnum, dif); } static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, - struct sk_buff *skb, + struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, int iif) @@ -86,16 +88,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, if (sk) return sk; - return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, - &ipv6_hdr(skb)->saddr, sport, + return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, + doff, &ipv6_hdr(skb)->saddr, sport, &ipv6_hdr(skb)->daddr, ntohs(dport), iif); } struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif); + +int inet6_hash(struct sock *sk); #endif /* IS_ENABLED(CONFIG_IPV6) */ #define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 72599bbc8255..a77a37c6349d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -319,5 +319,9 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); +/* update the fast reuse flag when adding a socket */ +void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + struct sock *sk); + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index dce2d586d9ce..245d999c0eac 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -3,6 +3,7 @@ #include <linux/ip.h> #include <linux/skbuff.h> +#include <linux/if_vlan.h> #include <net/inet_sock.h> #include <net/dsfield.h> diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index de2e3ade6102..50f635c2c536 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -207,12 +207,16 @@ void inet_hashinfo_init(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); -void __inet_hash(struct sock *sk, struct sock *osk); -void inet_hash(struct sock *sk); +int __inet_hash(struct sock *sk, struct sock *osk, + int (*saddr_same)(const struct sock *sk1, + const struct sock *sk2, + bool match_wildcard)); +int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const unsigned short hnum, @@ -220,10 +224,11 @@ struct sock *__inet_lookup_listener(struct net *net, static inline struct sock *inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __inet_lookup_listener(net, hashinfo, saddr, sport, + return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif); } @@ -299,6 +304,7 @@ static inline struct sock * static inline struct sock *__inet_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif) @@ -307,12 +313,13 @@ static inline struct sock *__inet_lookup(struct net *net, struct sock *sk = __inet_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif); - return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport, - daddr, hnum, dif); + return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr, + sport, daddr, hnum, dif); } static inline struct sock *inet_lookup(struct net *net, struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif) @@ -320,7 +327,8 @@ static inline struct sock *inet_lookup(struct net *net, struct sock *sk; local_bh_disable(); - sk = __inet_lookup(net, hashinfo, saddr, sport, daddr, dport, dif); + sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, + dport, dif); local_bh_enable(); return sk; @@ -328,6 +336,7 @@ static inline struct sock *inet_lookup(struct net *net, static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, + int doff, const __be16 sport, const __be16 dport) { @@ -337,8 +346,8 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, if (sk) return sk; else - return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, - iph->saddr, sport, + return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, + doff, iph->saddr, sport, iph->daddr, dport, inet_iif(skb)); } diff --git a/include/net/ip.h b/include/net/ip.h index 61f2e268ec8a..9919332daf2c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -320,12 +320,18 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { struct net *net = dev_net(dst->dev); + unsigned int mtu; if (net->ipv4.sysctl_ip_fwd_use_pmtu || ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); + /* 'forwarding = true' case should always honour route mtu */ + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + return mtu; + return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); } @@ -350,19 +356,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); + /* We had many attacks based on IPID, use the private + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += segs; + return; + } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { - /* This is only to work around buggy Windows95/2000 - * VJ compression implementations. If the ID field - * does not change, they drop every other packet in - * a TCP stream using header compression. - */ - if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; - } else { - iph->id = 0; - } + iph->id = 0; } else { + /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } @@ -599,4 +604,9 @@ extern int sysctl_icmp_msgs_burst; int ip_misc_proc_init(void); #endif +static inline bool inetdev_valid_mtu(unsigned int mtu) +{ + return likely(mtu >= IPV4_MIN_MTU); +} + #endif /* _IP_H */ diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f9bdfb096579..a53853dbd226 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -103,6 +103,9 @@ void fib6_force_start_gc(struct net *net); struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast); +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, + int flags); + /* * support functions for ND * @@ -172,13 +175,14 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst, return rt->rt6i_flags & RTF_ANYCAST || (rt->rt6i_dst.plen != 128 && + !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)); } int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) +static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f6ff83b2ac87..b8dfab88c877 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -112,6 +112,7 @@ struct fib_info { unsigned char fib_scope; unsigned char fib_type; __be32 fib_prefsrc; + u32 fib_tb_id; u32 fib_priority; struct dst_metrics *fib_metrics; #define fib_mtu fib_metrics->metrics[RTAX_MTU-1] @@ -320,7 +321,7 @@ void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); -int fib_sync_down_addr(struct net *net, __be32 local); +int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 74bc08d82e14..798e5f95494b 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -7,6 +7,8 @@ #include <linux/socket.h> #include <linux/types.h> #include <linux/u64_stats_sync.h> +#include <linux/bitops.h> + #include <net/dsfield.h> #include <net/gro_cells.h> #include <net/inet_ecn.h> @@ -48,6 +50,7 @@ struct ip_tunnel_key { __be16 tun_flags; u8 tos; /* TOS for IPv4, TC for IPv6 */ u8 ttl; /* TTL for IPv4, HL for IPv6 */ + __be32 label; /* Flow Label for IPv6 */ __be16 tp_src; __be16 tp_dst; }; @@ -56,6 +59,11 @@ struct ip_tunnel_key { #define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ #define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ +/* Maximum tunnel options length. */ +#define IP_TUNNEL_OPTS_MAX \ + GENMASK((FIELD_SIZEOF(struct ip_tunnel_info, \ + options_len) * BITS_PER_BYTE) - 1, 0) + struct ip_tunnel_info { struct ip_tunnel_key key; u8 options_len; @@ -177,7 +185,7 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, - u8 tos, u8 ttl, + u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, __be64 tun_id, __be16 tun_flags) { @@ -188,6 +196,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, 0, IP_TUNNEL_KEY_IPV4_PAD_LEN); key->tos = tos; key->ttl = ttl; + key->label = label; key->tun_flags = tun_flags; /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of @@ -365,6 +374,17 @@ static inline void ip_tunnel_unneed_metadata(void) { } +static inline void ip_tunnel_info_opts_get(void *to, + const struct ip_tunnel_info *info) +{ +} + +static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, + const void *from, int len) +{ + info->options_len = 0; +} + #endif /* CONFIG_INET */ #endif /* __NET_IP_TUNNELS_H */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c07cf9596b6f..94880f07bc06 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -853,7 +853,7 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk) int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); -struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, +struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); @@ -915,6 +915,8 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +int __ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, + int addr_len); int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); diff --git a/include/net/llc.h b/include/net/llc.h index 95e5ced4c133..18dfd3e49a69 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -72,7 +72,9 @@ struct llc_sap { static inline struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex) { - return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; + u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS); + + return &sap->sk_dev_hash[bucket]; } static inline diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index c0f0a13ed818..49aa79c7b278 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -15,9 +15,11 @@ #include <linux/if_ether.h> /* Lengths of frame formats */ -#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ -#define LLC_PDU_LEN_S 4 -#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ +#define LLC_PDU_LEN_S 4 +#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +/* header and 1 control byte and XID info */ +#define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info)) /* Known SAP addresses */ #define LLC_GLOBAL_SAP 0xFF #define LLC_NULL_SAP 0x00 /* not network-layer visible */ @@ -50,9 +52,10 @@ #define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */ #define LLC_PDU_TYPE_MASK 0x03 -#define LLC_PDU_TYPE_I 0 /* first bit */ -#define LLC_PDU_TYPE_S 1 /* first two bits */ -#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_I 0 /* first bit */ +#define LLC_PDU_TYPE_S 1 /* first two bits */ +#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */ #define LLC_PDU_TYPE_IS_I(pdu) \ ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0) @@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, u8 ssap, u8 dsap, u8 cr) { - const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; + int hlen = 4; /* default value for I and S types */ struct llc_pdu_un *pdu; + switch (type) { + case LLC_PDU_TYPE_U: + hlen = 3; + break; + case LLC_PDU_TYPE_U_XID: + hlen = 6; + break; + } + skb_push(skb, hlen); skb_reset_network_header(skb); pdu = llc_pdu_un_hdr(skb); @@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ - skb_put(skb, sizeof(struct llc_xid_info)); + + /* no need to push/put since llc_pdu_header_init() has already + * pushed 3 + 3 bytes + */ } /** diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c1744c2f8aca..7740940b4d5c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -530,7 +530,7 @@ struct ieee80211_bss_conf { u8 sync_dtim_count; u32 basic_rates; struct ieee80211_rate *beacon_rate; - int mcast_rate[IEEE80211_NUM_BANDS]; + int mcast_rate[NUM_NL80211_BANDS]; u16 ht_operation_mode; s32 cqm_rssi_thold; u32 cqm_rssi_hyst; @@ -913,8 +913,8 @@ struct ieee80211_tx_info { * @common_ie_len: length of the common_ies */ struct ieee80211_scan_ies { - const u8 *ies[IEEE80211_NUM_BANDS]; - size_t len[IEEE80211_NUM_BANDS]; + const u8 *ies[NUM_NL80211_BANDS]; + size_t len[NUM_NL80211_BANDS]; const u8 *common_ies; size_t common_ie_len; }; @@ -1707,7 +1707,7 @@ struct ieee80211_sta_rates { * @txq: per-TID data TX queues (if driver uses the TXQ abstraction) */ struct ieee80211_sta { - u32 supp_rates[IEEE80211_NUM_BANDS]; + u32 supp_rates[NUM_NL80211_BANDS]; u8 addr[ETH_ALEN]; u16 aid; struct ieee80211_sta_ht_cap ht_cap; @@ -1926,6 +1926,11 @@ struct ieee80211_txq { * @IEEE80211_HW_BEACON_TX_STATUS: The device/driver provides TX status * for sent beacons. * + * @IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR: Hardware (or driver) requires that each + * station has a unique address, i.e. each station entry can be identified + * by just its MAC address; this prevents, for example, the same station + * from connecting to two virtual AP interfaces at the same time. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -1961,6 +1966,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TDLS_WIDER_BW, IEEE80211_HW_SUPPORTS_AMSDU_IN_AMPDU, IEEE80211_HW_BEACON_TX_STATUS, + IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -4318,7 +4324,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, */ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum ieee80211_band band, + enum nl80211_band band, size_t frame_len, struct ieee80211_rate *rate); @@ -5226,7 +5232,7 @@ struct rate_control_ops { }; static inline int rate_supported(struct ieee80211_sta *sta, - enum ieee80211_band band, + enum nl80211_band band, int index) { return (sta == NULL || sta->supp_rates[band] & BIT(index)); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 1c0d07376125..a68a460fa4f3 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -454,7 +454,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb do { seq = read_seqbegin(&hh->hh_lock); - hh_len = hh->hh_len; + hh_len = READ_ONCE(hh->hh_len); if (likely(hh_len <= HH_DATA_MOD)) { hh_alen = HH_DATA_MOD; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 636e9e11bd5f..a42d65ca0c27 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -98,7 +98,7 @@ struct nf_conn { possible_net_t ct_net; /* all members below initialized via memset */ - u8 __nfct_init_offset[0]; + struct { } __nfct_init_offset; /* If we were expected by an expectation, this will be it */ struct nf_conn *master; @@ -286,7 +286,7 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) struct kernel_param; -int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); +int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; extern unsigned int nf_conntrack_hash_rnd; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b96df7499600..bee9031a4006 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -74,6 +74,8 @@ struct nft_regs { static inline void nft_data_copy(u32 *dst, const struct nft_data *src, unsigned int len) { + if (len % NFT_REG32_SIZE) + dst[len / NFT_REG32_SIZE] = 0; memcpy(dst, src, len); } diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index f2a9597ff53c..604190596cde 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -25,8 +25,6 @@ struct netprio_map { u32 priomap[]; }; -void sock_update_netprioidx(struct sock *sk); - static inline u32 task_netprioidx(struct task_struct *p) { struct cgroup_subsys_state *css; @@ -38,13 +36,25 @@ static inline u32 task_netprioidx(struct task_struct *p) rcu_read_unlock(); return idx; } + +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ + if (in_interrupt()) + return; + + sock_cgroup_set_prioidx(skcd, task_netprioidx(current)); +} + #else /* !CONFIG_CGROUP_NET_PRIO */ + static inline u32 task_netprioidx(struct task_struct *p) { return 0; } -#define sock_update_netprioidx(sk) +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ +} #endif /* CONFIG_CGROUP_NET_PRIO */ #endif /* _NET_CLS_CGROUP_H */ diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 57ce24fb0047..53bd3c952ed4 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -42,6 +42,7 @@ enum nci_flag { NCI_UP, NCI_DATA_EXCHANGE, NCI_DATA_EXCHANGE_TO, + NCI_UNREG, }; /* NCI device states */ @@ -300,6 +301,7 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev); +void nci_hci_deallocate(struct nci_dev *ndev); int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, const u8 *param, size_t param_len); int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 32cb3e591e07..53f140fc1983 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -19,6 +19,8 @@ * */ +#include <linux/types.h> + #define NL802154_GENL_NAME "nl802154" enum nl802154_commands { @@ -143,10 +145,9 @@ enum nl802154_attrs { }; enum nl802154_iftype { - /* for backwards compatibility TODO */ - NL802154_IFTYPE_UNSPEC = -1, + NL802154_IFTYPE_UNSPEC = (~(__u32)0), - NL802154_IFTYPE_NODE, + NL802154_IFTYPE_NODE = 0, NL802154_IFTYPE_MONITOR, NL802154_IFTYPE_COORD, diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 68e509750caa..039cc29cb4a8 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -51,7 +51,7 @@ void pn_sock_init(void); struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa); void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb); void phonet_get_local_port_range(int *min, int *max); -void pn_sock_hash(struct sock *sk); +int pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); diff --git a/include/net/ping.h b/include/net/ping.h index ac80cb45e630..5fd7cc244833 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -65,7 +65,7 @@ struct pingfakehdr { }; int ping_get_port(struct sock *sk, unsigned short ident); -void ping_hash(struct sock *sk); +int ping_hash(struct sock *sk); void ping_unhash(struct sock *sk); int ping_init_sock(struct sock *sk); diff --git a/include/net/raw.h b/include/net/raw.h index 6a40c6562dd2..3e789008394d 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -57,7 +57,7 @@ int raw_seq_open(struct inode *ino, struct file *file, #endif -void raw_hash_sk(struct sock *sk); +int raw_hash_sk(struct sock *sk); void raw_unhash_sk(struct sock *sk); struct raw_sock { diff --git a/include/net/red.h b/include/net/red.h index 3618cdfec884..117a3654d319 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -167,14 +167,24 @@ static inline void red_set_vars(struct red_vars *v) v->qcount = -1; } -static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) +static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, + u8 Scell_log, u8 *stab) { - if (fls(qth_min) + Wlog > 32) + if (fls(qth_min) + Wlog >= 32) return false; - if (fls(qth_max) + Wlog > 32) + if (fls(qth_max) + Wlog >= 32) + return false; + if (Scell_log >= 32) return false; if (qth_max < qth_min) return false; + if (stab) { + int i; + + for (i = 0; i < RED_STAB_SIZE; i++) + if (stab[i] >= 32) + return false; + } return true; } diff --git a/include/net/route.h b/include/net/route.h index 11dfd0df0e67..0672dac4042c 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -157,7 +157,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } @@ -210,6 +210,9 @@ unsigned int inet_addr_type_dev_table(struct net *net, void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); +struct rtable *rt_dst_alloc(struct net_device *dev, + unsigned int flags, u16 type, + bool nopolicy, bool noxfrm, bool will_cache); struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); @@ -300,7 +303,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_rt_put(rt); flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); } - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } @@ -316,7 +319,7 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable flowi4_update_output(fl4, sk->sk_bound_dev_if, RT_CONN_FLAGS(sk), fl4->daddr, fl4->saddr); - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(sock_net(sk), fl4, sk); } return rt; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 2f87c1ba13de..baa977247dc9 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -28,6 +28,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * * @list: Used internally * @kind: Identifier + * @netns_refund: Physical device, move to init_net on netns exit * @maxtype: Highest device specific netlink attribute number * @policy: Netlink policy for device specific attribute validation * @validate: Optional validation function for netlink/changelink parameters @@ -81,6 +82,7 @@ struct rtnl_link_ops { unsigned int (*get_num_tx_queues)(void); unsigned int (*get_num_rx_queues)(void); + bool netns_refund; int slave_maxtype; const struct nla_policy *slave_policy; int (*slave_validate)(struct nlattr *tb[], diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index ccd2a964dad7..2eee8ea05a7f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -674,9 +674,11 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ if (!sch->gso_skb) { sch->gso_skb = sch->dequeue(sch); - if (sch->gso_skb) + if (sch->gso_skb) { /* it's still part of the queue */ + qdisc_qstats_backlog_inc(sch, sch->gso_skb); sch->q.qlen++; + } } return sch->gso_skb; @@ -689,6 +691,7 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) if (skb) { sch->gso_skb = NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { skb = sch->dequeue(sch); @@ -794,6 +797,7 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u16 mpu; u8 linklayer; u8 shift; }; @@ -803,6 +807,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, { len += r->overhead; + if (len < r->mpu) + len = r->mpu; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; @@ -825,6 +832,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; + res->mpu = r->mpu; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 8c337cd0e1e4..15cfec311500 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -344,8 +344,7 @@ typedef enum { } sctp_scope_policy_t; /* Based on IPv4 scoping <draft-stewart-tsvwg-sctp-ipv4-00.txt>, - * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 198.18.0.0/24, - * 192.88.99.0/24. + * SCTP IPv4 unusable addresses: 0.0.0.0/8, 224.0.0.0/4, 192.88.99.0/24. * Also, RFC 8.4, non-unicast addresses are not considered valid SCTP * addresses. */ @@ -353,15 +352,16 @@ typedef enum { ((htonl(INADDR_BROADCAST) == a) || \ ipv4_is_multicast(a) || \ ipv4_is_zeronet(a) || \ - ipv4_is_test_198(a) || \ ipv4_is_anycast_6to4(a)) /* Flags used for the bind address copy functions. */ -#define SCTP_ADDR6_ALLOWED 0x00000001 /* IPv6 address is allowed by +#define SCTP_ADDR4_ALLOWED 0x00000001 /* IPv4 address is allowed by local sock family */ -#define SCTP_ADDR4_PEERSUPP 0x00000002 /* IPv4 address is supported by +#define SCTP_ADDR6_ALLOWED 0x00000002 /* IPv6 address is allowed by + local sock family */ +#define SCTP_ADDR4_PEERSUPP 0x00000004 /* IPv4 address is supported by peer */ -#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by +#define SCTP_ADDR6_PEERSUPP 0x00000008 /* IPv6 address is supported by peer */ /* Reasons to retransmit. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index eea9bdeecba2..1d24da658f43 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -469,7 +469,7 @@ struct sctp_af { int saddr); void (*from_sk) (union sctp_addr *, struct sock *sk); - void (*from_addr_param) (union sctp_addr *, + bool (*from_addr_param) (union sctp_addr *, union sctp_addr_param *, __be16 port, int iif); int (*to_addr_param) (const union sctp_addr *, diff --git a/include/net/sock.h b/include/net/sock.h index 8eaba44fdc09..4c417486a1b8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -58,6 +58,7 @@ #include <linux/memcontrol.h> #include <linux/static_key.h> #include <linux/sched.h> +#include <linux/cgroup-defs.h> #include <linux/filter.h> #include <linux/rculist_nulls.h> @@ -192,7 +193,7 @@ struct sock_common { int skc_bound_dev_if; union { struct hlist_node skc_bind_node; - struct hlist_nulls_node skc_portaddr_node; + struct hlist_node skc_portaddr_node; }; struct proto *skc_prot; possible_net_t skc_net; @@ -287,7 +288,6 @@ struct cg_proto; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_priority: %SO_PRIORITY setting - * @sk_cgrp_prioidx: socket group's priority map index * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family * @sk_peer_pid: &struct pid for this socket's peer @@ -309,7 +309,7 @@ struct cg_proto; * @sk_send_head: front of stuff to transmit * @sk_security: used by security modules * @sk_mark: generic packet mark - * @sk_classid: this socket's cgroup classid + * @sk_cgrp_data: cgroup data for this cgroup * @sk_cgrp: this socket's cgroup-specific proto data * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock @@ -318,6 +318,7 @@ struct cg_proto; * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) * @sk_backlog_rcv: callback to process the backlog * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 + * @sk_reuseport_cb: reuseport group container */ struct sock { /* @@ -426,11 +427,11 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; __u32 sk_priority; -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) - __u32 sk_cgrp_prioidx; -#endif + spinlock_t sk_peer_lock; + __u32 sk_mark; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; + long sk_rcvtimeo; long sk_sndtimeo; struct timer_list sk_timer; @@ -449,11 +450,8 @@ struct sock { #ifdef CONFIG_SECURITY void *sk_security; #endif - __u32 sk_mark; kuid_t sk_uid; -#ifdef CONFIG_CGROUP_NET_CLASSID - u32 sk_classid; -#endif + struct sock_cgroup_data sk_cgrp_data; struct cg_proto *sk_cgrp; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); @@ -463,6 +461,7 @@ struct sock { struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); struct rcu_head sk_rcu; + struct sock_reuseport __rcu *sk_reuseport_cb; }; #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) @@ -700,18 +699,18 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry(__sk, list, sk_bind_node) /** - * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset + * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @head: the head for your list. * @offset: offset of hlist_node within the struct. * */ -#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \ - for (pos = (head)->first; \ - (!is_a_nulls(pos)) && \ +#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ + for (pos = rcu_dereference((head)->first); \ + pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) static inline struct user_namespace *sk_user_ns(struct sock *sk) { @@ -782,6 +781,8 @@ static inline int sk_memalloc_socks(void) { return static_key_false(&memalloc_socks); } + +void __receive_sock(struct file *file); #else static inline int sk_memalloc_socks(void) @@ -789,6 +790,8 @@ static inline int sk_memalloc_socks(void) return 0; } +static inline void __receive_sock(struct file *file) +{ } #endif static inline gfp_t sk_gfp_atomic(const struct sock *sk, gfp_t gfp_mask) @@ -1016,7 +1019,7 @@ struct proto { void (*release_cb)(struct sock *sk); /* Keeping track of sk's, looking them up, and port selection methods. */ - void (*hash)(struct sock *sk); + int (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); @@ -1143,6 +1146,16 @@ static inline bool sk_stream_is_writeable(const struct sock *sk) sk_stream_memory_free(sk); } +static inline int sk_under_cgroup_hierarchy(struct sock *sk, + struct cgroup *ancestor) +{ +#ifdef CONFIG_SOCK_CGROUP_DATA + return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), + ancestor); +#else + return -ENOTSUPP; +#endif +} static inline bool sk_has_memory_pressure(const struct sock *sk) { @@ -1208,11 +1221,13 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot, unsigned long amt, int *parent_status) { - page_counter_charge(&prot->memory_allocated, amt); + struct page_counter *counter; + + if (page_counter_try_charge(&prot->memory_allocated, amt, &counter)) + return; - if (page_counter_read(&prot->memory_allocated) > - prot->memory_allocated.limit) - *parent_status = OVER_LIMIT; + page_counter_charge(&prot->memory_allocated, amt); + *parent_status = OVER_LIMIT; } static inline void memcg_memory_allocated_sub(struct cg_proto *prot, @@ -1286,7 +1301,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) percpu_counter_inc(prot->sockets_allocated); } -static inline int +static inline u64 sk_sockets_allocated_read_positive(struct sock *sk) { struct proto *prot = sk->sk_prot; @@ -1333,10 +1348,10 @@ static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, /* With per-bucket locks this operation is not-atomic, so that * this version is not worse. */ -static inline void __sk_prot_rehash(struct sock *sk) +static inline int __sk_prot_rehash(struct sock *sk) { sk->sk_prot->unhash(sk); - sk->sk_prot->hash(sk); + return sk->sk_prot->hash(sk); } void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); @@ -1655,7 +1670,13 @@ static inline void sock_put(struct sock *sk) */ void sock_gen_put(struct sock *sk); -int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); +int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, + unsigned int trim_cap); +static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, + const int nested) +{ + return __sk_receive_skb(sk, skb, nested, 1); +} static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) { @@ -1674,7 +1695,6 @@ static inline int sk_tx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { - sk_tx_queue_clear(sk); sk->sk_socket = sock; } @@ -1727,7 +1747,8 @@ static inline u32 net_tx_rndhash(void) static inline void sk_set_txhash(struct sock *sk) { - sk->sk_txhash = net_tx_rndhash(); + /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ + WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); } static inline void sk_rethink_txhash(struct sock *sk) @@ -1979,9 +2000,12 @@ static inline void sock_poll_wait(struct file *filp, static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) { - if (sk->sk_txhash) { + /* This pairs with WRITE_ONCE() in sk_set_txhash() */ + u32 txhash = READ_ONCE(sk->sk_txhash); + + if (txhash) { skb->l4_hash = 1; - skb->hash = sk->sk_txhash; + skb->hash = txhash; } } diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h new file mode 100644 index 000000000000..aecd30308d50 --- /dev/null +++ b/include/net/sock_reuseport.h @@ -0,0 +1,28 @@ +#ifndef _SOCK_REUSEPORT_H +#define _SOCK_REUSEPORT_H + +#include <linux/filter.h> +#include <linux/skbuff.h> +#include <linux/types.h> +#include <net/sock.h> + +struct sock_reuseport { + struct rcu_head rcu; + + u16 max_socks; /* length of socks */ + u16 num_socks; /* elements in socks */ + struct bpf_prog __rcu *prog; /* optional BPF sock selector */ + struct sock *socks[0]; /* array of sock pointers */ +}; + +extern int reuseport_alloc(struct sock *sk); +extern int reuseport_add_sock(struct sock *sk, struct sock *sk2); +extern void reuseport_detach_sock(struct sock *sk); +extern struct sock *reuseport_select_sock(struct sock *sk, + u32 hash, + struct sk_buff *skb, + int hdr_len); +extern struct bpf_prog *reuseport_attach_prog(struct sock *sk, + struct bpf_prog *prog); + +#endif /* _SOCK_REUSEPORT_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index e06a10cf943e..87cad5d8ce75 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -52,7 +52,7 @@ extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; void tcp_time_wait(struct sock *sk, int state, int timeo); -#define MAX_TCP_HEADER (128 + MAX_HEADER) +#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) #define MAX_TCP_OPTION_SPACE 40 #define TCP_MIN_SND_MSS 48 #define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) @@ -518,19 +518,27 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); */ static inline void tcp_synq_overflow(const struct sock *sk) { - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); unsigned long now = jiffies; - if (time_after(now, last_overflow + HZ)) - tcp_sk(sk)->rx_opt.ts_recent_stamp = now; + if (!time_between32(now, last_overflow, last_overflow + HZ)) + WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now); } /* syncookies: no recent synqueue overflow on this listening socket? */ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) { - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); - return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); + /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID, + * then we're under synflood. However, we have to use + * 'last_overflow - HZ' as lower bound. That's because a concurrent + * tcp_synq_overflow() could update .ts_recent_stamp after we read + * jiffies but before we store .ts_recent_stamp into last_overflow, + * which could lead to rejecting a valid syncookie. + */ + return !time_between32(jiffies, last_overflow - HZ, + last_overflow + TCP_SYNCOOKIE_VALID); } static inline u32 tcp_cookie_time(void) diff --git a/include/net/udp.h b/include/net/udp.h index e57f50258cda..3f52b18ea12e 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -59,7 +59,7 @@ struct udp_skb_cb { * @lock: spinlock protecting changes to head/count */ struct udp_hslot { - struct hlist_nulls_head head; + struct hlist_head head; int count; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); @@ -177,9 +177,10 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) } /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ -static inline void udp_lib_hash(struct sock *sk) +static inline int udp_lib_hash(struct sock *sk) { BUG(); + return 0; } void udp_lib_unhash(struct sock *sk); @@ -191,7 +192,7 @@ static inline void udp_lib_close(struct sock *sk, long timeout) } int udp_lib_get_port(struct sock *sk, unsigned short snum, - int (*)(const struct sock *, const struct sock *), + int (*)(const struct sock *, const struct sock *, bool), unsigned int hash2_nulladdr); u32 udp_flow_hashrnd(void); @@ -259,7 +260,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, - struct udp_table *tbl); + struct udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, @@ -267,7 +268,8 @@ struct sock *udp6_lib_lookup(struct net *net, struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *tbl); + int dif, struct udp_table *tbl, + struct sk_buff *skb); /* * SNMP statistics for UDP and UDP-Lite diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index cb2f89f20f5c..c020d3389a21 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -88,8 +88,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, - __be16 dst_port, bool nocheck); + __u8 prio, __u8 ttl, __be32 label, + __be16 src_port, __be16 dst_port, bool nocheck); #endif void udp_tunnel_sock_release(struct socket *sock); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c5c03fa47f8c..0f9cdfb77f91 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1554,8 +1554,10 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, + struct ip6_tnl *t); int xfrm6_transport_finish(struct sk_buff *skb, int async); +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t); int xfrm6_rcv(struct sk_buff *skb); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); @@ -1731,21 +1733,17 @@ static inline int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay static inline int xfrm_replay_clone(struct xfrm_state *x, struct xfrm_state *orig) { - x->replay_esn = kzalloc(xfrm_replay_state_esn_len(orig->replay_esn), + + x->replay_esn = kmemdup(orig->replay_esn, + xfrm_replay_state_esn_len(orig->replay_esn), GFP_KERNEL); if (!x->replay_esn) return -ENOMEM; - - x->replay_esn->bmp_len = orig->replay_esn->bmp_len; - x->replay_esn->replay_window = orig->replay_esn->replay_window; - - x->preplay_esn = kmemdup(x->replay_esn, - xfrm_replay_state_esn_len(x->replay_esn), + x->preplay_esn = kmemdup(orig->preplay_esn, + xfrm_replay_state_esn_len(orig->preplay_esn), GFP_KERNEL); - if (!x->preplay_esn) { - kfree(x->replay_esn); + if (!x->preplay_esn) return -ENOMEM; - } return 0; } @@ -1827,4 +1825,37 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x, return 0; } + +extern const int xfrm_msg_min[XFRM_NR_MSGTYPES]; +extern const struct nla_policy xfrma_policy[XFRMA_MAX+1]; + +struct xfrm_translator { + /* Allocate frag_list and put compat translation there */ + int (*alloc_compat)(struct sk_buff *skb, const struct nlmsghdr *src); + + /* Allocate nlmsg with 64-bit translaton of received 32-bit message */ + struct nlmsghdr *(*rcv_msg_compat)(const struct nlmsghdr *nlh, + int maxtype, const struct nla_policy *policy); + + /* Translate 32-bit user_policy from sockptr */ + int (*xlate_user_policy_sockptr)(u8 **pdata32, int optlen); + + struct module *owner; +}; + +#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT) +extern int xfrm_register_translator(struct xfrm_translator *xtr); +extern int xfrm_unregister_translator(struct xfrm_translator *xtr); +extern struct xfrm_translator *xfrm_get_translator(void); +extern void xfrm_put_translator(struct xfrm_translator *xtr); +#else +static inline struct xfrm_translator *xfrm_get_translator(void) +{ + return NULL; +} +static inline void xfrm_put_translator(struct xfrm_translator *xtr) +{ +} +#endif + #endif /* _NET_XFRM_H */ diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index d77416963f05..72f3b0d65435 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -200,11 +200,13 @@ static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (dev) { ip4 = in_dev_get(dev); - if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) { + if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address, (struct in6_addr *)gid); + + if (ip4) in_dev_put(ip4); - } + dev_put(dev); } } diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index e59180264591..004bf0ca8884 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -256,7 +256,7 @@ int fcoe_ctlr_recv_flogi(struct fcoe_ctlr *, struct fc_lport *, struct fc_frame *); /* libfcoe funcs */ -u64 fcoe_wwn_from_mac(unsigned char mac[], unsigned int, unsigned int); +u64 fcoe_wwn_from_mac(unsigned char mac[MAX_ADDR_LEN], unsigned int, unsigned int); int fcoe_libfc_config(struct fc_lport *, struct fcoe_ctlr *, const struct libfc_function_template *, int init_fcp); u32 fcoe_fc_crc(struct fc_frame *fp); diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index c7b1dc713cdd..9c7f4aad6db6 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -144,6 +144,9 @@ struct iscsi_task { void *dd_data; /* driver/transport data */ }; +/* invalid scsi_task pointer */ +#define INVALID_SCSI_TASK (struct iscsi_task *)-1l + static inline int iscsi_task_has_unsol_data(struct iscsi_task *task) { return task->unsol_r2t.data_length > task->unsol_r2t.sent; diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h index 11571b2a831e..92ba09200f89 100644 --- a/include/scsi/scsi_common.h +++ b/include/scsi/scsi_common.h @@ -24,6 +24,13 @@ scsi_command_size(const unsigned char *cmnd) scsi_varlen_cdb_length(cmnd) : COMMAND_SIZE(cmnd[0]); } +static inline unsigned char +scsi_command_control(const unsigned char *cmnd) +{ + return (cmnd[0] == VARIABLE_LENGTH_CMD) ? + cmnd[1] : cmnd[COMMAND_SIZE(cmnd[0]) - 1]; +} + /* Returns a human-readable name for the device */ extern const char *scsi_device_type(unsigned type); diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 6183d20a01fb..e673c7c9c5fb 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -437,6 +437,8 @@ extern void iscsi_free_session(struct iscsi_cls_session *session); extern int iscsi_destroy_session(struct iscsi_cls_session *session); extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess, int dd_size, uint32_t cid); +extern void iscsi_put_conn(struct iscsi_cls_conn *conn); +extern void iscsi_get_conn(struct iscsi_cls_conn *conn); extern int iscsi_destroy_conn(struct iscsi_cls_conn *conn); extern void iscsi_unblock_session(struct iscsi_cls_session *session); extern void iscsi_block_session(struct iscsi_cls_session *session); diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index 5bb9261ec103..07555417fac5 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -71,6 +71,7 @@ struct snd_compr_runtime { * @direction: stream direction, playback/recording * @metadata_set: metadata set flag, true when set * @next_track: has userspace signal next track transition, true when set + * @partial_drain: undergoing partial_drain for stream, true when set * @private_data: pointer to DSP private data */ struct snd_compr_stream { @@ -81,6 +82,7 @@ struct snd_compr_stream { enum snd_compr_direction direction; bool metadata_set; bool next_track; + bool partial_drain; void *private_data; struct snd_soc_pcm_runtime *be; }; @@ -184,7 +186,13 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream) if (snd_BUG_ON(!stream)) return; - stream->runtime->state = SNDRV_PCM_STATE_SETUP; + /* for partial_drain case we are back to running state on success */ + if (stream->partial_drain) { + stream->runtime->state = SNDRV_PCM_STATE_RUNNING; + stream->partial_drain = false; /* clear this flag as well */ + } else { + stream->runtime->state = SNDRV_PCM_STATE_SETUP; + } wake_up(&stream->runtime->sleep); } diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 2dd096eea935..3e0171ea47f3 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -93,9 +93,9 @@ struct snd_rawmidi_substream { struct list_head list; /* list of all substream for given stream */ int stream; /* direction */ int number; /* substream number */ - unsigned int opened: 1, /* open flag */ - append: 1, /* append flag (merge more streams) */ - active_sensing: 1; /* send active sensing when close */ + bool opened; /* open flag */ + bool append; /* append flag (merge more streams) */ + bool active_sensing; /* send active sensing when close */ int use_count; /* use counter (for output) */ size_t bytes; struct snd_rawmidi *rmidi; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 0eed9fd79ea5..7b08d0c5b662 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -181,6 +181,10 @@ enum tcm_sense_reason_table { TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = R(0x16), TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = R(0x17), TCM_COPY_TARGET_DEVICE_NOT_REACHABLE = R(0x18), + TCM_TOO_MANY_TARGET_DESCS = R(0x19), + TCM_UNSUPPORTED_TARGET_DESC_TYPE_CODE = R(0x1a), + TCM_TOO_MANY_SEGMENT_DESCS = R(0x1b), + TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE = R(0x1c), #undef R }; @@ -783,8 +787,9 @@ struct se_device { atomic_long_t read_bytes; atomic_long_t write_bytes; /* Active commands on this virtual SE device */ - atomic_t simple_cmds; - atomic_t dev_ordered_sync; + atomic_t non_ordered; + bool ordered_sync_in_progress; + atomic_t delayed_cmd_count; atomic_t dev_qf_count; u32 export_count; spinlock_t delayed_cmd_lock; @@ -807,6 +812,7 @@ struct se_device { struct list_head dev_tmr_list; struct workqueue_struct *tmr_wq; struct work_struct qf_work_queue; + struct work_struct delayed_cmd_work; struct list_head delayed_cmd_list; struct list_head state_list; struct list_head qf_cmd_list; diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h new file mode 100644 index 000000000000..ab68640a18d0 --- /dev/null +++ b/include/trace/events/cgroup.h @@ -0,0 +1,163 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cgroup + +#if !defined(_TRACE_CGROUP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CGROUP_H + +#include <linux/cgroup.h> +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(cgroup_root, + + TP_PROTO(struct cgroup_root *root), + + TP_ARGS(root), + + TP_STRUCT__entry( + __field( int, root ) + __field( u16, ss_mask ) + __string( name, root->name ) + ), + + TP_fast_assign( + __entry->root = root->hierarchy_id; + __entry->ss_mask = root->subsys_mask; + __assign_str(name, root->name); + ), + + TP_printk("root=%d ss_mask=%#x name=%s", + __entry->root, __entry->ss_mask, __get_str(name)) +); + +DEFINE_EVENT(cgroup_root, cgroup_setup_root, + + TP_PROTO(struct cgroup_root *root), + + TP_ARGS(root) +); + +DEFINE_EVENT(cgroup_root, cgroup_destroy_root, + + TP_PROTO(struct cgroup_root *root), + + TP_ARGS(root) +); + +DEFINE_EVENT(cgroup_root, cgroup_remount, + + TP_PROTO(struct cgroup_root *root), + + TP_ARGS(root) +); + +DECLARE_EVENT_CLASS(cgroup, + + TP_PROTO(struct cgroup *cgrp), + + TP_ARGS(cgrp), + + TP_STRUCT__entry( + __field( int, root ) + __field( int, id ) + __field( int, level ) + __dynamic_array(char, path, + cgrp->kn ? cgroup_path(cgrp, NULL, 0) + 1 + : strlen("(null)")) + ), + + TP_fast_assign( + __entry->root = cgrp->root->hierarchy_id; + __entry->id = cgrp->id; + __entry->level = cgrp->level; + if (cgrp->kn) + cgroup_path(cgrp, __get_dynamic_array(path), + __get_dynamic_array_len(path)); + else + __assign_str(path, "(null)"); + ), + + TP_printk("root=%d id=%d level=%d path=%s", + __entry->root, __entry->id, __entry->level, __get_str(path)) +); + +DEFINE_EVENT(cgroup, cgroup_mkdir, + + TP_PROTO(struct cgroup *cgroup), + + TP_ARGS(cgroup) +); + +DEFINE_EVENT(cgroup, cgroup_rmdir, + + TP_PROTO(struct cgroup *cgroup), + + TP_ARGS(cgroup) +); + +DEFINE_EVENT(cgroup, cgroup_release, + + TP_PROTO(struct cgroup *cgroup), + + TP_ARGS(cgroup) +); + +DEFINE_EVENT(cgroup, cgroup_rename, + + TP_PROTO(struct cgroup *cgroup), + + TP_ARGS(cgroup) +); + +DECLARE_EVENT_CLASS(cgroup_migrate, + + TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup), + + TP_ARGS(dst_cgrp, task, threadgroup), + + TP_STRUCT__entry( + __field( int, dst_root ) + __field( int, dst_id ) + __field( int, dst_level ) + __dynamic_array(char, dst_path, + dst_cgrp->kn ? cgroup_path(dst_cgrp, NULL, 0) + 1 + : strlen("(null)")) + __field( int, pid ) + __string( comm, task->comm ) + ), + + TP_fast_assign( + __entry->dst_root = dst_cgrp->root->hierarchy_id; + __entry->dst_id = dst_cgrp->id; + __entry->dst_level = dst_cgrp->level; + if (dst_cgrp->kn) + cgroup_path(dst_cgrp, __get_dynamic_array(dst_path), + __get_dynamic_array_len(dst_path)); + else + __assign_str(dst_path, "(null)"); + __entry->pid = task->pid; + __assign_str(comm, task->comm); + ), + + TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s", + __entry->dst_root, __entry->dst_id, __entry->dst_level, + __get_str(dst_path), __entry->pid, __get_str(comm)) +); + +DEFINE_EVENT(cgroup_migrate, cgroup_attach_task, + + TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup), + + TP_ARGS(dst_cgrp, task, threadgroup) +); + +DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks, + + TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup), + + TP_ARGS(dst_cgrp, task, threadgroup) +); + +#endif /* _TRACE_CGROUP_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index c72f2dc01d0b..63a7680347cb 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -34,6 +34,83 @@ { F_WRLCK, "F_WRLCK" }, \ { F_UNLCK, "F_UNLCK" }) +TRACE_EVENT(locks_get_lock_context, + TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx), + + TP_ARGS(inode, type, ctx), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(unsigned char, type) + __field(struct file_lock_context *, ctx) + ), + + TP_fast_assign( + __entry->s_dev = inode->i_sb->s_dev; + __entry->i_ino = inode->i_ino; + __entry->type = type; + __entry->ctx = ctx; + ), + + TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p", + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), + __entry->i_ino, show_fl_type(__entry->type), __entry->ctx) +); + +DECLARE_EVENT_CLASS(filelock_lock, + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), + + TP_ARGS(inode, fl, ret), + + TP_STRUCT__entry( + __field(struct file_lock *, fl) + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(struct file_lock *, fl_next) + __field(fl_owner_t, fl_owner) + __field(unsigned int, fl_pid) + __field(unsigned int, fl_flags) + __field(unsigned char, fl_type) + __field(loff_t, fl_start) + __field(loff_t, fl_end) + __field(int, ret) + ), + + TP_fast_assign( + __entry->fl = fl ? fl : NULL; + __entry->s_dev = inode->i_sb->s_dev; + __entry->i_ino = inode->i_ino; + __entry->fl_next = fl ? fl->fl_next : NULL; + __entry->fl_owner = fl ? fl->fl_owner : NULL; + __entry->fl_pid = fl ? fl->fl_pid : 0; + __entry->fl_flags = fl ? fl->fl_flags : 0; + __entry->fl_type = fl ? fl->fl_type : 0; + __entry->fl_start = fl ? fl->fl_start : 0; + __entry->fl_end = fl ? fl->fl_end : 0; + __entry->ret = ret; + ), + + TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d", + __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev), + __entry->i_ino, __entry->fl_next, __entry->fl_owner, + __entry->fl_pid, show_fl_flags(__entry->fl_flags), + show_fl_type(__entry->fl_type), + __entry->fl_start, __entry->fl_end, __entry->ret) +); + +DEFINE_EVENT(filelock_lock, posix_lock_inode, + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), + TP_ARGS(inode, fl, ret)); + +DEFINE_EVENT(filelock_lock, fcntl_setlk, + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), + TP_ARGS(inode, fl, ret)); + +DEFINE_EVENT(filelock_lock, locks_remove_posix, + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), + TP_ARGS(inode, fl, ret)); + DECLARE_EVENT_CLASS(filelock_lease, TP_PROTO(struct inode *inode, struct file_lock *fl), diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8fd96aebfdee..f72832a749d6 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -1373,16 +1373,18 @@ TRACE_EVENT(core_ctl_set_busy, __field(u32, busy) __field(u32, old_is_busy) __field(u32, is_busy) + __field(bool, high_irqload) ), TP_fast_assign( __entry->cpu = cpu; __entry->busy = busy; __entry->old_is_busy = old_is_busy; __entry->is_busy = is_busy; + __entry->high_irqload = sched_cpu_high_irqload(cpu); ), - TP_printk("cpu=%u, busy=%u, old_is_busy=%u, new_is_busy=%u", + TP_printk("cpu=%u, busy=%u, old_is_busy=%u, new_is_busy=%u high_irqload=%d", __entry->cpu, __entry->busy, __entry->old_is_busy, - __entry->is_busy) + __entry->is_busy, __entry->high_irqload) ); TRACE_EVENT(core_ctl_set_boost, @@ -1647,9 +1649,9 @@ TRACE_EVENT(sched_boost_cpu, TRACE_EVENT(sched_tune_tasks_update, TP_PROTO(struct task_struct *tsk, int cpu, int tasks, int idx, - int boost, int max_boost), + int boost, int max_boost, u64 group_ts), - TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost), + TP_ARGS(tsk, cpu, tasks, idx, boost, max_boost, group_ts), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -1659,6 +1661,7 @@ TRACE_EVENT(sched_tune_tasks_update, __field( int, idx ) __field( int, boost ) __field( int, max_boost ) + __field( u64, group_ts ) ), TP_fast_assign( @@ -1669,13 +1672,15 @@ TRACE_EVENT(sched_tune_tasks_update, __entry->idx = idx; __entry->boost = boost; __entry->max_boost = max_boost; + __entry->group_ts = group_ts; ), TP_printk("pid=%d comm=%s " - "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d", + "cpu=%d tasks=%d idx=%d boost=%d max_boost=%d timeout=%llu", __entry->pid, __entry->comm, __entry->cpu, __entry->tasks, __entry->idx, - __entry->boost, __entry->max_boost) + __entry->boost, __entry->max_boost, + __entry->group_ts) ); /* @@ -1776,63 +1781,6 @@ TRACE_EVENT(sched_find_best_target, ); /* - * Tracepoint for accounting sched group energy - */ -TRACE_EVENT(sched_energy_diff, - - TP_PROTO(struct task_struct *tsk, int scpu, int dcpu, int udelta, - int nrgb, int nrga, int nrgd, int capb, int capa, int capd, - int nrgn, int nrgp), - - TP_ARGS(tsk, scpu, dcpu, udelta, - nrgb, nrga, nrgd, capb, capa, capd, - nrgn, nrgp), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, scpu ) - __field( int, dcpu ) - __field( int, udelta ) - __field( int, nrgb ) - __field( int, nrga ) - __field( int, nrgd ) - __field( int, capb ) - __field( int, capa ) - __field( int, capd ) - __field( int, nrgn ) - __field( int, nrgp ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->scpu = scpu; - __entry->dcpu = dcpu; - __entry->udelta = udelta; - __entry->nrgb = nrgb; - __entry->nrga = nrga; - __entry->nrgd = nrgd; - __entry->capb = capb; - __entry->capa = capa; - __entry->capd = capd; - __entry->nrgn = nrgn; - __entry->nrgp = nrgp; - ), - - TP_printk("pid=%d comm=%s " - "src_cpu=%d dst_cpu=%d usage_delta=%d " - "nrg_before=%d nrg_after=%d nrg_diff=%d " - "cap_before=%d cap_after=%d cap_delta=%d " - "nrg_delta=%d nrg_payoff=%d", - __entry->pid, __entry->comm, - __entry->scpu, __entry->dcpu, __entry->udelta, - __entry->nrgb, __entry->nrga, __entry->nrgd, - __entry->capb, __entry->capa, __entry->capd, - __entry->nrgn, __entry->nrgp) -); - -/* * Tracepoint for schedtune_tasks_update */ TRACE_EVENT(sched_tune_filter, @@ -1901,6 +1849,7 @@ TRACE_EVENT(walt_update_task_ravg, __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) __field( pid_t, cur_pid ) + __field(unsigned int, cur_freq ) __field( u64, wallclock ) __field( u64, mark_start ) __field( u64, delta_m ) @@ -1928,6 +1877,7 @@ TRACE_EVENT(walt_update_task_ravg, __entry->evt = evt; __entry->cpu = rq->cpu; __entry->cur_pid = rq->curr->pid; + __entry->cur_freq = rq->cur_freq; memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->mark_start = p->ravg.mark_start; @@ -1946,10 +1896,11 @@ TRACE_EVENT(walt_update_task_ravg, __entry->active_windows = p->ravg.active_windows; ), - TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu" + TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu" " cs %llu ps %llu util %lu cur_window %u prev_window %u active_wins %u" , __entry->wallclock, __entry->win_start, __entry->delta, - __entry->evt, __entry->cpu, __entry->cur_pid, + __entry->evt, __entry->cpu, + __entry->cur_freq, __entry->cur_pid, __entry->pid, __entry->comm, __entry->mark_start, __entry->delta_m, __entry->demand, __entry->sum, __entry->irqtime, diff --git a/include/trace/events/target.h b/include/trace/events/target.h index 50fea660c0f8..d543e8b87e50 100644 --- a/include/trace/events/target.h +++ b/include/trace/events/target.h @@ -139,6 +139,7 @@ TRACE_EVENT(target_sequencer_start, __field( unsigned int, opcode ) __field( unsigned int, data_length ) __field( unsigned int, task_attribute ) + __field( unsigned char, control ) __array( unsigned char, cdb, TCM_MAX_COMMAND_SIZE ) __string( initiator, cmd->se_sess->se_node_acl->initiatorname ) ), @@ -148,6 +149,7 @@ TRACE_EVENT(target_sequencer_start, __entry->opcode = cmd->t_task_cdb[0]; __entry->data_length = cmd->data_length; __entry->task_attribute = cmd->sam_task_attr; + __entry->control = scsi_command_control(cmd->t_task_cdb); memcpy(__entry->cdb, cmd->t_task_cdb, TCM_MAX_COMMAND_SIZE); __assign_str(initiator, cmd->se_sess->se_node_acl->initiatorname); ), @@ -157,9 +159,7 @@ TRACE_EVENT(target_sequencer_start, show_opcode_name(__entry->opcode), __entry->data_length, __print_hex(__entry->cdb, 16), show_task_attribute_name(__entry->task_attribute), - scsi_command_size(__entry->cdb) <= 16 ? - __entry->cdb[scsi_command_size(__entry->cdb) - 1] : - __entry->cdb[1] + __entry->control ) ); @@ -174,6 +174,7 @@ TRACE_EVENT(target_cmd_complete, __field( unsigned int, opcode ) __field( unsigned int, data_length ) __field( unsigned int, task_attribute ) + __field( unsigned char, control ) __field( unsigned char, scsi_status ) __field( unsigned char, sense_length ) __array( unsigned char, cdb, TCM_MAX_COMMAND_SIZE ) @@ -186,6 +187,7 @@ TRACE_EVENT(target_cmd_complete, __entry->opcode = cmd->t_task_cdb[0]; __entry->data_length = cmd->data_length; __entry->task_attribute = cmd->sam_task_attr; + __entry->control = scsi_command_control(cmd->t_task_cdb); __entry->scsi_status = cmd->scsi_status; __entry->sense_length = cmd->scsi_status == SAM_STAT_CHECK_CONDITION ? min(18, ((u8 *) cmd->sense_buffer)[SPC_ADD_SENSE_LEN_OFFSET] + 8) : 0; @@ -202,9 +204,7 @@ TRACE_EVENT(target_cmd_complete, show_opcode_name(__entry->opcode), __entry->data_length, __print_hex(__entry->cdb, 16), show_task_attribute_name(__entry->task_attribute), - scsi_command_size(__entry->cdb) <= 16 ? - __entry->cdb[scsi_command_size(__entry->cdb) - 1] : - __entry->cdb[1] + __entry->control ) ); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index fff846b512e6..6d9443d45fa2 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -65,8 +65,9 @@ TRACE_EVENT(writeback_dirty_page, ), TP_fast_assign( - strncpy(__entry->name, - mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, + bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : + NULL), 32); __entry->ino = mapping ? mapping->host->i_ino : 0; __entry->index = page->index; ), @@ -95,8 +96,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ - strncpy(__entry->name, - bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; @@ -142,10 +142,10 @@ static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb) static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb) { struct cgroup *cgrp = wb->memcg_css->cgroup; - char *path; + int len; - path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1); - WARN_ON_ONCE(path != buf); + len = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1); + WARN_ON_ONCE(len < 0); } static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc) @@ -205,8 +205,8 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, ), TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); @@ -249,8 +249,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strncpy(__entry->name, - wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; @@ -303,7 +302,7 @@ DECLARE_EVENT_CLASS(writeback_class, __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __trace_wb_assign_cgroup(__get_str(cgroup), wb); ), TP_printk("bdi %s: cgroup=%s", @@ -326,7 +325,7 @@ TRACE_EVENT(writeback_bdi_register, __array(char, name, 32) ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); ), TP_printk("bdi %s", __entry->name @@ -351,7 +350,7 @@ DECLARE_EVENT_CLASS(wbc_class, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; @@ -390,8 +389,9 @@ DEFINE_WBC_EVENT(wbc_writepage); TRACE_EVENT(writeback_queue_io, TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work, + unsigned long dirtied_before, int moved), - TP_ARGS(wb, work, moved), + TP_ARGS(wb, work, dirtied_before, moved), TP_STRUCT__entry( __array(char, name, 32) __field(unsigned long, older) @@ -401,19 +401,17 @@ TRACE_EVENT(writeback_queue_io, __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) ), TP_fast_assign( - unsigned long *older_than_this = work->older_than_this; - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); - __entry->older = older_than_this ? *older_than_this : 0; - __entry->age = older_than_this ? - (jiffies - *older_than_this) * 1000 / HZ : -1; + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); + __entry->older = dirtied_before; + __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; __entry->reason = work->reason; __trace_wb_assign_cgroup(__get_str(cgroup), wb); ), TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s", __entry->name, - __entry->older, /* older_than_this in jiffies */ - __entry->age, /* older_than_this in relative milliseconds */ + __entry->older, /* dirtied_before in jiffies */ + __entry->age, /* dirtied_before in relative milliseconds */ __entry->moved, __print_symbolic(__entry->reason, WB_WORK_REASON), __get_str(cgroup) @@ -488,7 +486,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, ), TP_fast_assign( - strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); @@ -553,7 +551,7 @@ TRACE_EVENT(balance_dirty_pages, TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; - strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + @@ -614,8 +612,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue, ), TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; @@ -688,8 +686,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, ), TP_fast_assign( - strncpy(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + strscpy_pad(__entry->name, + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index d6be935caa50..ecd1a0f7bd3e 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -63,7 +63,11 @@ TRACE_EVENT(xen_mc_callback, TP_PROTO(xen_mc_callback_fn_t fn, void *data), TP_ARGS(fn, data), TP_STRUCT__entry( - __field(xen_mc_callback_fn_t, fn) + /* + * Use field_struct to avoid is_signed_type() + * comparison of a function pointer. + */ + __field_struct(xen_mc_callback_fn_t, fn) __field(void *, data) ), TP_fast_assign( diff --git a/include/trace/perf.h b/include/trace/perf.h index 26486fcd74ce..31b8d276a993 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -20,9 +20,6 @@ #undef __get_bitmask #define __get_bitmask(field) (char *)__get_dynamic_array(field) -#undef __perf_addr -#define __perf_addr(a) (__addr = (a)) - #undef __perf_count #define __perf_count(c) (__count = (c)) @@ -38,7 +35,7 @@ perf_trace_##call(void *__data, proto) \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ struct trace_event_raw_##call *entry; \ struct pt_regs *__regs; \ - u64 __addr = 0, __count = 1; \ + u64 __count = 1; \ struct task_struct *__task = NULL; \ struct hlist_head *head; \ int __entry_size; \ @@ -48,16 +45,16 @@ perf_trace_##call(void *__data, proto) \ __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \ \ head = this_cpu_ptr(event_call->perf_events); \ - if (__builtin_constant_p(!__task) && !__task && \ - hlist_empty(head)) \ + if (!bpf_prog_array_valid(event_call) && \ + __builtin_constant_p(!__task) && !__task && \ + hlist_empty(head)) \ return; \ \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - entry = perf_trace_buf_prepare(__entry_size, \ - event_call->event.type, &__regs, &rctx); \ + entry = perf_trace_buf_alloc(__entry_size, &__regs, &rctx); \ if (!entry) \ return; \ \ @@ -67,8 +64,9 @@ perf_trace_##call(void *__data, proto) \ \ { assign; } \ \ - perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ - __count, __regs, head, __task); \ + perf_trace_run_bpf_submit(entry, __entry_size, rctx, \ + event_call, __count, __regs, \ + head, __task); \ } /* diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index af0cb7907922..f7d93efd5c8f 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -646,9 +646,6 @@ static inline notrace int trace_event_get_offsets_##call( \ #undef TP_fast_assign #define TP_fast_assign(args...) args -#undef __perf_addr -#define __perf_addr(a) (a) - #undef __perf_count #define __perf_count(c) (c) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 5c15c2a5c123..7f81582a329d 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -87,4 +87,9 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + +#define SO_COOKIE 57 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 1324b0292ec2..a6b339e1d70e 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -715,9 +715,13 @@ __SYSCALL(__NR_userfaultfd, sys_userfaultfd) __SYSCALL(__NR_membarrier, sys_membarrier) #define __NR_mlock2 284 __SYSCALL(__NR_mlock2, sys_mlock2) +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_pidfd_open 434 +__SYSCALL(__NR_pidfd_open, sys_pidfd_open) #undef __NR_syscalls -#define __NR_syscalls 285 +#define __NR_syscalls 435 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9ea2d22fa2cb..4f415fcb2006 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -30,9 +30,14 @@ #define BPF_FROM_LE BPF_TO_LE #define BPF_FROM_BE BPF_TO_BE +/* jmp encodings */ #define BPF_JNE 0x50 /* jump != */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ @@ -73,6 +78,8 @@ enum bpf_cmd { BPF_PROG_LOAD, BPF_OBJ_PIN, BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, }; enum bpf_map_type { @@ -81,6 +88,10 @@ enum bpf_map_type { BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY, + BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_PERCPU_ARRAY, + BPF_MAP_TYPE_STACK_TRACE, + BPF_MAP_TYPE_CGROUP_ARRAY, }; enum bpf_prog_type { @@ -89,8 +100,64 @@ enum bpf_prog_type { BPF_PROG_TYPE_KPROBE, BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_TRACEPOINT, + BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, + BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, }; +enum bpf_attach_type { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + __MAX_BPF_ATTACH_TYPE +}; + +#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE + +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -98,12 +165,19 @@ enum bpf_prog_type { #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +#define BPF_F_NO_PREALLOC (1U << 0) + +/* Flags for accessing BPF object */ +#define BPF_F_RDONLY (1U << 3) +#define BPF_F_WRONLY (1U << 4) + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ __u32 key_size; /* size of key in bytes */ __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ + __u32 map_flags; /* prealloc or not */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -130,6 +204,14 @@ union bpf_attr { struct { /* anonymous struct used by BPF_OBJ_* commands */ __aligned_u64 pathname; __u32 bpf_fd; + __u32 file_flags; + }; + + struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ + __u32 target_fd; /* container object to attach to */ + __u32 attach_bpf_fd; /* eBPF program to attach */ + __u32 attach_type; + __u32 attach_flags; }; } __attribute__((aligned(8))); @@ -269,9 +351,254 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_perf_event_output, + BPF_FUNC_skb_load_bytes, + + /** + * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id + * @ctx: struct pt_regs* + * @map: pointer to stack_trace map + * @flags: bits 0-7 - numer of stack frames to skip + * bit 8 - collect user stack instead of kernel + * bit 9 - compare stacks by hash only + * bit 10 - if two different stacks hash into the same stackid + * discard old + * other bits - reserved + * Return: >= 0 stackid on success or negative error + */ + BPF_FUNC_get_stackid, + + /** + * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff + * @from: raw from buffer + * @from_size: length of from buffer + * @to: raw to buffer + * @to_size: length of to buffer + * @seed: optional seed + * Return: csum result + */ + BPF_FUNC_csum_diff, + + /** + * bpf_skb_[gs]et_tunnel_opt(skb, opt, size) + * retrieve or populate tunnel options metadata + * @skb: pointer to skb + * @opt: pointer to raw tunnel option data + * @size: size of @opt + * Return: 0 on success for set, option size for get + */ + BPF_FUNC_skb_get_tunnel_opt, + BPF_FUNC_skb_set_tunnel_opt, + + /** + * bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is + * v4 -> v6, v6 -> v4 transitions. The helper will also + * resize the skb. eBPF program is expected to fill the + * new headers via skb_store_bytes and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_proto, + + /** + * bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_type, + + /** + * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_skb_under_cgroup, + + /** + * bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + */ + BPF_FUNC_get_hash_recalc, + + /** + * u64 bpf_get_current_task(void) + * Returns current task_struct + * Return: current + */ + BPF_FUNC_get_current_task, + + /** + * bpf_probe_write_user(void *dst, void *src, int len) + * safely attempt to write to a location + * @dst: destination address in userspace + * @src: source address on stack + * @len: number of bytes to copy + * Return: 0 on success or negative error + */ + BPF_FUNC_probe_write_user, + + /** + * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_current_task_under_cgroup, + + /** + * bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, + * to be used f.e. with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_tail, + + /** + * bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the + * skb is non-linear and not all of len are part of the + * linear section. Only needed for read/write with direct + * packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_pull_data, + + /** + * bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + */ + BPF_FUNC_csum_update, + + /** + * bpf_set_hash_invalid(skb) + * Invalidate current skb>hash. + * @skb: pointer to skb + */ + BPF_FUNC_set_hash_invalid, + + /** + * int bpf_get_numa_node_id() + * Return: Id of current NUMA node. + */ + BPF_FUNC_get_numa_node_id, + + /** + * int bpf_skb_change_head() + * Grows headroom of skb and adjusts MAC header offset accordingly. + * Will extends/reallocae as required automatically. + * May change skb data pointer and will thus invalidate any check + * performed for direct packet access. + * @skb: pointer to skb + * @len: length of header to be pushed in front + * @flags: Flags (unused for now) + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_head, + + /** + * int bpf_xdp_adjust_head(xdp_md, delta) + * Adjust the xdp_md.data by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data + * Return: 0 on success or negative on error + */ + BPF_FUNC_xdp_adjust_head, + + /** + * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) + * Copy a NUL terminated string from unsafe address. In case the string + * length is smaller than size, the target is not padded with further NUL + * bytes. In case the string length is larger than size, just count-1 + * bytes are copied and the last byte is set to NUL. + * @dst: destination address + * @size: maximum number of bytes to copy, including the trailing NUL + * @unsafe_ptr: unsafe address + * Return: + * > 0 length of the string including the trailing NUL on success + * < 0 error + */ + BPF_FUNC_probe_read_str, + + /** + * u64 bpf_bpf_get_socket_cookie(skb) + * Get the cookie for the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: 8 Bytes non-decreasing number on success or 0 if the socket + * field is missing inside sk_buff + */ + BPF_FUNC_get_socket_cookie, + + /** + * u32 bpf_get_socket_uid(skb) + * Get the owner uid of the socket stored inside sk_buff. + * @skb: pointer to skb + * Return: uid of the socket owner on success or 0 if the socket pointer + * inside sk_buff is NULL + */ + BPF_FUNC_get_socket_uid, + __BPF_FUNC_MAX_ID, }; +/* All flags used by eBPF helper functions, placed here. */ + +/* BPF_FUNC_skb_store_bytes flags. */ +#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) +#define BPF_F_INVALIDATE_HASH (1ULL << 1) + +/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. + * First 4 bits are for passing the header field size. + */ +#define BPF_F_HDR_FIELD_MASK 0xfULL + +/* BPF_FUNC_l4_csum_replace flags. */ +#define BPF_F_PSEUDO_HDR (1ULL << 4) +#define BPF_F_MARK_MANGLED_0 (1ULL << 5) + +/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ +#define BPF_F_INGRESS (1ULL << 0) + +/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ +#define BPF_F_TUNINFO_IPV6 (1ULL << 0) + +/* BPF_FUNC_skb_set_tunnel_key flags. */ +#define BPF_F_ZERO_CSUM_TX (1ULL << 1) +#define BPF_F_DONT_FRAGMENT (1ULL << 2) + +/* BPF_FUNC_get_stackid flags. */ +#define BPF_F_SKIP_FIELD_MASK 0xffULL +#define BPF_F_USER_STACK (1ULL << 8) +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define BPF_F_REUSE_STACKID (1ULL << 10) + +/* BPF_FUNC_perf_event_output flags and BPF_FUNC_perf_event_read. */ +#define BPF_F_INDEX_MASK 0xffffffffULL +#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +/* BPF_FUNC_perf_event_output for sk_buff input context. */ +#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -291,11 +618,44 @@ struct __sk_buff { __u32 cb[5]; __u32 hash; __u32 tc_classid; + __u32 data; + __u32 data_end; }; struct bpf_tunnel_key { __u32 tunnel_id; - __u32 remote_ipv4; + union { + __u32 remote_ipv4; + __u32 remote_ipv6[4]; + }; + __u8 tunnel_tos; + __u8 tunnel_ttl; + __u16 tunnel_ext; + __u32 tunnel_label; +}; + +struct bpf_sock { + __u32 bound_dev_if; +}; + +/* User return codes for XDP prog type. + * A valid XDP program must return one of these defined values. All other + * return codes are reserved for future use. Unknown return codes will result + * in packet drop. + */ +enum xdp_action { + XDP_ABORTED = 0, + XDP_DROP, + XDP_PASS, + XDP_TX, +}; + +/* user accessible metadata for XDP packet hook + * new fields must be added to the end of this structure + */ +struct xdp_md { + __u32 data; + __u32 data_end; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h new file mode 100644 index 000000000000..067427259820 --- /dev/null +++ b/include/uapi/linux/bpf_perf_event.h @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ +#define _UAPI__LINUX_BPF_PERF_EVENT_H__ + +#include <linux/types.h> +#include <linux/ptrace.h> + +struct bpf_perf_event_data { + struct pt_regs regs; + __u64 sample_period; +}; + +#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h index c872bfd25e13..03c3e1869be7 100644 --- a/include/uapi/linux/const.h +++ b/include/uapi/linux/const.h @@ -24,4 +24,9 @@ #define _BITUL(x) (_AC(1,UL) << (x)) #define _BITULL(x) (_AC(1,ULL) << (x)) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) + +#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + #endif /* !(_LINUX_CONST_H) */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index c66b52303114..51caeb7797d0 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -93,6 +93,7 @@ struct inodes_stat_t { #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) #define MS_NOSEC (1<<28) #define MS_BORN (1<<29) #define MS_ACTIVE (1<<30) diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index 752f5dc040a5..0829d6d5e917 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -31,7 +31,7 @@ #include <linux/hdlc/ioctl.h> /* For glibc compatibility. An empty enum does not compile. */ -#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && \ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || \ __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 /** * enum net_device_flags - &struct net_device flags @@ -99,7 +99,7 @@ enum net_device_flags { IFF_ECHO = 1<<18, /* volatile */ #endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ }; -#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ /* for compatibility with glibc net/if.h */ #if __UAPI_DEF_IF_NET_DEVICE_FLAGS diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5ad57375a99f..7eb9178e3666 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -218,6 +218,7 @@ enum in6_addr_gen_mode { IN6_ADDR_GEN_MODE_EUI64, IN6_ADDR_GEN_MODE_NONE, IN6_ADDR_GEN_MODE_STABLE_PRIVACY, + IN6_ADDR_GEN_MODE_RANDOM, }; /* Bridge section */ diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h index 163e8adac2d6..de246e9f4974 100644 --- a/include/uapi/linux/if_pppol2tp.h +++ b/include/uapi/linux/if_pppol2tp.h @@ -17,6 +17,7 @@ #include <linux/types.h> +#include <linux/l2tp.h> /* Structure used to connect() the socket to a particular tunnel UDP * socket over IPv4. @@ -89,14 +90,12 @@ enum { PPPOL2TP_SO_REORDERTO = 5, }; -/* Debug message categories for the DEBUG socket option */ +/* Debug message categories for the DEBUG socket option (deprecated) */ enum { - PPPOL2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if - * compiled in) */ - PPPOL2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel - * interface */ - PPPOL2TP_MSG_SEQ = (1 << 2), /* sequence numbers */ - PPPOL2TP_MSG_DATA = (1 << 3), /* data packets */ + PPPOL2TP_MSG_DEBUG = L2TP_MSG_DEBUG, + PPPOL2TP_MSG_CONTROL = L2TP_MSG_CONTROL, + PPPOL2TP_MSG_SEQ = L2TP_MSG_SEQ, + PPPOL2TP_MSG_DATA = L2TP_MSG_DATA, }; diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 7a89b7b62ab8..f6a44010e84a 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -181,7 +181,6 @@ struct input_mask { #define INPUT_PROP_SEMI_MT 0x03 /* touch rectangle only */ #define INPUT_PROP_TOPBUTTONPAD 0x04 /* softbuttons at top of pad */ #define INPUT_PROP_POINTING_STICK 0x05 /* is a pointing stick */ -#define INPUT_PROP_NO_DUMMY_RELEASE 0x06 /* no dummy event */ #define INPUT_PROP_MAX 0x1f #define INPUT_PROP_CNT (INPUT_PROP_MAX + 1) diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 7b5e2aac86ac..739a4f3fbe94 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -165,6 +165,8 @@ enum IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, + IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, + IPV4_DEVCONF_DROP_GRATUITOUS_ARP, IPV4_DEVCONF_NF_IPV4_DEFRAG_SKIP, __IPV4_DEVCONF_MAX }; diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 347ef22a964e..c3a5c99f565b 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -9,9 +9,8 @@ #include <linux/types.h> #include <linux/socket.h> -#ifndef __KERNEL__ -#include <netinet/in.h> -#endif +#include <linux/in.h> +#include <linux/in6.h> #define IPPROTO_L2TP 115 @@ -31,7 +30,7 @@ struct sockaddr_l2tpip { __u32 l2tp_conn_id; /* Connection ID of tunnel */ /* Pad to size of `struct sockaddr'. */ - unsigned char __pad[sizeof(struct sockaddr) - + unsigned char __pad[__SOCK_SIZE__ - sizeof(__kernel_sa_family_t) - sizeof(__be16) - sizeof(struct in_addr) - sizeof(__u32)]; @@ -108,7 +107,7 @@ enum { L2TP_ATTR_VLAN_ID, /* u16 */ L2TP_ATTR_COOKIE, /* 0, 4 or 8 bytes */ L2TP_ATTR_PEER_COOKIE, /* 0, 4 or 8 bytes */ - L2TP_ATTR_DEBUG, /* u32 */ + L2TP_ATTR_DEBUG, /* u32, enum l2tp_debug_flags */ L2TP_ATTR_RECV_SEQ, /* u8 */ L2TP_ATTR_SEND_SEQ, /* u8 */ L2TP_ATTR_LNS_MODE, /* u8 */ @@ -173,6 +172,21 @@ enum l2tp_seqmode { L2TP_SEQ_ALL = 2, }; +/** + * enum l2tp_debug_flags - debug message categories for L2TP tunnels/sessions + * + * @L2TP_MSG_DEBUG: verbose debug (if compiled in) + * @L2TP_MSG_CONTROL: userspace - kernel interface + * @L2TP_MSG_SEQ: sequence numbers + * @L2TP_MSG_DATA: data packets + */ +enum l2tp_debug_flags { + L2TP_MSG_DEBUG = (1 << 0), + L2TP_MSG_CONTROL = (1 << 1), + L2TP_MSG_SEQ = (1 << 2), + L2TP_MSG_DATA = (1 << 3), +}; + /* * NETLINK_GENERIC related info */ diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h index 928f98997d8a..4acefd697677 100644 --- a/include/uapi/linux/lightnvm.h +++ b/include/uapi/linux/lightnvm.h @@ -20,7 +20,7 @@ #define _UAPI_LINUX_LIGHTNVM_H #ifdef __KERNEL__ -#include <linux/kernel.h> +#include <linux/const.h> #include <linux/ioctl.h> #else /* __KERNEL__ */ #include <stdio.h> diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h index 9097817b8ddc..ca87a3c37573 100644 --- a/include/uapi/linux/mmc/ioctl.h +++ b/include/uapi/linux/mmc/ioctl.h @@ -2,6 +2,7 @@ #define LINUX_MMC_IOCTL_H #include <linux/types.h> +#include <linux/major.h> struct mmc_ioc_cmd { /* Implies direction of data. true = write, false = read */ diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h index e956704f5fb1..95b8a9395ec1 100644 --- a/include/uapi/linux/msdos_fs.h +++ b/include/uapi/linux/msdos_fs.h @@ -9,7 +9,9 @@ * The MS-DOS filesystem constants/structures */ +#ifndef SECTOR_SIZE #define SECTOR_SIZE 512 /* sector size (bytes) */ +#endif #define SECTOR_BITS 9 /* log2(SECTOR_SIZE) */ #define MSDOS_DPB (MSDOS_DPS) /* dir entries per block */ #define MSDOS_DPB_BITS 4 /* log2(MSDOS_DPB) */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index d8c8a7c9d88a..b0a1c33d4a15 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -111,7 +111,7 @@ enum nf_tables_msg_types { * @NFTA_LIST_ELEM: list element (NLA_NESTED) */ enum nft_list_attributes { - NFTA_LIST_UNPEC, + NFTA_LIST_UNSPEC, NFTA_LIST_ELEM, __NFTA_LIST_MAX }; diff --git a/include/uapi/linux/netfilter/nfnetlink_cthelper.h b/include/uapi/linux/netfilter/nfnetlink_cthelper.h index 33659f6fad3e..30557bade935 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cthelper.h +++ b/include/uapi/linux/netfilter/nfnetlink_cthelper.h @@ -4,7 +4,7 @@ #define NFCT_HELPER_STATUS_DISABLED 0 #define NFCT_HELPER_STATUS_ENABLED 1 -enum nfnl_acct_msg_types { +enum nfnl_cthelper_msg_types { NFNL_MSG_CTHELPER_NEW, NFNL_MSG_CTHELPER_GET, NFNL_MSG_CTHELPER_DEL, diff --git a/include/uapi/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h index c36969b91533..8f40c2fe0ed4 100644 --- a/include/uapi/linux/netfilter/x_tables.h +++ b/include/uapi/linux/netfilter/x_tables.h @@ -1,6 +1,6 @@ #ifndef _UAPI_X_TABLES_H #define _UAPI_X_TABLES_H -#include <linux/kernel.h> +#include <linux/const.h> #include <linux/types.h> #define XT_FUNCTION_MAXNAMELEN 30 diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 1fad2c27ac32..da161b56c79e 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -2,9 +2,11 @@ #define _XT_BPF_H #include <linux/filter.h> +#include <linux/limits.h> #include <linux/types.h> #define XT_BPF_MAX_NUM_INSTR 64 +#define XT_BPF_PATH_MAX (XT_BPF_MAX_NUM_INSTR * sizeof(struct sock_filter)) struct bpf_prog; @@ -16,4 +18,24 @@ struct xt_bpf_info { struct bpf_prog *filter __attribute__((aligned(8))); }; +enum xt_bpf_modes { + XT_BPF_MODE_BYTECODE, + XT_BPF_MODE_FD_PINNED, + XT_BPF_MODE_FD_ELF, +}; +#define XT_BPF_MODE_PATH_PINNED XT_BPF_MODE_FD_PINNED + +struct xt_bpf_info_v1 { + __u16 mode; + __u16 bpf_program_num_elem; + __s32 fd; + union { + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; + char path[XT_BPF_PATH_MAX]; + }; + + /* only used in the kernel */ + struct bpf_prog *filter __attribute__((aligned(8))); +}; + #endif /*_XT_BPF_H */ diff --git a/include/uapi/linux/netfilter/xt_sctp.h b/include/uapi/linux/netfilter/xt_sctp.h index 29287be696a2..788b77c347a0 100644 --- a/include/uapi/linux/netfilter/xt_sctp.h +++ b/include/uapi/linux/netfilter/xt_sctp.h @@ -40,19 +40,19 @@ struct xt_sctp_info { #define SCTP_CHUNKMAP_SET(chunkmap, type) \ do { \ (chunkmap)[type / bytes(__u32)] |= \ - 1 << (type % bytes(__u32)); \ + 1u << (type % bytes(__u32)); \ } while (0) #define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \ do { \ (chunkmap)[type / bytes(__u32)] &= \ - ~(1 << (type % bytes(__u32))); \ + ~(1u << (type % bytes(__u32))); \ } while (0) #define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \ ({ \ ((chunkmap)[type / bytes (__u32)] & \ - (1 << (type % bytes (__u32)))) ? 1: 0; \ + (1u << (type % bytes (__u32)))) ? 1: 0; \ }) #define SCTP_CHUNKMAP_RESET(chunkmap) \ diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index a9d1772199bf..8edc617e566a 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -1,7 +1,7 @@ #ifndef _UAPI__LINUX_NETLINK_H #define _UAPI__LINUX_NETLINK_H -#include <linux/kernel.h> +#include <linux/const.h> #include <linux/socket.h> /* for __kernel_sa_family_t */ #include <linux/types.h> diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 399f39ff8048..1b6d54a328ba 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -261,7 +261,7 @@ enum nfc_sdp_attr { #define NFC_SE_ENABLED 0x1 struct sockaddr_nfc { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; @@ -269,14 +269,14 @@ struct sockaddr_nfc { #define NFC_LLCP_MAX_SERVICE_NAME 63 struct sockaddr_nfc_llcp { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; __u8 dsap; /* Destination SAP, if known */ __u8 ssap; /* Source SAP to be bound to */ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; - size_t service_name_len; + __kernel_size_t service_name_len; }; /* NFC socket protocols */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5d25b25cf5ca..d71f4a5ddb0f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4061,11 +4061,15 @@ enum nl80211_txrate_gi { * @NL80211_BAND_2GHZ: 2.4 GHz ISM band * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 64.80 GHz) + * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace + * since newer kernel versions may support more bands */ enum nl80211_band { NL80211_BAND_2GHZ, NL80211_BAND_5GHZ, NL80211_BAND_60GHZ, + + NUM_NL80211_BANDS, }; /** diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index eb3c786afa70..04bd75b0f1f2 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -488,6 +488,12 @@ #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */ #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 686da166aeec..a1cc04cb8d6e 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -270,6 +270,9 @@ enum perf_event_read_format { /* * Hardware event_id to monitor via a performance monitoring event: + * + * @sample_max_stack: Max number of frame pointers in a callchain, + * should be < /proc/sys/kernel/perf_event_max_stack */ struct perf_event_attr { @@ -380,7 +383,8 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; - __u32 __reserved_2; /* align to __u64 */ + __u16 sample_max_stack; + __u16 __reserved_2; /* align to __u64 */ }; #define perf_flags(attr) (*(&(attr)->read_format + 1)) diff --git a/include/uapi/linux/qbg-profile.h b/include/uapi/linux/qbg-profile.h new file mode 100644 index 000000000000..a30f4dbba9c6 --- /dev/null +++ b/include/uapi/linux/qbg-profile.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#ifndef __QBG_PROFILE_H__ +#define __QBG_PROFILE_H__ + +#define MAX_BP_LUT_ROWS 35 +#define MAX_BP_LUT_COLS 8 +#define MAX_PROFILE_NAME_LENGTH 256 + +enum profile_table_type { + CHARGE_TABLE = 0, + DISCHARGE_TABLE, +}; + +struct battery_data_table { + unsigned short int table[MAX_BP_LUT_ROWS][MAX_BP_LUT_COLS]; + int unit_conv_factor[MAX_BP_LUT_COLS]; + unsigned short int nrows; + unsigned short int ncols; +}; + +struct battery_config { + char bp_profile_name[MAX_PROFILE_NAME_LENGTH]; + int bp_batt_id; + int capacity; + int bp_checksum; + int soh_range_high; + int soh_range_low; + int normal_impedance; + int aged_impedance; + int normal_capacity; + int aged_capacity; + int recharge_soc_delta; + int recharge_vflt_delta; + int recharge_iterm; +}; + +struct battery_profile_table { + enum profile_table_type table_type; + int table_index; + struct battery_data_table *table; +}; + +/* IOCTLs to query battery profile data */ +/* Battery configuration */ +#define BPIOCXBP \ + _IOWR('B', 0x01, struct battery_config) +/* Battery profile table */ +#define BPIOCXBPTABLE \ + _IOWR('B', 0x02, struct battery_profile_table) + +#endif diff --git a/include/uapi/linux/qbg.h b/include/uapi/linux/qbg.h new file mode 100644 index 000000000000..d8e335be2f54 --- /dev/null +++ b/include/uapi/linux/qbg.h @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#ifndef __QBG_H__ +#define __QBG_H__ + +#define MAX_FIFO_COUNT 36 +#define QBG_MAX_STEP_CHG_ENTRIES 6 + +enum QBG_STATE { + QBG_LPM, + QBG_MPM, + QBG_HPM, + QBG_FAST_CHAR, + QBG_PON_OCV, + QBG_STATE_MAX, +}; + +enum QBG_SDAM_DATA_OFFSET { + QBG_ACC0_OFFSET = 0, + QBG_ACC1_OFFSET = 2, + QBG_ACC2_OFFSET = 4, + QBG_TBAT_OFFSET = 6, + QBG_IBAT_OFFSET = 8, + QBG_VREF_OFFSET = 10, + QBG_DATA_TAG_OFFSET = 12, + QBG_QG_STS_OFFSET, + QBG_STS1_OFFSET, + QBG_STS2_OFFSET, + QBG_STS3_OFFSET, + QBG_ONE_FIFO_LENGTH, +}; + +enum qbg { + QBG_PARAM_SOC, + QBG_PARAM_BATT_SOC, + QBG_PARAM_SYS_SOC, + QBG_PARAM_ESR, + QBG_PARAM_OCV_UV, + QBG_PARAM_MAX_LOAD_NOW, + QBG_PARAM_MAX_LOAD_AVG, + QBG_PARAM_HOLD_SOC_100PCT, + QBG_PARAM_CHARGE_CYCLE_COUNT, + QBG_PARAM_LEARNED_CAPACITY, + QBG_PARAM_TTF_100MS, + QBG_PARAM_TTE_100MS, + QBG_PARAM_SOH, + QBG_PARAM_TBAT, + QBG_PARAM_SYS_SOC_HOLD_100PCT, + QBG_PARAM_JEITA_COOL_THRESHOLD, + QBG_PARAM_TOTAL_IMPEDANCE, + QBG_PARAM_ESSENTIAL_PARAM_REVID, + QBG_PARAM_FIFO_TIMESTAMP, + QBG_PARAM_MAX, +}; + +struct qbg_essential_params { + short int msoc; + short int cutoff_soc; + short int full_soc; + short int x0; + short int x1; + short int x2; + short int soh_r; + short int soh_c; + short int theta0; + short int theta1; + short int theta2; + short int i1full; + short int i2full; + short int i1cutoff; + short int i2cutoff; + short int syssoc; + int discharge_cycle_count; + int charge_cycle_count; + unsigned int rtc_time; + short int batt_therm; + unsigned short int ocv; +} __attribute__ ((__packed__)); + +struct fifo_data { + unsigned short int v1; + unsigned short int v2; + unsigned short int i; + unsigned short int tbat; + unsigned short int ibat; + unsigned short int vref; + char data_tag; + char qg_sts; + char sts1; + char sts2; + char sts3; +} __attribute__ ((__packed__)); + +struct k_fifo_data { + unsigned int v1; + unsigned int v2; + unsigned int i; + unsigned int tbat; + unsigned int ibat; + unsigned int vref; + unsigned int data_tag; + unsigned int qg_sts; + unsigned int sts1; + unsigned int sts2; + unsigned int sts3; +} __attribute__ ((__packed__)); + +struct qbg_config { + unsigned int batt_id; + unsigned int pon_ocv; + unsigned int pon_ibat; + unsigned int pon_tbat; + unsigned int pon_soc; + unsigned int float_volt_uv; + unsigned int fastchg_curr_ma; + unsigned int vbat_cutoff_mv; + unsigned int ibat_cutoff_ma; + unsigned int vph_min_mv; + unsigned int iterm_ma; + unsigned int rconn_mohm; + unsigned long current_time; + unsigned int sdam_batt_id; + unsigned int essential_param_revid; + unsigned long sample_time_us[QBG_STATE_MAX]; +} __attribute__ ((__packed__)); + +struct qbg_param { + unsigned int data; + _Bool valid; +}; + +struct qbg_kernel_data { + unsigned int seq_no; + unsigned int fifo_time; + unsigned int fifo_count; + struct k_fifo_data fifo[MAX_FIFO_COUNT]; + struct qbg_param param[QBG_PARAM_MAX]; +} __attribute__ ((__packed__)); + +struct qbg_user_data { + struct qbg_param param[QBG_PARAM_MAX]; +} __attribute__ ((__packed__)); + +struct range_data { + int low_threshold; + int high_threshold; + unsigned int value; +} __attribute__ ((__packed__)); + +struct ranges { + struct range_data data[QBG_MAX_STEP_CHG_ENTRIES]; + unsigned char range_count; + _Bool valid; +} __attribute__((__packed__)); + +struct qbg_step_chg_jeita_params { + int jeita_full_fv_10nv; + int jeita_full_iterm_10na; + int jeita_warm_adc_value; + int jeita_cool_adc_value; + int battery_beta; + int battery_therm_kohm; + struct ranges step_fcc_cfg; + struct ranges jeita_fcc_cfg; + struct ranges jeita_fv_cfg; + unsigned char ttf_calc_mode; +} __attribute__ ((__packed__)); + +/* IOCTLs to read & write QBG config and essential params */ +#define QBGIOCXCFG _IOR('B', 0x01, struct qbg_config) +#define QBGIOCXEPR _IOR('B', 0x02, struct qbg_essential_params) +#define QBGIOCXEPW _IOWR('B', 0x03, struct qbg_essential_params) +#define QBGIOCXSTEPCHGCFG \ + _IOWR('B', 0x04, struct qbg_step_chg_jeita_params) + +#endif diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h index cc89ddefa926..ed6e31d9c195 100644 --- a/include/uapi/linux/sched.h +++ b/include/uapi/linux/sched.h @@ -9,6 +9,7 @@ #define CLONE_FS 0x00000200 /* set if fs info shared between processes */ #define CLONE_FILES 0x00000400 /* set if open files shared between processes */ #define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ +#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */ #define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ #define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ #define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ @@ -21,8 +22,7 @@ #define CLONE_DETACHED 0x00400000 /* Unused, ignored */ #define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */ #define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */ -/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state) - and is now available for re-use. */ +#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ #define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ #define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ #define CLONE_NEWUSER 0x10000000 /* New user namespace */ diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index 1e5ac4e776da..5bcc637cee46 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -61,6 +61,7 @@ * ST16C654: 8 16 56 60 8 16 32 56 PORT_16654 * TI16C750: 1 16 32 56 xx xx xx xx PORT_16750 * TI16C752: 8 16 56 60 8 16 32 56 + * OX16C950: 16 32 112 120 16 32 64 112 PORT_16C950 * Tegra: 1 4 8 14 16 8 4 1 PORT_TEGRA */ #define UART_FCR_R_TRIG_00 0x00 diff --git a/include/uapi/linux/slatecom_interface.h b/include/uapi/linux/slatecom_interface.h new file mode 100644 index 000000000000..1caed8c42bf6 --- /dev/null +++ b/include/uapi/linux/slatecom_interface.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (c) 2017-2021, The Linux Foundation. All rights reserved. + */ +#ifndef LINUX_SLATECOM_INTERFACE_H +#define LINUX_SLATECOM_INTERFACE_H + +#include <linux/types.h> + +#define SLATECOM_REG_READ 0 +#define SLATECOM_AHB_READ 1 +#define SLATECOM_AHB_WRITE 2 +#define SLATECOM_SET_SPI_FREE 3 +#define SLATECOM_SET_SPI_BUSY 4 +#define SLATECOM_REG_WRITE 5 +#define SLATECOM_SOFT_RESET 6 +#define SLATECOM_MODEM_DOWN2_SLATE 7 +#define SLATECOM_TWM_EXIT 8 +#define SLATECOM_SLATE_APP_RUNNING 9 +#define SLATECOM_ADSP_DOWN2_SLATE 10 +#define SLATECOM_SLATE_WEAR_LOAD 11 +#define SLATECOM_SLATE_WEAR_UNLOAD 12 +#define EXCHANGE_CODE 'V' + +struct slate_ui_data { + __u64 __user write; + __u64 __user result; + __u32 slate_address; + __u32 cmd; + __u32 num_of_words; + __u8 __user *buffer; +}; + +enum slate_event_type { + SLATE_BEFORE_POWER_DOWN = 1, + SLATE_AFTER_POWER_DOWN, + SLATE_BEFORE_POWER_UP, + SLATE_AFTER_POWER_UP, + MODEM_BEFORE_POWER_DOWN, + MODEM_AFTER_POWER_UP, + ADSP_BEFORE_POWER_DOWN, + ADSP_AFTER_POWER_UP, + TWM_SLATE_AFTER_POWER_UP, + SLATE_DSP_ERROR, + SLATE_DSP_READY, + SLATE_BT_ERROR, + SLATE_BT_READY, +}; + +#define SLATE_AFTER_POWER_UP SLATE_AFTER_POWER_UP + +#define REG_READ \ + _IOWR(EXCHANGE_CODE, SLATECOM_REG_READ, \ + struct slate_ui_data) +#define AHB_READ \ + _IOWR(EXCHANGE_CODE, SLATECOM_AHB_READ, \ + struct slate_ui_data) +#define AHB_WRITE \ + _IOW(EXCHANGE_CODE, SLATECOM_AHB_WRITE, \ + struct slate_ui_data) +#define SET_SPI_FREE \ + _IOR(EXCHANGE_CODE, SLATECOM_SET_SPI_FREE, \ + struct slate_ui_data) +#define SET_SPI_BUSY \ + _IOR(EXCHANGE_CODE, SLATECOM_SET_SPI_BUSY, \ + struct slate_ui_data) +#define REG_WRITE \ + _IOWR(EXCHANGE_CODE, SLATECOM_REG_WRITE, \ + struct slate_ui_data) +#define SLATE_SOFT_RESET \ + _IOWR(EXCHANGE_CODE, SLATECOM_SOFT_RESET, \ + struct slate_ui_data) +#define SLATE_TWM_EXIT \ + _IOWR(EXCHANGE_CODE, SLATECOM_TWM_EXIT, \ + struct slate_ui_data) +#define SLATE_APP_RUNNING \ + _IOWR(EXCHANGE_CODE, SLATECOM_SLATE_APP_RUNNING, \ + struct slate_ui_data) +#define SLATE_MODEM_DOWN2_SLATE_DONE \ + _IOWR(EXCHANGE_CODE, SLATECOM_MODEM_DOWN2_SLATE, \ + struct slate_ui_data) +#define SLATE_WEAR_LOAD \ + _IOWR(EXCHANGE_CODE, SLATECOM_SLATE_WEAR_LOAD, \ + struct slate_ui_data) +#define SLATE_WEAR_UNLOAD \ + _IOWR(EXCHANGE_CODE, SLATECOM_SLATE_WEAR_UNLOAD, \ + struct slate_ui_data) +#define SLATE_ADSP_DOWN2_SLATE_DONE \ + _IOWR(EXCHANGE_CODE, SLATECOM_ADSP_DOWN2_SLATE, \ + struct slate_ui_data) + +#endif /* LINUX_SLATECOM_INTERFACE_H */ + diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 47e0de1df362..a1ae3778570f 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -22,7 +22,7 @@ #ifndef _UAPI_LINUX_SYSCTL_H #define _UAPI_LINUX_SYSCTL_H -#include <linux/kernel.h> +#include <linux/const.h> #include <linux/types.h> #include <linux/compiler.h> diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 7778723e4405..a055762bcea2 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -333,6 +333,9 @@ struct usb_config_descriptor { /*-------------------------------------------------------------------------*/ +/* USB String descriptors can contain at most 126 characters. */ +#define USB_MAX_STRING_LEN 126 + /* USB_DT_STRING: String descriptor */ struct usb_string_descriptor { __u8 bLength; diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h new file mode 100644 index 000000000000..ae88be14c947 --- /dev/null +++ b/include/uapi/linux/wireguard.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Documentation + * ============= + * + * The below enums and macros are for interfacing with WireGuard, using generic + * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two + * methods: get and set. Note that while they share many common attributes, + * these two functions actually accept a slightly different set of inputs and + * outputs. + * + * WG_CMD_GET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain + * one but not both of: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * + * The kernel will then return several messages (NLM_F_MULTI) containing the + * following tree of nested items: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_LISTEN_PORT: NLA_U16 + * WGDEVICE_A_FWMARK: NLA_U32 + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 + * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec + * WGPEER_A_RX_BYTES: NLA_U64 + * WGPEER_A_TX_BYTES: NLA_U64 + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32 + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that all of the allowed IPs of a single peer will not + * fit within a single netlink message. In that case, the same peer will + * be written in the following message, except it will only contain + * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several + * times in a row for the same peer. It is then up to the receiver to + * coalesce adjacent peers. Likewise, it is possible that all peers will + * not fit within a single message. So, subsequent peers will be sent + * in following messages, except those will only contain WGDEVICE_A_IFNAME + * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these + * messages to form the complete list of peers. + * + * Since this is an NLA_F_DUMP command, the final message will always be + * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message + * contains an integer error code. It is either zero or a negative error + * code corresponding to the errno. + * + * WG_CMD_SET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST. The command should contain the + * following tree of nested items, containing one but not both of + * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current + * peers should be removed prior to adding the list below. + * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove + * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly + * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN + * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the + * specified peer should not exist at the end of the + * operation, rather than added/updated and/or + * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed + * IPs of this peer should be removed prior to adding + * the list below and/or WGPEER_F_UPDATE_ONLY if the + * peer should only be set if it already exists. + * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove + * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at + * all by most users of this API, as the + * most recent protocol will be used when + * this is unset. Otherwise, must be set + * to 1. + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that the amount of configuration data exceeds that of + * the maximum message length accepted by the kernel. In that case, several + * messages should be sent one after another, with each successive one + * filling in information not contained in the prior. Note that if + * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably + * should not be specified in fragments that come after, so that the list + * of peers is only cleared the first time but appended after. Likewise for + * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message + * of a peer, it likely should not be specified in subsequent fragments. + * + * If an error occurs, NLMSG_ERROR will reply containing an errno. + */ + +#ifndef _WG_UAPI_WIREGUARD_H +#define _WG_UAPI_WIREGUARD_H + +#define WG_GENL_NAME "wireguard" +#define WG_GENL_VERSION 1 + +#define WG_KEY_LEN 32 + +enum wg_cmd { + WG_CMD_GET_DEVICE, + WG_CMD_SET_DEVICE, + __WG_CMD_MAX +}; +#define WG_CMD_MAX (__WG_CMD_MAX - 1) + +enum wgdevice_flag { + WGDEVICE_F_REPLACE_PEERS = 1U << 0, + __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS +}; +enum wgdevice_attribute { + WGDEVICE_A_UNSPEC, + WGDEVICE_A_IFINDEX, + WGDEVICE_A_IFNAME, + WGDEVICE_A_PRIVATE_KEY, + WGDEVICE_A_PUBLIC_KEY, + WGDEVICE_A_FLAGS, + WGDEVICE_A_LISTEN_PORT, + WGDEVICE_A_FWMARK, + WGDEVICE_A_PEERS, + __WGDEVICE_A_LAST +}; +#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) + +enum wgpeer_flag { + WGPEER_F_REMOVE_ME = 1U << 0, + WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, + WGPEER_F_UPDATE_ONLY = 1U << 2, + __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | + WGPEER_F_UPDATE_ONLY +}; +enum wgpeer_attribute { + WGPEER_A_UNSPEC, + WGPEER_A_PUBLIC_KEY, + WGPEER_A_PRESHARED_KEY, + WGPEER_A_FLAGS, + WGPEER_A_ENDPOINT, + WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + WGPEER_A_LAST_HANDSHAKE_TIME, + WGPEER_A_RX_BYTES, + WGPEER_A_TX_BYTES, + WGPEER_A_ALLOWEDIPS, + WGPEER_A_PROTOCOL_VERSION, + __WGPEER_A_LAST +}; +#define WGPEER_A_MAX (__WGPEER_A_LAST - 1) + +enum wgallowedip_attribute { + WGALLOWEDIP_A_UNSPEC, + WGALLOWEDIP_A_FAMILY, + WGALLOWEDIP_A_IPADDR, + WGALLOWEDIP_A_CIDR_MASK, + __WGALLOWEDIP_A_LAST +}; +#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) + +#endif /* _WG_UAPI_WIREGUARD_H */ diff --git a/include/xen/events.h b/include/xen/events.h index 88da2abaf535..ad0c61cf399b 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -12,11 +12,16 @@ unsigned xen_evtchn_nr_channels(void); -int bind_evtchn_to_irq(unsigned int evtchn); -int bind_evtchn_to_irqhandler(unsigned int evtchn, +int bind_evtchn_to_irq(evtchn_port_t evtchn); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, const char *devname, + void *dev_id); int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, @@ -29,13 +34,21 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, const char *devname, void *dev_id); int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port); + evtchn_port_t remote_port); +int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port); int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, + evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); /* * Common unbind function for all event sources. Takes IRQ to unbind from. @@ -44,6 +57,14 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, */ void unbind_from_irqhandler(unsigned int irq, void *dev_id); +/* + * Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi + * functions above. + */ +void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags); +/* Signal an event was spurious, i.e. there was no action resulting from it. */ +#define XEN_EOI_FLAG_SPURIOUS 0x00000001 + #define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX #define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT #define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 34b1379f9777..f9d8aac170fb 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -157,6 +157,7 @@ gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, map->flags = flags; map->ref = ref; map->dom = domid; + map->status = 1; /* arbitrary positive value */ } static inline void diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index 7dc685b4057d..4076d1c407d8 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -24,82 +24,79 @@ typedef unsigned int RING_IDX; * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ -#define __CONST_RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ - sizeof(((struct _s##_sring *)0)->ring[0]))) - +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) /* * The same for passing in an actual pointer instead of a name tag. */ -#define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* * Macros to make the correct C datatypes for a new kind of ring. * * To make a new ring datatype, you need to have two message structures, - * let's say struct request, and struct response already defined. + * let's say request_t, and response_t already defined. * * In a header where you want the ring datatype declared, you then do: * - * DEFINE_RING_TYPES(mytag, struct request, struct response); + * DEFINE_RING_TYPES(mytag, request_t, response_t); * * These expand out to give you a set of types, as you can see below. * The most important of these are: * - * struct mytag_sring - The shared ring. - * struct mytag_front_ring - The 'front' half of the ring. - * struct mytag_back_ring - The 'back' half of the ring. + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. * * To initialize a ring in your code you need to know the location and size * of the shared memory area (PAGE_SIZE, for instance). To initialise * the front half: * - * struct mytag_front_ring front_ring; - * SHARED_RING_INIT((struct mytag_sring *)shared_page); - * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); * * Initializing the back follows similarly (note that only the front * initializes the shared ring): * - * struct mytag_back_ring back_ring; - * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); */ -#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ - \ -/* Shared ring entry */ \ -union __name##_sring_entry { \ - __req_t req; \ - __rsp_t rsp; \ -}; \ - \ -/* Shared ring page */ \ -struct __name##_sring { \ - RING_IDX req_prod, req_event; \ - RING_IDX rsp_prod, rsp_event; \ - uint8_t pad[48]; \ - union __name##_sring_entry ring[1]; /* variable-length */ \ -}; \ - \ -/* "Front" end's private variables */ \ -struct __name##_front_ring { \ - RING_IDX req_prod_pvt; \ - RING_IDX rsp_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; \ - \ -/* "Back" end's private variables */ \ -struct __name##_back_ring { \ - RING_IDX rsp_prod_pvt; \ - RING_IDX req_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; - +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + uint8_t __pad[48]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ /* * Macros for manipulating rings. * @@ -116,105 +113,99 @@ struct __name##_back_ring { \ */ /* Initialising empty rings */ -#define SHARED_RING_INIT(_s) do { \ - (_s)->req_prod = (_s)->rsp_prod = 0; \ - (_s)->req_event = (_s)->rsp_event = 1; \ - memset((_s)->pad, 0, sizeof((_s)->pad)); \ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) -#define FRONT_RING_INIT(_r, _s, __size) do { \ - (_r)->req_prod_pvt = 0; \ - (_r)->rsp_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define FRONT_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->req_prod_pvt = (_i); \ + (_r)->rsp_cons = (_i); \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) -#define BACK_RING_INIT(_r, _s, __size) do { \ - (_r)->rsp_prod_pvt = 0; \ - (_r)->req_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ -} while (0) +#define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size) -/* Initialize to existing shared indexes -- for recovery */ -#define FRONT_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->req_prod_pvt = (_s)->req_prod; \ - (_r)->rsp_cons = (_s)->rsp_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ +#define BACK_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->rsp_prod_pvt = (_i); \ + (_r)->req_cons = (_i); \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) -#define BACK_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ - (_r)->req_cons = (_s)->req_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ -} while (0) +#define BACK_RING_INIT(_r, _s, __size) BACK_RING_ATTACH(_r, _s, 0, __size) /* How big is this ring? */ -#define RING_SIZE(_r) \ +#define RING_SIZE(_r) \ ((_r)->nr_ents) /* Number of free requests (for use on front side only). */ -#define RING_FREE_REQUESTS(_r) \ +#define RING_FREE_REQUESTS(_r) \ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) /* Test if there is an empty slot available on the front ring. * (This is only meaningful from the front. ) */ -#define RING_FULL(_r) \ +#define RING_FULL(_r) \ (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ -#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ ((_r)->sring->rsp_prod - (_r)->rsp_cons) -#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - ({ \ - unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ - unsigned int rsp = RING_SIZE(_r) - \ - ((_r)->req_cons - (_r)->rsp_prod_pvt); \ - req < rsp ? req : rsp; \ - }) +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) /* Direct access to individual ring elements, by index. */ -#define RING_GET_REQUEST(_r, _idx) \ +#define RING_GET_REQUEST(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + /* - * Get a local copy of a request. + * Get a local copy of a request/response. * - * Use this in preference to RING_GET_REQUEST() so all processing is + * Use this in preference to RING_GET_{REQUEST,RESPONSE}() so all processing is * done on a local copy that cannot be modified by the other end. * * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this - * to be ineffective where _req is a struct which consists of only bitfields. + * to be ineffective where dest is a struct which consists of only bitfields. */ -#define RING_COPY_REQUEST(_r, _idx, _req) do { \ - /* Use volatile to force the copy into _req. */ \ - *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ +#define RING_COPY_(type, r, idx, dest) do { \ + /* Use volatile to force the copy into dest. */ \ + *(dest) = *(volatile typeof(dest))RING_GET_##type(r, idx); \ } while (0) -#define RING_GET_RESPONSE(_r, _idx) \ - (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) +#define RING_COPY_REQUEST(r, idx, req) RING_COPY_(REQUEST, r, idx, req) +#define RING_COPY_RESPONSE(r, idx, rsp) RING_COPY_(RESPONSE, r, idx, rsp) /* Loop termination condition: Would the specified index overflow the ring? */ -#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) /* Ill-behaved frontend determination: Can there be this many requests? */ -#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) +/* Ill-behaved backend determination: Can there be this many responses? */ +#define RING_RESPONSE_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_cons) > RING_SIZE(_r)) -#define RING_PUSH_REQUESTS(_r) do { \ - wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +#define RING_PUSH_REQUESTS(_r) do { \ + wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ } while (0) -#define RING_PUSH_RESPONSES(_r) do { \ - wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +#define RING_PUSH_RESPONSES(_r) do { \ + wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ } while (0) /* @@ -247,40 +238,40 @@ struct __name##_back_ring { \ * field appropriately. */ -#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->req_prod; \ - RING_IDX __new = (_r)->req_prod_pvt; \ - wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = __new; \ - mb(); /* back sees new requests /before/ we check req_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->rsp_prod; \ - RING_IDX __new = (_r)->rsp_prod_pvt; \ - wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = __new; \ - mb(); /* front sees new responses /before/ we check rsp_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ - if (_work_to_do) break; \ - (_r)->sring->req_event = (_r)->req_cons + 1; \ - mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ } while (0) -#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ - if (_work_to_do) break; \ - (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ - mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ } while (0) #endif /* __XEN_PUBLIC_IO_RING_H__ */ diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 32b944b7cebd..ed9e7e3307b7 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -58,6 +58,15 @@ struct xenbus_watch /* Path being watched. */ const char *node; + unsigned int nr_pending; + + /* + * Called just before enqueing new event while a spinlock is held. + * The event will be discarded if this callback returns false. + */ + bool (*will_handle)(struct xenbus_watch *, + const char **vec, unsigned int len); + /* Callback (executed in a process context with no locks held). */ void (*callback)(struct xenbus_watch *, const char **vec, unsigned int len); @@ -194,10 +203,14 @@ void xenbus_suspend_cancel(void); int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char **, unsigned int), void (*callback)(struct xenbus_watch *, const char **, unsigned int)); -__printf(4, 5) +__printf(5, 6) int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char **, unsigned int), void (*callback)(struct xenbus_watch *, const char **, unsigned int), const char *pathfmt, ...); |