summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavide Garberi <dade.garberi@gmail.com>2023-05-28 15:57:41 +0200
committerDavide Garberi <dade.garberi@gmail.com>2023-08-03 02:55:58 +0200
commitce2e1dc133974cf19668249e2f527bb98ddf449f (patch)
tree7c462c49b405757413d42411042274e035debab4
parent7ef1b1f38a94bf54b215c4c56ea8d62ec607f73b (diff)
parent1a4b80f8f2017576f530658f48659d9a222f4648 (diff)
Merge lineage-20 of git@github.com:LineageOS/android_kernel_qcom_msm8998.git into lineage-20
1a4b80f8f201 ANDROID: arch:arm64: Increase kernel command line size 7c253f7aa663 of: reserved_mem: increase max number reserved regions df4dbf557503 msm: camera: Fix indentations 2fc4a156d15d msm: camera: Fix code flow when populating CAM_V_CUSTOM1 687bcb61f125 ALSA: control: use counting semaphore as write lock for ELEM_WRITE operation 75cf9e8c1b1c ALSA: control: Fix memory corruption risk in snd_ctl_elem_read 76cf3b5e53df ALSA: control: code refactoring for ELEM_READ/ELEM_WRITE operations e9af212f9685 ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF 95fc4fff573f msm: kgsl: Make sure that pool pages don't have any extra references 59ceabe0d242 msm: kgsl: Use dma_buf_get() to get dma_buf structure d1f19956d6b9 ANDROID: usb: f_accessory: Check buffer size when initialised via composite 2d3ce4f7a366 kbuild: handle libs-y archives separately from built-in.o archives 65dc3fbd1593 kbuild: thin archives use P option to ar 362c7b73bac8 kbuild: thin archives for multi-y targets 43076241b514 kbuild: thin archives final link close --whole-archives option aa04fc78256d kbuild: minor improvement for thin archives build f5896747cda6 Merge tag 'LA.UM.7.2.c25-07700-sdm660.0' of https://git.codelinaro.org/clo/la/platform/vendor/qcom-opensource/wlan/qcacld-3.0 into android13-4.4-msm8998 321ac077ee7e qcacld-3.0: Fix out-of-bounds in tx_stats 42be8e4cbf13 BACKPORT: usb: gadget: rndis: prevent integer overflow in rndis_set_response() b490a85b5945 FROMGIT: arm64: fix oops in concurrently setting insn_emulation sysctls 7ed7084b34a9 FROMLIST: binder: fix UAF of ref->proc caused by race condition e31f087fb864 ANDROID: selinux: modify RTM_GETNEIGH{TBL} 80675d431434 UPSTREAM: usb: gadget: clear related members when goto fail fb6adfb00108 UPSTREAM: usb: gadget: don't release an existing dev->buf e4a8dd12424e UPSTREAM: USB: gadget: validate interface OS descriptor requests 8f0a947317e0 UPSTREAM: usb: gadget: rndis: check size of RNDIS_MSG_SET command 1541758765ff ion: Do not 'put' ION handle until after its final use 03b4b3cd8d30 Merge tag 'LA.UM.7.2.c25-07000-sdm660.0' of https://git.codelinaro.org/clo/la/platform/vendor/qcom-opensource/wlan/qcacld-3.0 into android13-4.4-msm8998 7dbda95466d5 Merge tag 'LA.UM.8.4.c25-06600-8x98.0' of https://git.codelinaro.org/clo/la/kernel/msm-4.4 into android13-4.4-msm8998 369119e5df4e cert host tools: Stop complaining about deprecated OpenSSL functions f8e30a0f9a17 fixup! BACKPORT: treewide: Fix function prototypes for module_param_call() 4fa5045f3dc9 arm64/efi: Mark __efistub_stext_offset as an absolute symbol explicitly bcd9668da77f arm64: kernel: do not need to reset UAO on exception entry c4ddd677f7e3 Kbuild: do not emit debug info for assembly with LLVM_IAS=1 1b880b6e19f8 qcacld-3.0: Add time slice duty cycle in wifi_interface_info fd24be2b22a1 qcacmn: Add time slice duty cycle attribute into QCA vendor command d719c1c825f8 qcacld-3.0: Use field-by-field assignment for FW stats fb5eb3bda2d9 ext4: enable quota enforcement based on mount options cd40d7f301de ext4: adds project ID support 360e2f3d18b8 ext4: add project quota support c31ac2be1594 drivers: qcacld-3.0: Remove in_compat_syscall() redefinition 6735c13a269d arm64: link with -z norelro regardless of CONFIG_RELOCATABLE 99962aab3433 arm64: relocatable: fix inconsistencies in linker script and options 24bd8cc5e6bb arm64: prevent regressions in compressed kernel image size when upgrading to binutils 2.27 93bb4c2392a2 arm64: kernel: force ET_DYN ELF type for CONFIG_RELOCATABLE=y a54bbb725ccb arm64: build with baremetal linker target instead of Linux when available c5805c604a9b arm64: add endianness option to LDFLAGS instead of LD ab6052788f60 arm64: Set UTS_MACHINE in the Makefile c3330429b2c6 kbuild: clear LDFLAGS in the top Makefile f33c1532bd61 kbuild: use HOSTLDFLAGS for single .c executables 38b7db363a96 BACKPORT: arm64: Change .weak to SYM_FUNC_START_WEAK_PI for arch/arm64/lib/mem*.S 716cb63e81d9 BACKPORT: crypto: arm64/aes-ce-cipher - move assembler code to .S file 7dfbaee16432 BACKPORT: arm64: Remove reference to asm/opcodes.h 531ee8624d17 BACKPORT: arm64: kprobe: protect/rename few definitions to be reused by uprobe 08d83c997b0c BACKPORT: arm64: Delete the space separator in __emit_inst e3951152dc2d BACKPORT: arm64: Get rid of asm/opcodes.h 255820c0f301 BACKPORT: arm64: Fix minor issues with the dcache_by_line_op macro 21bb344a664b BACKPORT: crypto: arm64/aes-modes - get rid of literal load of addend vector 26d5a53c6e0d BACKPORT: arm64: vdso: remove commas between macro name and arguments 78bff1f77c9d BACKPORT: kbuild: support LLVM=1 to switch the default tools to Clang/LLVM 6634f9f63efe BACKPORT: kbuild: replace AS=clang with LLVM_IAS=1 b891e8fdc466 BACKPORT: Documentation/llvm: fix the name of llvm-size 75d6fa8368a8 BACKPORT: Documentation/llvm: add documentation on building w/ Clang/LLVM 95b0a5e52f2a BACKPORT: ANDROID: ftrace: fix function type mismatches 7da9c2138ec8 BACKPORT: ANDROID: fs: logfs: fix filler function type d6d5a4b28ad0 BACKPORT: ANDROID: fs: gfs2: fix filler function type 9b194a470db5 BACKPORT: ANDROID: fs: exofs: fix filler function type 7a45ac4bfb49 BACKPORT: ANDROID: fs: afs: fix filler function type 4099e1b281e5 BACKPORT: drivers/perf: arm_pmu: fix function type mismatch af7b738882f7 BACKPORT: dummycon: fix function types 1b0b55a36dbe BACKPORT: fs: nfs: fix filler function type a58a0e30e20a BACKPORT: mm: fix filler function type mismatch 829e9226a8c0 BACKPORT: mm: fix drain_local_pages function type 865ef61b4da8 BACKPORT: vfs: pass type instead of fn to do_{loop,iter}_readv_writev() 08d2f8e7ba8e BACKPORT: module: Do not paper over type mismatches in module_param_call() ea467f6c33e4 BACKPORT: treewide: Fix function prototypes for module_param_call() d131459e6b8b BACKPORT: module: Prepare to convert all module_param_call() prototypes 6f52abadf006 BACKPORT: kbuild: fix --gc-sections bf7540ffce44 BACKPORT: kbuild: record needed exported symbols for modules c49d2545e437 BACKPORT: kbuild: Allow to specify composite modules with modname-m 427d0fc67dc1 BACKPORT: kbuild: add arch specific post-link Makefile 69f8a31838a3 BACKPORT: arm64: add a workaround for GNU gold with ARM64_MODULE_PLTS ba3368756abf BACKPORT: arm64: explicitly pass --no-fix-cortex-a53-843419 to GNU gold 6dacd7e737fb BACKPORT: arm64: errata: Pass --fix-cortex-a53-843419 to ld if workaround enabled d2787c21f2b5 BACKPORT: kbuild: add __ld-ifversion and linker-specific macros 2d471de60bb4 BACKPORT: kbuild: add ld-name macro 06280a90d845 BACKPORT: arm64: keep .altinstructions and .altinstr_replacement eb0ad3ae07f9 BACKPORT: kbuild: add __cc-ifversion and compiler-specific variants 3d01e1eba86b BACKPORT: FROMLIST: kbuild: add clang-version.sh 18dd378ab563 BACKPORT: FROMLIST: kbuild: fix LD_DEAD_CODE_DATA_ELIMINATION aabbc122b1de BACKPORT: kbuild: thin archives make default for all archs 756d47e345fc BACKPORT: kbuild: allow archs to select link dead code/data elimination 723ab99e48a7 BACKPORT: kbuild: allow architectures to use thin archives instead of ld -r 0b77ec583772 drivers/usb/serial/console.c: remove superfluous serial->port condition 6488cb478f04 drivers/firmware/efi/libstub.c: prevent a relocation dba4259216a0 UPSTREAM: pidfd: fix a poll race when setting exit_state baab6e33b07b BACKPORT: arch: wire-up pidfd_open() 5d2e9e4f8630 BACKPORT: pid: add pidfd_open() f8396a127daf UPSTREAM: pidfd: add polling support f4c358582254 UPSTREAM: signal: improve comments 5500316dc8d8 UPSTREAM: fork: do not release lock that wasn't taken fc7d707593e3 BACKPORT: signal: support CLONE_PIDFD with pidfd_send_signal f044fa00d72a BACKPORT: clone: add CLONE_PIDFD f20fc1c548f2 UPSTREAM: Make anon_inodes unconditional de80525cd462 UPSTREAM: signal: use fdget() since we don't allow O_PATH 229e1bdd624e UPSTREAM: signal: don't silently convert SI_USER signals to non-current pidfd ada02e996b52 BACKPORT: signal: add pidfd_send_signal() syscall 828857678c5c compat: add in_compat_syscall to ask whether we're in a compat syscall e7aede4896c0 bpf: Add new cgroup attach type to enable sock modifications 9ed75228b09c ebpf: allow bpf_get_current_uid_gid_proto also for networking c5aa3963b4ae bpf: fix overflow in prog accounting c46a001439fc bpf: Make sure mac_header was set before using it 8aed99185615 bpf: Enlarge offset check value to INT_MAX in bpf_skb_{load,store}_bytes b0a638335ba6 bpf: avoid false sharing of map refcount with max_entries 1f21605e373c net: remove hlist_nulls_add_tail_rcu() 9ce369b09dbb udp: get rid of SLAB_DESTROY_BY_RCU allocations 070f539fb5d7 udp: no longer use SLAB_DESTROY_BY_RCU a32d2ea857c5 inet: refactor inet[6]_lookup functions to take skb fcf3e7bc7203 soreuseport: fix initialization race df03c8cf024a soreuseport: Fix TCP listener hash collision bd8b9f50c9d3 inet: Fix missing return value in inet6_hash bae331196dd0 soreuseport: fast reuseport TCP socket selection 4ada2ed73da0 inet: create IPv6-equivalent inet_hash function 73f609838475 sock: struct proto hash function may error e3b32750621b cgroup: Fix sock_cgroup_data on big-endian. 69dabcedd4b9 selinux: always allow mounting submounts 17d6ddebcc49 userns: Don't fail follow_automount based on s_user_ns cbd08255e6f8 fs: Better permission checking for submounts 3a9ace719251 mnt: Move the FS_USERNS_MOUNT check into sget_userns af53549b43c5 locks: sprinkle some tracepoints around the file locking code 07dbbc84aa34 locks: rename __posix_lock_file to posix_lock_inode 400cbe93d180 autofs: Fix automounts by using current_real_cred()->uid 7903280ee07a fs: Call d_automount with the filesystems creds b87fb50ff1cd UPSTREAM: kernfs: Check KERNFS_HAS_RELEASE before calling kernfs_release_file() c9c596de3e52 UPSTREAM: kernfs: fix locking around kernfs_ops->release() callback 2172eaf5a901 UPSTREAM: cgroup, bpf: remove unnecessary #include dc81f3963dde kernfs: kernfs_sop_show_path: don't return 0 after seq_dentry call ce9a52e20897 cgroup: Make rebind_subsystems() disable v2 controllers all at once ce5e3aa14c39 cgroup: fix sock_cgroup_data initialization on earlier compilers 94a70ef24da9 samples/bpf: fix bpf_perf_event_output prototype c1920272278e net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list d7707635776b sk_buff: allow segmenting based on frag sizes 924bbacea75e ip_tunnel, bpf: ip_tunnel_info_opts_{get, set} depends on CONFIG_INET 0e9008d618f4 bpf: udp: ipv6: Avoid running reuseport's bpf_prog from __udp6_lib_err 01b437940f5e soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF 421fbf04bf2c soreuseport: change consume_skb to kfree_skb in error case 1ab50514c430 ipv6: Fix SO_REUSEPORT UDP socket with implicit sk_ipv6only f3dfd61c502d soreuseport: fix ordering for mixed v4/v6 sockets 245ee3c90795 soreuseport: fix NULL ptr dereference SO_REUSEPORT after bind 113fb209854a bpf: do not blindly change rlimit in reuseport net selftest 985253ef27d2 bpf: fix rlimit in reuseport net selftest ae61334510be soreuseport: Fix reuseport_bpf testcase on 32bit architectures 6efa24da01a5 udp: fix potential infinite loop in SO_REUSEPORT logic 66df70c6605d soreuseport: BPF selection functional test for TCP fe161031b8a8 soreuseport: pass skb to secondary UDP socket lookup 9223919efdf2 soreuseport: BPF selection functional test 2090ed790dbb soreuseport: fix mem leak in reuseport_add_sock() 67887f6ac3f1 Merge "diag: Ensure dci entry is valid before sending the packet" e41c0da23b38 diag: Prevent out of bound write while sending dci pkt to remote e1085d1ef39b diag: Ensure dci entry is valid before sending the packet 16802e80ecb5 Merge "ion: Fix integer overflow in msm_ion_custom_ioctl" 57146f83f388 ion: Fix integer overflow in msm_ion_custom_ioctl 6fc2001969fe diag: Use valid data_source for a valid token 0c6dbf858a98 qcacld-3.0: Avoid OOB read in dot11f_unpack_assoc_response f07caca0c485 qcacld-3.0: Fix array OOB for duplicate rate 5a359aba0364 msm: kgsl: Remove 'fd' dependency to get dma_buf handle da8317596949 msm: kgsl: Fix gpuaddr_in_range() to check upper bound 2ed91a98d8b4 msm: adsprpc: Handle UAF in fastrpc debugfs read 2967159ad303 msm: kgsl: Add a sysfs node to control performance counter reads e392a84f25f5 msm: kgsl: Perform cache flush on the pages obtained using get_user_pages() 28b45f75d2ee soc: qcom: hab: Add sanity check for payload_count 885caec7690f Merge "futex: Fix inode life-time issue" 0f57701d2643 Merge "futex: Handle faults correctly for PI futexes" 7d7eb450c333 Merge "futex: Rework inconsistent rt_mutex/futex_q state" 124ebd87ef2f msm: kgsl: Fix out of bound write in adreno_profile_submit_time 228bbfb25032 futex: Fix inode life-time issue 7075ca6a22b3 futex: Handle faults correctly for PI futexes a436b73e9032 futex: Simplify fixup_pi_state_owner() 11b99dbe3221 futex: Use pi_state_update_owner() in put_pi_state() f34484030550 rtmutex: Remove unused argument from rt_mutex_proxy_unlock() 079d1c90b3c3 futex: Provide and use pi_state_update_owner() 3b51e24eb17b futex: Replace pointless printk in fixup_owner() 0eac5c2583a1 futex: Avoid violating the 10th rule of futex 6d6ed38b7d10 futex: Rework inconsistent rt_mutex/futex_q state 3c8f7dfd59b5 futex: Remove rt_mutex_deadlock_account_*() 9c870a329520 futex,rt_mutex: Provide futex specific rt_mutex API 7504736e8725 msm: adsprpc: Handle UAF in process shell memory 994e5922a0c2 Disable TRACER Check to improve Camera Performance 8fb3f17b3ad1 msm: kgsl: Deregister gpu address on memdesc_sg_virt failure 13aa628efdca Merge "crypto: Fix possible stack out-of-bound error" 92e777451003 Merge "msm: kgsl: Correct the refcount on current process PID." 9ca218394ed4 Merge "msm: kgsl: Compare pid pointer instead of TGID for a new process" 7eed1f2e0f43 Merge "qcom,max-freq-level change for trial" 6afb5eb98e36 crypto: Fix possible stack out-of-bound error 8b5ba278ed4b msm: kgsl: Correct the refcount on current process PID. 4150552fac96 msm: kgsl: Compare pid pointer instead of TGID for a new process c272102c0793 qcom,max-freq-level change for trial 854ef3ce73f5 msm: kgsl: Protect the memdesc->gpuaddr in SVM use cases. 79c8161aeac9 msm: kgsl: Stop using memdesc->usermem. Change-Id: Iea7db1362c3cd18e36f243411e773a9054f6a445
-rw-r--r--Documentation/kbuild/00-INDEX2
-rw-r--r--Documentation/kbuild/kbuild.txt11
-rw-r--r--Documentation/kbuild/llvm.txt86
-rw-r--r--Documentation/kbuild/makefiles.txt16
-rw-r--r--Makefile59
-rw-r--r--arch/Kconfig19
-rw-r--r--arch/arm/boot/dts/qcom/msm8996-v3.dtsi1
-rw-r--r--arch/arm/boot/dts/qcom/msm8996pro.dtsi1
-rw-r--r--arch/arm/kvm/Kconfig1
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/Kconfig15
-rw-r--r--arch/arm64/Makefile35
-rw-r--r--arch/arm64/configs/msmcortex_defconfig8
-rw-r--r--arch/arm64/crypto/Makefile2
-rw-r--r--arch/arm64/crypto/aes-ce-core.S87
-rw-r--r--arch/arm64/crypto/aes-ce-glue.c (renamed from arch/arm64/crypto/aes-ce-cipher.c)117
-rw-r--r--arch/arm64/crypto/aes-modes.S16
-rw-r--r--arch/arm64/include/asm/assembler.h24
-rw-r--r--arch/arm64/include/asm/opcodes.h5
-rw-r--r--arch/arm64/include/asm/probes.h21
-rw-r--r--arch/arm64/include/asm/sysreg.h18
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/include/uapi/asm/setup.h2
-rw-r--r--arch/arm64/kernel/Makefile2
-rw-r--r--arch/arm64/kernel/app_setting.c4
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c14
-rw-r--r--arch/arm64/kernel/entry.S1
-rw-r--r--arch/arm64/kernel/image.h6
-rw-r--r--arch/arm64/kernel/insn.c1
-rw-r--r--arch/arm64/kernel/module.lds2
-rw-r--r--arch/arm64/kernel/probes/decode-insn.c34
-rw-r--r--arch/arm64/kernel/probes/decode-insn.h8
-rw-r--r--arch/arm64/kernel/probes/kprobes.c36
-rw-r--r--arch/arm64/kernel/setup.c2
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S6
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S15
-rw-r--r--arch/arm64/kvm/Kconfig1
-rw-r--r--arch/arm64/lib/memcpy.S3
-rw-r--r--arch/arm64/lib/memmove.S3
-rw-r--r--arch/arm64/lib/memset.S3
-rw-r--r--arch/mips/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c2
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/oprofile/init.c2
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/oprofile/nmi_int.c2
-rw-r--r--drivers/acpi/ec.c6
-rw-r--r--drivers/acpi/sysfs.c8
-rw-r--r--drivers/android/binder.c14
-rw-r--r--drivers/base/Kconfig1
-rw-r--r--drivers/char/adsprpc.c11
-rw-r--r--drivers/char/diag/diag_dci.c20
-rw-r--r--drivers/char/ipmi/ipmi_poweroff.c2
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c4
-rw-r--r--drivers/crypto/msm/qce50.c12
-rw-r--r--drivers/edac/edac_mc_sysfs.c2
-rw-r--r--drivers/edac/edac_module.c3
-rw-r--r--drivers/firmware/efi/libstub/arm-stub.c5
-rw-r--r--drivers/firmware/efi/libstub/arm64-stub.c2
-rw-r--r--drivers/firmware/efi/libstub/efistub.h2
-rw-r--r--drivers/gpu/msm/adreno.c5
-rw-r--r--drivers/gpu/msm/adreno.h9
-rw-r--r--drivers/gpu/msm/adreno_compat.c9
-rw-r--r--drivers/gpu/msm/adreno_dispatch.c4
-rw-r--r--drivers/gpu/msm/adreno_ioctl.c9
-rw-r--r--drivers/gpu/msm/adreno_sysfs.c33
-rw-r--r--drivers/gpu/msm/kgsl.c33
-rw-r--r--drivers/gpu/msm/kgsl_iommu.c10
-rw-r--r--drivers/gpu/msm/kgsl_mmu.c8
-rw-r--r--drivers/gpu/msm/kgsl_mmu.h7
-rw-r--r--drivers/gpu/msm/kgsl_pool.c10
-rw-r--r--drivers/hid/hid-magicmouse.c3
-rw-r--r--drivers/hwtracing/coresight/coresight-event.c10
-rw-r--r--drivers/ide/ide.c4
-rw-r--r--drivers/iio/Kconfig1
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c4
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c2
-rw-r--r--drivers/isdn/hardware/mISDN/avmfritz.c2
-rw-r--r--drivers/isdn/hardware/mISDN/mISDNinfineon.c2
-rw-r--r--drivers/isdn/hardware/mISDN/netjet.c2
-rw-r--r--drivers/isdn/hardware/mISDN/speedfax.c2
-rw-r--r--drivers/isdn/hardware/mISDN/w6692.c2
-rw-r--r--drivers/md/md.c6
-rw-r--r--drivers/media/platform/msm/camera_v2/isp/msm_isp32.c6
-rw-r--r--drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c4
-rw-r--r--drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c2
-rw-r--r--drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c1
-rw-r--r--drivers/media/usb/uvc/uvc_driver.c4
-rw-r--r--drivers/message/fusion/mptbase.c4
-rw-r--r--drivers/misc/ibmasm/ibmasm.h2
-rw-r--r--drivers/misc/kgdbts.c3
-rw-r--r--drivers/mtd/devices/block2mtd.c2
-rw-r--r--drivers/mtd/devices/phram.c2
-rw-r--r--drivers/mtd/ubi/build.c2
-rw-r--r--drivers/net/wireless/wcnss/wcnss_wlan.c2
-rw-r--r--drivers/of/of_reserved_mem.c4
-rw-r--r--drivers/pci/pcie/aspm.c5
-rw-r--r--drivers/perf/arm_pmu.c2
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c2
-rw-r--r--drivers/power/reset/msm-poweroff.c4
-rw-r--r--drivers/scsi/fcoe/fcoe_transport.c20
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c2
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_scsih.c2
-rw-r--r--drivers/soc/qcom/hab/hab.h8
-rw-r--r--drivers/soc/qcom/hab/hab_msg.c59
-rw-r--r--drivers/soc/qcom/hab/khab_test.c8
-rw-r--r--drivers/staging/android/ion/ion.c9
-rw-r--r--drivers/staging/android/ion/msm/msm_ion.c31
-rw-r--r--drivers/staging/rdma/ipath/ipath_init_chip.c4
-rw-r--r--drivers/tty/serial/kgdboc.c3
-rw-r--r--drivers/usb/chipidea/otg_fsm.h2
-rw-r--r--drivers/usb/gadget/composite.c3
-rw-r--r--drivers/usb/gadget/configfs.c4
-rw-r--r--drivers/usb/gadget/function/f_accessory.c20
-rw-r--r--drivers/usb/gadget/function/rndis.c10
-rw-r--r--drivers/usb/gadget/legacy/inode.c10
-rw-r--r--drivers/usb/serial/console.c2
-rw-r--r--drivers/vfio/Kconfig1
-rw-r--r--drivers/video/console/dummycon.c83
-rw-r--r--drivers/video/fbdev/msm/mdss_dsi_status.c5
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/file.c14
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/mntpt.c2
-rw-r--r--fs/autofs4/waitq.c4
-rw-r--r--fs/cifs/cifs_dfs_ref.c7
-rw-r--r--fs/debugfs/inode.c8
-rw-r--r--fs/exofs/inode.c12
-rw-r--r--fs/ext4/ext4.h31
-rw-r--r--fs/ext4/ialloc.c7
-rw-r--r--fs/ext4/inode.c28
-rw-r--r--fs/ext4/namei.c19
-rw-r--r--fs/ext4/super.c100
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/kernfs/file.c22
-rw-r--r--fs/kernfs/mount.c3
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/locks.c23
-rw-r--r--fs/logfs/dev_bdev.c4
-rw-r--r--fs/logfs/dev_mtd.c4
-rw-r--r--fs/logfs/dir.c4
-rw-r--r--fs/logfs/logfs.h4
-rw-r--r--fs/namei.c1
-rw-r--r--fs/namespace.c21
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs4namespace.c2
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/notify/fanotify/Kconfig1
-rw-r--r--fs/notify/inotify/Kconfig1
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/read_write.c54
-rw-r--r--fs/super.c15
-rw-r--r--include/asm-generic/vmlinux.lds.h94
-rw-r--r--include/linux/bpf-cgroup.h15
-rw-r--r--include/linux/bpf.h28
-rw-r--r--include/linux/cgroup-defs.h6
-rw-r--r--include/linux/compat.h15
-rw-r--r--include/linux/compiler.h23
-rw-r--r--include/linux/debugfs.h3
-rw-r--r--include/linux/export.h14
-rw-r--r--include/linux/ftrace.h8
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/init.h38
-rw-r--r--include/linux/moduleparam.h16
-rw-r--r--include/linux/mount.h3
-rw-r--r--include/linux/pagemap.h4
-rw-r--r--include/linux/pid.h5
-rw-r--r--include/linux/proc_fs.h6
-rw-r--r--include/linux/rculist_nulls.h1
-rw-r--r--include/linux/skbuff.h5
-rw-r--r--include/linux/syscalls.h5
-rw-r--r--include/linux/udp.h8
-rw-r--r--include/net/addrconf.h2
-rw-r--r--include/net/inet6_hashtables.h13
-rw-r--r--include/net/inet_hashtables.h25
-rw-r--r--include/net/ip_tunnels.h11
-rw-r--r--include/net/netfilter/nf_conntrack.h2
-rw-r--r--include/net/phonet/phonet.h2
-rw-r--r--include/net/ping.h2
-rw-r--r--include/net/raw.h2
-rw-r--r--include/net/sock.h18
-rw-r--r--include/net/sock_reuseport.h2
-rw-r--r--include/net/udp.h5
-rw-r--r--include/trace/events/filelock.h77
-rw-r--r--include/uapi/asm-generic/unistd.h6
-rw-r--r--include/uapi/linux/bpf.h6
-rw-r--r--include/uapi/linux/fs.h1
-rw-r--r--include/uapi/linux/sched.h1
-rw-r--r--init/Kconfig10
-rw-r--r--init/Makefile2
-rw-r--r--kernel/bpf/cgroup.c33
-rw-r--r--kernel/bpf/core.c24
-rw-r--r--kernel/bpf/syscall.c57
-rw-r--r--kernel/cgroup.c31
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c136
-rw-r--r--kernel/pid.c74
-rw-r--r--kernel/signal.c142
-rw-r--r--kernel/trace/ftrace.c17
-rw-r--r--kernel/trace/trace.c4
-rw-r--r--mm/filemap.c6
-rw-r--r--mm/page_alloc.c6
-rw-r--r--mm/readahead.c2
-rw-r--r--net/compat.c3
-rw-r--r--net/core/filter.c65
-rw-r--r--net/core/skbuff.c29
-rw-r--r--net/core/sock_reuseport.c58
-rw-r--r--net/dccp/ipv4.c2
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/ieee802154/socket.c17
-rw-r--r--net/ipv4/af_inet.c21
-rw-r--r--net/ipv4/inet_connection_sock.c22
-rw-r--r--net/ipv4/inet_diag.c6
-rw-r--r--net/ipv4/inet_hashtables.c66
-rw-r--r--net/ipv4/ping.c4
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/tcp_ipv4.c10
-rw-r--r--net/ipv4/udp.c308
-rw-r--r--net/ipv4/udp_diag.c20
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv6/af_inet6.c25
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/inet6_hashtables.c80
-rw-r--r--net/ipv6/tcp_ipv6.c10
-rw-r--r--net/ipv6/udp.c246
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/l2tp/l2tp_ip.c3
-rw-r--r--net/l2tp/l2tp_ip6.c3
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_nat_ftp.c2
-rw-r--r--net/netfilter/nf_nat_irc.c2
-rw-r--r--net/netfilter/xt_TPROXY.c31
-rw-r--r--net/netfilter/xt_socket.c32
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/sctp/socket.c3
-rw-r--r--net/sunrpc/svc.c4
-rw-r--r--samples/bpf/bpf_helpers.h4
-rw-r--r--scripts/Kbuild.include57
-rw-r--r--scripts/Makefile.build57
-rw-r--r--scripts/Makefile.host2
-rw-r--r--scripts/Makefile.lib4
-rw-r--r--scripts/Makefile.modpost14
-rwxr-xr-xscripts/clang-version.sh33
-rw-r--r--scripts/extract-cert.c7
-rwxr-xr-xscripts/link-vmlinux.sh118
-rwxr-xr-xscripts/sign-file.c7
-rw-r--r--security/apparmor/lsm.c16
-rw-r--r--security/selinux/hooks.c2
-rw-r--r--security/selinux/include/classmap.h3
-rw-r--r--security/selinux/include/security.h1
-rw-r--r--security/selinux/nlmsgtab.c24
-rw-r--r--security/selinux/ss/policydb.c4
-rw-r--r--security/selinux/ss/policydb.h2
-rw-r--r--security/selinux/ss/services.c2
-rw-r--r--sound/core/control.c78
-rw-r--r--sound/core/control_compat.c10
-rw-r--r--tools/testing/selftests/net/.gitignore2
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c641
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_cpu.c258
269 files changed, 3920 insertions, 1277 deletions
diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX
index 8c5e6aa78004..b367200c94b5 100644
--- a/Documentation/kbuild/00-INDEX
+++ b/Documentation/kbuild/00-INDEX
@@ -1,5 +1,7 @@
00-INDEX
- this file: info on the kernel build process
+llvm.txt
+ - how to compile Linux with llvm
headers_install.txt
- how to export Linux headers for use by userspace
kbuild.txt
diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index 0ff6a466a05b..3e63c2361291 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -238,3 +238,14 @@ KBUILD_VMLINUX_MAIN
All object files for the main part of vmlinux.
KBUILD_VMLINUX_INIT and KBUILD_VMLINUX_MAIN together specify
all the object files used to link vmlinux.
+
+KBUILD_VMLINUX_LIBS
+--------------------------------------------------
+All .a "lib" files for vmlinux.
+KBUILD_VMLINUX_INIT, KBUILD_VMLINUX_MAIN, and KBUILD_VMLINUX_LIBS together
+specify all the object files used to link vmlinux.
+
+LLVM
+----
+If this variable is set to 1, Kbuild will use Clang and LLVM utilities instead
+of GCC and GNU binutils to build the kernel.
diff --git a/Documentation/kbuild/llvm.txt b/Documentation/kbuild/llvm.txt
new file mode 100644
index 000000000000..51c009baa35a
--- /dev/null
+++ b/Documentation/kbuild/llvm.txt
@@ -0,0 +1,86 @@
+Building Linux with Clang/LLVM
+==============================
+
+This document covers how to build the Linux kernel with Clang and LLVM
+utilities.
+
+About
+-----
+
+The Linux kernel has always traditionally been compiled with GNU
+toolchains such as GCC and binutils. Ongoing work has allowed for
+[Clang](https://clang.llvm.org/) and [LLVM](https://llvm.org/) utilities
+to be used as viable substitutes. Distributions such as
+[Android](https://www.android.com/),
+[ChromeOS](https://www.chromium.org/chromium-os), and
+[OpenMandriva](https://www.openmandriva.org/) use Clang built kernels.
+[LLVM is a collection of toolchain components implemented in terms of
+C++ objects](https://www.aosabook.org/en/llvm.html). Clang is a
+front-end to LLVM that supports C and the GNU C extensions required by
+the kernel, and is pronounced "klang," not "see-lang."
+
+Clang
+-----
+
+The compiler used can be swapped out via [CC=]{.title-ref} command line
+argument to [make]{.title-ref}. [CC=]{.title-ref} should be set when
+selecting a config and during a build.
+
+> make CC=clang defconfig
+>
+> make CC=clang
+
+Cross Compiling
+---------------
+
+A single Clang compiler binary will typically contain all supported
+backends, which can help simplify cross compiling.
+
+> ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang
+
+[CROSS\_COMPILE]{.title-ref} is not used to prefix the Clang compiler
+binary, instead [CROSS\_COMPILE]{.title-ref} is used to set a command
+line flag: [--target <triple>]{.title-ref}. For example:
+
+> clang --target aarch64-linux-gnu foo.c
+
+LLVM Utilities
+--------------
+
+LLVM has substitutes for GNU binutils utilities. These can be invoked as
+additional parameters to [make]{.title-ref}.
+
+> make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \\
+> OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump OBJSIZE=llvm-size \\
+> READELF=llvm-readelf HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar \\
+> HOSTLD=ld.lld
+
+Currently, the integrated assembler is disabled by default. You can pass
+`LLVM_IAS=1` to enable it.
+
+Getting Help
+------------
+
+- [Website](https://clangbuiltlinux.github.io/)
+- [Mailing
+ List](https://groups.google.com/forum/#!forum/clang-built-linux):
+ <clang-built-linux@googlegroups.com>
+- [Issue Tracker](https://github.com/ClangBuiltLinux/linux/issues)
+- IRC: \#clangbuiltlinux on chat.freenode.net
+- [Telegram](https://t.me/ClangBuiltLinux): \@ClangBuiltLinux
+- [Wiki](https://github.com/ClangBuiltLinux/linux/wiki)
+- [Beginner
+ Bugs](https://github.com/ClangBuiltLinux/linux/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)
+
+Getting LLVM
+------------
+
+- <http://releases.llvm.org/download.html>
+- <https://github.com/llvm/llvm-project>
+- <https://llvm.org/docs/GettingStarted.html>
+- <https://llvm.org/docs/CMake.html>
+- <https://apt.llvm.org/>
+- <https://www.archlinux.org/packages/extra/x86_64/llvm/>
+- <https://github.com/ClangBuiltLinux/tc-build>
+- <https://github.com/ClangBuiltLinux/linux/wiki/Building-Clang-from-source>
+- <https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86/>
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 64c84e2d06c0..fba7bed6193a 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -41,6 +41,7 @@ This document describes the Linux kernel Makefiles.
--- 6.8 Custom kbuild commands
--- 6.9 Preprocessing linker scripts
--- 6.10 Generic header files
+ --- 6.11 Post-link pass
=== 7 Kbuild syntax for exported headers
--- 7.1 no-export-headers-y
@@ -1251,6 +1252,21 @@ When kbuild executes, the following steps are followed (roughly):
For example, powerpc uses this to check relocation sanity of
the linked vmlinux file.
+--- 6.11 Post-link pass
+
+ If the file arch/xxx/Makefile.postlink exists, this makefile
+ will be invoked for post-link objects (vmlinux and modules.ko)
+ for architectures to run post-link passes on. Must also handle
+ the clean target.
+
+ This pass runs after kallsyms generation. If the architecture
+ needs to modify symbol locations, rather than manipulate the
+ kallsyms, it may be easier to add another postlink target for
+ .tmp_vmlinux? targets to be called from link-vmlinux.sh.
+
+ For example, powerpc uses this to check relocation sanity of
+ the linked vmlinux file.
+
=== 7 Kbuild syntax for exported headers
The kernel includes a set of headers that is exported to userspace.
diff --git a/Makefile b/Makefile
index 347dc9c6d7e9..26643c97f6af 100644
--- a/Makefile
+++ b/Makefile
@@ -301,8 +301,14 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
else if [ -x /bin/bash ]; then echo /bin/bash; \
else echo sh; fi ; fi)
+ifneq ($(LLVM),)
+HOSTCC = clang
+HOSTCXX = clang++
+else
HOSTCC = gcc
HOSTCXX = g++
+endif
+
HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89
HOSTCXXFLAGS = -O2
@@ -337,15 +343,27 @@ scripts/Kbuild.include: ;
include scripts/Kbuild.include
# Make variables (CC, etc...)
+CPP = $(CC) -E
+ifneq ($(LLVM),)
+CC = clang
+LD = ld.lld
+AR = llvm-ar
+NM = llvm-nm
+OBJCOPY = llvm-objcopy
+OBJDUMP = llvm-objdump
+READELF = llvm-readelf
+OBJSIZE = llvm-size
+STRIP = llvm-strip
+else
AS = $(CROSS_COMPILE)as
LD = $(CROSS_COMPILE)ld
CC = $(CROSS_COMPILE)gcc
-CPP = $(CC) -E
AR = $(CROSS_COMPILE)ar
NM = $(CROSS_COMPILE)nm
STRIP = $(CROSS_COMPILE)strip
OBJCOPY = $(CROSS_COMPILE)objcopy
OBJDUMP = $(CROSS_COMPILE)objdump
+endif
AWK = awk
GENKSYMS = scripts/genksyms/genksyms
INSTALLKERNEL := installkernel
@@ -400,6 +418,7 @@ KBUILD_AFLAGS := -D__ASSEMBLY__ $(call cc-option,-fno-PIE)
KBUILD_AFLAGS_MODULE := -DMODULE
KBUILD_CFLAGS_MODULE := -DMODULE
KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
+LDFLAGS :=
CLANG_FLAGS :=
# Read KERNELRELEASE from include/config/kernel.release (if it exists)
@@ -622,7 +641,9 @@ endif
ifneq ($(GCC_TOOLCHAIN),)
CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN)
endif
+ifneq ($(LLVM_IAS),1)
CLANG_FLAGS += -no-integrated-as
+endif
CLANG_FLAGS += -Werror=unknown-warning-option
CLANG_FLAGS += $(call cc-option, -Wno-misleading-indentation)
CLANG_FLAGS += $(call cc-option, -Wno-bool-operation)
@@ -646,6 +667,11 @@ KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias)
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,)
+KBUILD_CFLAGS += $(call cc-option,-fdata-sections,)
+endif
+
ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
else
@@ -772,8 +798,11 @@ KBUILD_CFLAGS += $(call cc-option, -gsplit-dwarf, -g)
else
KBUILD_CFLAGS += -g
endif
+ifneq ($(LLVM_IAS),1)
KBUILD_AFLAGS += -Wa,-gdwarf-2
endif
+endif
+
ifdef CONFIG_DEBUG_INFO_DWARF4
KBUILD_CFLAGS += $(call cc-option, -gdwarf-4,)
endif
@@ -883,6 +912,10 @@ LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+LDFLAGS_vmlinux += $(call ld-option, --gc-sections,)
+endif
+
ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
LDFLAGS_vmlinux += $(call ld-option, -X,)
endif
@@ -985,22 +1018,27 @@ core-y := $(patsubst %/, %/built-in.o, $(core-y))
drivers-y := $(patsubst %/, %/built-in.o, $(drivers-y))
net-y := $(patsubst %/, %/built-in.o, $(net-y))
libs-y1 := $(patsubst %/, %/lib.a, $(libs-y))
-libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y))
-libs-y := $(libs-y1) $(libs-y2)
+libs-y2 := $(filter-out %.a, $(patsubst %/, %/built-in.o, $(libs-y)))
virt-y := $(patsubst %/, %/built-in.o, $(virt-y))
# Externally visible symbols (used by link-vmlinux.sh)
export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
-export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) $(virt-y)
+export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y2) $(drivers-y) $(net-y) $(virt-y)
+export KBUILD_VMLINUX_LIBS := $(libs-y1)
export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds
export LDFLAGS_vmlinux
# used by scripts/pacmage/Makefile
export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools)
-vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN)
+vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN) $(KBUILD_VMLINUX_LIBS)
+
+ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
# Final link of vmlinux
- cmd_link-vmlinux = $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux)
+ cmd_link-vmlinux = \
+ $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux); \
+ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+
quiet_cmd_link-vmlinux = LINK $@
# Include targets which we want to
@@ -1018,8 +1056,16 @@ endif
ifdef CONFIG_GDB_SCRIPTS
$(Q)ln -fsn `cd $(srctree) && /bin/pwd`/scripts/gdb/vmlinux-gdb.py
endif
+ifdef CONFIG_TRIM_UNUSED_KSYMS
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \
+ "$(MAKE) KBUILD_MODULES=1 -f $(srctree)/Makefile vmlinux_prereq"
+endif
+$(call if_changed,link-vmlinux)
+# standalone target for easier testing
+include/generated/autoksyms.h: FORCE
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh true
+
# The actual objects are generated when descending,
# make sure no implicit rule kicks in
$(sort $(vmlinux-deps)): $(vmlinux-dirs) ;
@@ -1295,6 +1341,7 @@ $(clean-dirs):
vmlinuxclean:
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean
+ $(Q)$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) clean)
clean: archclean vmlinuxclean
diff --git a/arch/Kconfig b/arch/Kconfig
index a29c6098fb98..b8629dc08ae0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -432,6 +432,25 @@ config HAVE_ARCH_WITHIN_STACK_FRAMES
and similar) by implementing an inline arch_within_stack_frames(),
which is used by CONFIG_HARDENED_USERCOPY.
+config THIN_ARCHIVES
+ def_bool y
+ help
+ Select this if the architecture wants to use thin archives
+ instead of ld -r to create the built-in.o files.
+
+config LD_DEAD_CODE_DATA_ELIMINATION
+ bool
+ help
+ Select this if the architecture wants to do dead code and
+ data elimination with the linker by compiling with
+ -ffunction-sections -fdata-sections and linking with
+ --gc-sections.
+
+ This requires that the arch annotates or otherwise protects
+ its external entry points from being discarded. Linker scripts
+ must also merge .text.*, .data.*, and .bss.* correctly into
+ output sections.
+
config HAVE_CONTEXT_TRACKING
bool
help
diff --git a/arch/arm/boot/dts/qcom/msm8996-v3.dtsi b/arch/arm/boot/dts/qcom/msm8996-v3.dtsi
index 711e0c259c6a..aaf6b4250459 100644
--- a/arch/arm/boot/dts/qcom/msm8996-v3.dtsi
+++ b/arch/arm/boot/dts/qcom/msm8996-v3.dtsi
@@ -941,7 +941,6 @@
};
qcom,msm-thermal {
msm_thermal_freq: qcom,vdd-apps-rstr{
- qcom,max-freq-level = <1190400>;
qcom,levels = <1036800 1555200 1555200>;
};
qcom,vdd-gfx-rstr{
diff --git a/arch/arm/boot/dts/qcom/msm8996pro.dtsi b/arch/arm/boot/dts/qcom/msm8996pro.dtsi
index f140e78a16d7..d0949d70c3ca 100644
--- a/arch/arm/boot/dts/qcom/msm8996pro.dtsi
+++ b/arch/arm/boot/dts/qcom/msm8996pro.dtsi
@@ -1332,7 +1332,6 @@
qcom,limit-temp = <80>;
qcom,core-limit-temp = <90>;
msm_thermal_freq: qcom,vdd-apps-rstr {
- qcom,max-freq-level = <1209600>;
qcom,levels = <1056000 1516800 1516800>;
};
qcom,vdd-gfx-rstr{
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 95a000515e43..7d656525a671 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on MMU && OF
select PREEMPT_NOTIFIERS
- select ANON_INODES
select ARM_GIC
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index f4e9585226f3..fe0fb38df196 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -405,3 +405,4 @@
388 common userfaultfd sys_userfaultfd
389 common membarrier sys_membarrier
390 common mlock2 sys_mlock2
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8b055133ae2a..28f09a00143b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -458,18 +458,13 @@ config ARM64_ERRATUM_845719
config ARM64_ERRATUM_843419
bool "Cortex-A53: 843419: A load or store might access an incorrect address"
- depends on MODULES
default y
- select ARM64_MODULE_CMODEL_LARGE
+ select ARM64_MODULE_CMODEL_LARGE if MODULES
help
- This option builds kernel modules using the large memory model in
- order to avoid the use of the ADRP instruction, which can cause
- a subsequent memory access to use an incorrect address on Cortex-A53
- parts up to r0p4.
-
- Note that the kernel itself must be linked with a version of ld
- which fixes potentially affected ADRP instructions through the
- use of veneers.
+ This option links the kernel with '--fix-cortex-a53-843419' and
+ builds modules using the large memory model in order to avoid the use
+ of the ADRP instruction, which can cause a subsequent memory access
+ to use an incorrect address on Cortex-A53 parts up to r0p4.
If unsure, say Y.
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 4df2b48fae44..adc88e707c09 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -10,13 +10,34 @@
#
# Copyright (C) 1995-2001 by Russell King
-LDFLAGS_vmlinux :=--no-undefined -X
+LDFLAGS_vmlinux :=--no-undefined -X -z norelro
CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
GZFLAGS :=-9
-ifneq ($(CONFIG_RELOCATABLE),)
-LDFLAGS_vmlinux += -pie -Bsymbolic
+ifeq ($(CONFIG_RELOCATABLE), y)
+# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
+# for relative relocs, since this leads to better Image compression
+# with the relocation offsets always being zero.
+LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \
+ $(call ld-option, --no-apply-dynamic-relocs)
+endif
+
+ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
+ ifeq ($(call ld-option, --fix-cortex-a53-843419),)
+$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
+ else
+ ifeq ($(call gold-ifversion, -lt, 114000000, y), y)
+$(warning This version of GNU gold may generate incorrect code with --fix-cortex-a53-843419;\
+ see https://sourceware.org/bugzilla/show_bug.cgi?id=21491)
+ endif
+LDFLAGS_vmlinux += --fix-cortex-a53-843419
+ endif
+else
+ ifeq ($(ld-name),gold)
+# Pass --no-fix-cortex-a53-843419 to ensure the erratum fix is disabled
+LDFLAGS += --no-fix-cortex-a53-843419
+ endif
endif
KBUILD_DEFCONFIG := defconfig
@@ -75,11 +96,15 @@ KBUILD_AFLAGS += $(lseinstr) $(vdso32)
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
AS += -EB
-LD += -EB
+# Prefer the baremetal ELF build target, but not all toolchains include
+LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb)
+UTS_MACHINE := aarch64_be
else
KBUILD_CPPFLAGS += -mlittle-endian
AS += -EL
-LD += -EL
+# Same as above, prefer ELF but fall back to linux target if needed.
+LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux)
+UTS_MACHINE := aarch64
endif
CHECKFLAGS += -D__aarch64__
diff --git a/arch/arm64/configs/msmcortex_defconfig b/arch/arm64/configs/msmcortex_defconfig
index 478140987c2d..a205cdab4c6f 100644
--- a/arch/arm64/configs/msmcortex_defconfig
+++ b/arch/arm64/configs/msmcortex_defconfig
@@ -703,11 +703,11 @@ CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
CONFIG_IPC_LOGGING=y
CONFIG_QCOM_RTB=y
CONFIG_QCOM_RTB_SEPARATE_CPUS=y
-CONFIG_FUNCTION_TRACER=y
+#CONFIG_FUNCTION_TRACER=y
CONFIG_PREEMPTIRQ_EVENTS=y
-CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPT_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
+#CONFIG_IRQSOFF_TRACER=y
+#CONFIG_PREEMPT_TRACER=y
+#CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_CPU_FREQ_SWITCH_PROFILER=y
CONFIG_LKDTM=y
CONFIG_MEMTEST=y
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index f0a8f2475ea3..1071dfcdaf1b 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -21,7 +21,7 @@ obj-$(CONFIG_CRYPTO_POLY_HASH_ARM64_CE) += poly-hash-ce.o
poly-hash-ce-y := poly-hash-ce-glue.o poly-hash-ce-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
-CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S
new file mode 100644
index 000000000000..8efdfdade393
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-core.S
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .arch armv8-a+crypto
+
+ENTRY(__aes_ce_encrypt)
+ sub w3, w3, #2
+ ld1 {v0.16b}, [x2]
+ ld1 {v1.4s}, [x0], #16
+ cmp w3, #10
+ bmi 0f
+ bne 3f
+ mov v3.16b, v1.16b
+ b 2f
+0: mov v2.16b, v1.16b
+ ld1 {v3.4s}, [x0], #16
+1: aese v0.16b, v2.16b
+ aesmc v0.16b, v0.16b
+2: ld1 {v1.4s}, [x0], #16
+ aese v0.16b, v3.16b
+ aesmc v0.16b, v0.16b
+3: ld1 {v2.4s}, [x0], #16
+ subs w3, w3, #3
+ aese v0.16b, v1.16b
+ aesmc v0.16b, v0.16b
+ ld1 {v3.4s}, [x0], #16
+ bpl 1b
+ aese v0.16b, v2.16b
+ eor v0.16b, v0.16b, v3.16b
+ st1 {v0.16b}, [x1]
+ ret
+ENDPROC(__aes_ce_encrypt)
+
+ENTRY(__aes_ce_decrypt)
+ sub w3, w3, #2
+ ld1 {v0.16b}, [x2]
+ ld1 {v1.4s}, [x0], #16
+ cmp w3, #10
+ bmi 0f
+ bne 3f
+ mov v3.16b, v1.16b
+ b 2f
+0: mov v2.16b, v1.16b
+ ld1 {v3.4s}, [x0], #16
+1: aesd v0.16b, v2.16b
+ aesimc v0.16b, v0.16b
+2: ld1 {v1.4s}, [x0], #16
+ aesd v0.16b, v3.16b
+ aesimc v0.16b, v0.16b
+3: ld1 {v2.4s}, [x0], #16
+ subs w3, w3, #3
+ aesd v0.16b, v1.16b
+ aesimc v0.16b, v0.16b
+ ld1 {v3.4s}, [x0], #16
+ bpl 1b
+ aesd v0.16b, v2.16b
+ eor v0.16b, v0.16b, v3.16b
+ st1 {v0.16b}, [x1]
+ ret
+ENDPROC(__aes_ce_decrypt)
+
+/*
+ * __aes_ce_sub() - use the aese instruction to perform the AES sbox
+ * substitution on each byte in 'input'
+ */
+ENTRY(__aes_ce_sub)
+ dup v1.4s, w0
+ movi v0.16b, #0
+ aese v0.16b, v1.16b
+ umov w0, v0.s[0]
+ ret
+ENDPROC(__aes_ce_sub)
+
+ENTRY(__aes_ce_invert)
+ ld1 {v0.4s}, [x1]
+ aesimc v1.16b, v0.16b
+ st1 {v1.4s}, [x0]
+ ret
+ENDPROC(__aes_ce_invert)
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-glue.c
index 50d9fe11d0c8..60af7bddfbb9 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -24,6 +24,13 @@ struct aes_block {
u8 b[AES_BLOCK_SIZE];
};
+asmlinkage void __aes_ce_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_ce_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+asmlinkage u32 __aes_ce_sub(u32 l);
+asmlinkage void __aes_ce_invert(struct aes_block *out,
+ const struct aes_block *in);
+
static int num_rounds(struct crypto_aes_ctx *ctx)
{
/*
@@ -39,113 +46,21 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- struct aes_block *out = (struct aes_block *)dst;
- struct aes_block const *in = (struct aes_block *)src;
- void *dummy0;
- int dummy1;
kernel_neon_begin_partial(4);
-
- __asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.16b}, [%[key]], #16 ;"
- " cmp %w[rounds], #10 ;"
- " bmi 0f ;"
- " bne 3f ;"
- " mov v3.16b, v1.16b ;"
- " b 2f ;"
- "0: mov v2.16b, v1.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
- "1: aese v0.16b, v2.16b ;"
- " aesmc v0.16b, v0.16b ;"
- "2: ld1 {v1.16b}, [%[key]], #16 ;"
- " aese v0.16b, v3.16b ;"
- " aesmc v0.16b, v0.16b ;"
- "3: ld1 {v2.16b}, [%[key]], #16 ;"
- " subs %w[rounds], %w[rounds], #3 ;"
- " aese v0.16b, v1.16b ;"
- " aesmc v0.16b, v0.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
- " bpl 1b ;"
- " aese v0.16b, v2.16b ;"
- " eor v0.16b, v0.16b, v3.16b ;"
- " st1 {v0.16b}, %[out] ;"
-
- : [out] "=Q"(*out),
- [key] "=r"(dummy0),
- [rounds] "=r"(dummy1)
- : [in] "Q"(*in),
- "1"(ctx->key_enc),
- "2"(num_rounds(ctx) - 2)
- : "cc");
-
+ __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
kernel_neon_end();
}
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- struct aes_block *out = (struct aes_block *)dst;
- struct aes_block const *in = (struct aes_block *)src;
- void *dummy0;
- int dummy1;
kernel_neon_begin_partial(4);
-
- __asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.16b}, [%[key]], #16 ;"
- " cmp %w[rounds], #10 ;"
- " bmi 0f ;"
- " bne 3f ;"
- " mov v3.16b, v1.16b ;"
- " b 2f ;"
- "0: mov v2.16b, v1.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
- "1: aesd v0.16b, v2.16b ;"
- " aesimc v0.16b, v0.16b ;"
- "2: ld1 {v1.16b}, [%[key]], #16 ;"
- " aesd v0.16b, v3.16b ;"
- " aesimc v0.16b, v0.16b ;"
- "3: ld1 {v2.16b}, [%[key]], #16 ;"
- " subs %w[rounds], %w[rounds], #3 ;"
- " aesd v0.16b, v1.16b ;"
- " aesimc v0.16b, v0.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
- " bpl 1b ;"
- " aesd v0.16b, v2.16b ;"
- " eor v0.16b, v0.16b, v3.16b ;"
- " st1 {v0.16b}, %[out] ;"
-
- : [out] "=Q"(*out),
- [key] "=r"(dummy0),
- [rounds] "=r"(dummy1)
- : [in] "Q"(*in),
- "1"(ctx->key_dec),
- "2"(num_rounds(ctx) - 2)
- : "cc");
-
+ __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
kernel_neon_end();
}
-/*
- * aes_sub() - use the aese instruction to perform the AES sbox substitution
- * on each byte in 'input'
- */
-static u32 aes_sub(u32 input)
-{
- u32 ret;
-
- __asm__("dup v1.4s, %w[in] ;"
- "movi v0.16b, #0 ;"
- "aese v0.16b, v1.16b ;"
- "umov %w[out], v0.4s[0] ;"
-
- : [out] "=r"(ret)
- : [in] "r"(input)
- : "v0","v1");
-
- return ret;
-}
-
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
@@ -174,9 +89,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
u32 *rko = rki + kwords;
#ifndef CONFIG_CPU_BIG_ENDIAN
- rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+ rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
#else
- rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
+ rko[0] = rol32(__aes_ce_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
rki[0];
#endif
rko[1] = rko[0] ^ rki[1];
@@ -191,7 +106,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
} else if (key_len == AES_KEYSIZE_256) {
if (i >= 6)
break;
- rko[4] = aes_sub(rko[3]) ^ rki[4];
+ rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
rko[5] = rko[4] ^ rki[5];
rko[6] = rko[5] ^ rki[6];
rko[7] = rko[6] ^ rki[7];
@@ -210,13 +125,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
key_dec[0] = key_enc[j];
for (i = 1, j--; j > 0; i++, j--)
- __asm__("ld1 {v0.16b}, %[in] ;"
- "aesimc v1.16b, v0.16b ;"
- "st1 {v1.16b}, %[out] ;"
-
- : [out] "=Q"(key_dec[i])
- : [in] "Q"(key_enc[j])
- : "v0","v1");
+ __aes_ce_invert(key_dec + i, key_enc + j);
key_dec[i] = key_enc[0];
kernel_neon_end();
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 838dad5c209f..067b4e222f74 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -297,17 +297,19 @@ AES_ENTRY(aes_ctr_encrypt)
eor v1.16b, v1.16b, v3.16b
st1 {v0.16b-v1.16b}, [x0], #32
#else
- ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
- dup v7.4s, w8
+ add w7, w6, #1
mov v0.16b, v4.16b
- add v7.4s, v7.4s, v8.4s
+ add w8, w6, #2
mov v1.16b, v4.16b
- rev32 v8.16b, v7.16b
+ add w9, w6, #3
mov v2.16b, v4.16b
+ rev w7, w7
mov v3.16b, v4.16b
- mov v1.s[3], v8.s[0]
- mov v2.s[3], v8.s[1]
- mov v3.s[3], v8.s[2]
+ rev w8, w8
+ mov v1.s[3], w7
+ rev w9, w9
+ mov v2.s[3], w8
+ mov v3.s[3], w9
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
do_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 7dcfd83ff5e8..5b7de95d6a48 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -329,21 +329,33 @@ lr .req x30 // link register
* size: size of the region
* Corrupts: kaddr, size, tmp1, tmp2
*/
+ .macro __dcache_op_workaround_clean_cache, op, kaddr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ dc \op, \kaddr
+alternative_else
+ dc civac, \kaddr
+alternative_endif
+ .endm
+
.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
dcache_line_size \tmp1, \tmp2
add \size, \kaddr, \size
sub \tmp2, \tmp1, #1
bic \kaddr, \kaddr, \tmp2
9998:
- .if (\op == cvau || \op == cvac)
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
- dc \op, \kaddr
-alternative_else
- dc civac, \kaddr
-alternative_endif
+ .ifc \op, cvau
+ __dcache_op_workaround_clean_cache \op, \kaddr
+ .else
+ .ifc \op, cvac
+ __dcache_op_workaround_clean_cache \op, \kaddr
+ .else
+ .ifc \op, cvap
+ sys 3, c7, c12, 1, \kaddr // dc cvap
.else
dc \op, \kaddr
.endif
+ .endif
+ .endif
add \kaddr, \kaddr, \tmp1
cmp \kaddr, \size
b.lo 9998b
diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
deleted file mode 100644
index 123f45d92cd1..000000000000
--- a/arch/arm64/include/asm/opcodes.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define CONFIG_CPU_ENDIAN_BE8 CONFIG_CPU_BIG_ENDIAN
-#endif
-
-#include <../../arm/include/asm/opcodes.h>
diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h
index 5af574d632fa..6a5b28904c33 100644
--- a/arch/arm64/include/asm/probes.h
+++ b/arch/arm64/include/asm/probes.h
@@ -15,21 +15,22 @@
#ifndef _ARM_PROBES_H
#define _ARM_PROBES_H
-#include <asm/opcodes.h>
-
-struct kprobe;
-struct arch_specific_insn;
-
-typedef u32 kprobe_opcode_t;
-typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *);
+typedef u32 probe_opcode_t;
+typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *);
/* architecture specific copy of original instruction */
-struct arch_specific_insn {
- kprobe_opcode_t *insn;
+struct arch_probe_insn {
+ probe_opcode_t *insn;
pstate_check_t *pstate_cc;
- kprobes_handler_t *handler;
+ probes_handler_t *handler;
/* restore address after step xol */
unsigned long restore;
};
+#ifdef CONFIG_KPROBES
+typedef u32 kprobe_opcode_t;
+struct arch_specific_insn {
+ struct arch_probe_insn api;
+};
+#endif
#endif
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index c768daa084ca..6bc2f3d28b68 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -22,8 +22,6 @@
#include <linux/stringify.h>
-#include <asm/opcodes.h>
-
/*
* ARMv8 ARM reserves the following encoding for system registers:
* (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
@@ -37,6 +35,14 @@
#define sys_reg(op0, op1, crn, crm, op2) \
((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+#ifdef __ASSEMBLY__
+// The space separator is omitted so that __emit_inst(x) can be parsed as
+// either an assembler directive or an assembler macro argument.
+#define __emit_inst(x) .inst(x)
+#else
+#define __emit_inst(x) ".inst " __stringify((x)) "\n\t"
+#endif
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@@ -81,10 +87,10 @@
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3)
-#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
- (!!x)<<8 | 0x1f)
-#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
- (!!x)<<8 | 0x1f)
+#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \
+ (!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \
+ (!!x)<<8 | 0x1f)
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_EE (1 << 25)
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 41e58fe3c041..63469acc5992 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
-#define __NR_compat_syscalls 390
+#define __NR_compat_syscalls 435
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 5b925b761a2a..65e8559fe4aa 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -801,6 +801,8 @@ __SYSCALL(__NR_execveat, compat_sys_execveat)
__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
#define __NR_membarrier 389
__SYSCALL(__NR_membarrier, sys_membarrier)
+#define __NR_pidfd_open 434
+__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/uapi/asm/setup.h b/arch/arm64/include/uapi/asm/setup.h
index 9cf2e46fbbdf..7474c813d5ab 100644
--- a/arch/arm64/include/uapi/asm/setup.h
+++ b/arch/arm64/include/uapi/asm/setup.h
@@ -21,6 +21,6 @@
#include <linux/types.h>
-#define COMMAND_LINE_SIZE 2048
+#define COMMAND_LINE_SIZE 4096
#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 18938199a838..cd3597a75b3b 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -10,6 +10,8 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_insn.o = -pg
CFLAGS_REMOVE_return_address.o = -pg
+CFLAGS_setup.o = -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
# Object file lists.
arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
diff --git a/arch/arm64/kernel/app_setting.c b/arch/arm64/kernel/app_setting.c
index 0c6b00317645..9ee7a753c080 100644
--- a/arch/arm64/kernel/app_setting.c
+++ b/arch/arm64/kernel/app_setting.c
@@ -28,7 +28,7 @@ static struct kparam_string kps = {
.string = lib_str,
.maxlen = MAX_LEN,
};
-static int set_name(const char *str, struct kernel_param *kp);
+static int set_name(const char *str, const struct kernel_param *kp);
module_param_call(lib_name, set_name, param_get_string, &kps, S_IWUSR);
bool use_app_setting = true;
@@ -43,7 +43,7 @@ bool use_32bit_app_setting_pro;
module_param(use_32bit_app_setting_pro, bool, 0644);
MODULE_PARM_DESC(use_32bit_app_setting_pro, "control use of 32 bit app specific settings");
-static int set_name(const char *str, struct kernel_param *kp)
+static int set_name(const char *str, const struct kernel_param *kp)
{
int len = strlen(str);
char *name;
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index e012ecd018ee..89160fcccf8b 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -16,7 +16,6 @@
#include <asm/cpufeature.h>
#include <asm/insn.h>
-#include <asm/opcodes.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
@@ -63,6 +62,7 @@ struct insn_emulation {
static LIST_HEAD(insn_emulation);
static int nr_insn_emulated __initdata;
static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
+static DEFINE_MUTEX(insn_emulation_mutex);
static void register_emulation_hooks(struct insn_emulation_ops *ops)
{
@@ -208,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
loff_t *ppos)
{
int ret = 0;
- struct insn_emulation *insn = (struct insn_emulation *) table->data;
+ struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode);
enum insn_emulation_mode prev_mode = insn->current_mode;
- table->data = &insn->current_mode;
+ mutex_lock(&insn_emulation_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write || prev_mode == insn->current_mode)
@@ -224,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
update_insn_emulation_mode(insn, INSN_UNDEF);
}
ret:
- table->data = insn;
+ mutex_unlock(&insn_emulation_mutex);
return ret;
}
@@ -254,7 +254,7 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
sysctl->maxlen = sizeof(int);
sysctl->procname = insn->ops->name;
- sysctl->data = insn;
+ sysctl->data = &insn->current_mode;
sysctl->extra1 = &insn->min;
sysctl->extra2 = &insn->max;
sysctl->proc_handler = emulation_proc_handler;
@@ -366,6 +366,10 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
return res;
}
+#define ARM_OPCODE_CONDTEST_FAIL 0
+#define ARM_OPCODE_CONDTEST_PASS 1
+#define ARM_OPCODE_CONDTEST_UNCOND 2
+
#define ARM_OPCODE_CONDITION_UNCOND 0xf
static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index f0ca0eb3b077..242f008666e5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -144,7 +144,6 @@ alternative_else_nop_endif
#else
str x20, [tsk, #TI_ADDR_LIMIT]
#endif
- ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 86d444f9c2c1..0c46b0b5ba29 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -73,7 +73,11 @@
#ifdef CONFIG_EFI
-__efistub_stext_offset = stext - _text;
+/*
+ * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol:
+ * https://github.com/ClangBuiltLinux/linux/issues/561
+ */
+__efistub_stext_offset = ABSOLUTE(stext - _text);
/*
* Prevent the symbol aliases below from being emitted into the kallsyms
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index a3f8f8bbfc92..59a4139b3294 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -30,7 +30,6 @@
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/fixmap.h>
-#include <asm/opcodes.h>
#include <asm/insn.h>
#define AARCH64_INSN_SF_BIT BIT(31)
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
index 8949f6c6f729..05881e2b414c 100644
--- a/arch/arm64/kernel/module.lds
+++ b/arch/arm64/kernel/module.lds
@@ -1,3 +1,3 @@
SECTIONS {
- .plt (NOLOAD) : { BYTE(0) }
+ .plt : { BYTE(0) }
}
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index f7931d900bca..6ece25660da0 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -77,8 +77,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* INSN_GOOD If instruction is supported and uses instruction slot,
* INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
*/
-static enum kprobe_insn __kprobes
-arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+enum probe_insn __kprobes
+arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
{
/*
* Instructions reading or modifying the PC won't work from the XOL
@@ -88,26 +88,26 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
return INSN_GOOD;
if (aarch64_insn_is_bcond(insn)) {
- asi->handler = simulate_b_cond;
+ api->handler = simulate_b_cond;
} else if (aarch64_insn_is_cbz(insn) ||
aarch64_insn_is_cbnz(insn)) {
- asi->handler = simulate_cbz_cbnz;
+ api->handler = simulate_cbz_cbnz;
} else if (aarch64_insn_is_tbz(insn) ||
aarch64_insn_is_tbnz(insn)) {
- asi->handler = simulate_tbz_tbnz;
+ api->handler = simulate_tbz_tbnz;
} else if (aarch64_insn_is_adr_adrp(insn)) {
- asi->handler = simulate_adr_adrp;
+ api->handler = simulate_adr_adrp;
} else if (aarch64_insn_is_b(insn) ||
aarch64_insn_is_bl(insn)) {
- asi->handler = simulate_b_bl;
+ api->handler = simulate_b_bl;
} else if (aarch64_insn_is_br(insn) ||
aarch64_insn_is_blr(insn) ||
aarch64_insn_is_ret(insn)) {
- asi->handler = simulate_br_blr_ret;
+ api->handler = simulate_br_blr_ret;
} else if (aarch64_insn_is_ldr_lit(insn)) {
- asi->handler = simulate_ldr_literal;
+ api->handler = simulate_ldr_literal;
} else if (aarch64_insn_is_ldrsw_lit(insn)) {
- asi->handler = simulate_ldrsw_literal;
+ api->handler = simulate_ldrsw_literal;
} else {
/*
* Instruction cannot be stepped out-of-line and we don't
@@ -119,6 +119,7 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
return INSN_GOOD_NO_SLOT;
}
+#ifdef CONFIG_KPROBES
static bool __kprobes
is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
{
@@ -137,13 +138,13 @@ is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
return false;
}
-enum kprobe_insn __kprobes
+enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
{
- enum kprobe_insn decoded;
- kprobe_opcode_t insn = le32_to_cpu(*addr);
- kprobe_opcode_t *scan_start = addr - 1;
- kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
+ enum probe_insn decoded;
+ probe_opcode_t insn = le32_to_cpu(*addr);
+ probe_opcode_t *scan_start = addr - 1;
+ probe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
struct module *mod;
#endif
@@ -164,7 +165,7 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
preempt_enable();
}
#endif
- decoded = arm_probe_decode_insn(insn, asi);
+ decoded = arm_probe_decode_insn(insn, &asi->api);
if (decoded == INSN_REJECTED ||
is_probed_address_atomic(scan_start, scan_end))
@@ -172,3 +173,4 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
return decoded;
}
+#endif
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
index d438289646a6..76d3f315407f 100644
--- a/arch/arm64/kernel/probes/decode-insn.h
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -23,13 +23,17 @@
*/
#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t))
-enum kprobe_insn {
+enum probe_insn {
INSN_REJECTED,
INSN_GOOD_NO_SLOT,
INSN_GOOD,
};
-enum kprobe_insn __kprobes
+#ifdef CONFIG_KPROBES
+enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi);
+#endif
+enum probe_insn __kprobes
+arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi);
#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 4ea8433011d0..40278b937088 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -44,31 +44,31 @@ post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
/* prepare insn slot */
- p->ainsn.insn[0] = cpu_to_le32(p->opcode);
+ p->ainsn.api.insn[0] = cpu_to_le32(p->opcode);
- flush_icache_range((uintptr_t) (p->ainsn.insn),
- (uintptr_t) (p->ainsn.insn) +
+ flush_icache_range((uintptr_t) (p->ainsn.api.insn),
+ (uintptr_t) (p->ainsn.api.insn) +
MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
/*
* Needs restoring of return address after stepping xol.
*/
- p->ainsn.restore = (unsigned long) p->addr +
+ p->ainsn.api.restore = (unsigned long) p->addr +
sizeof(kprobe_opcode_t);
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
{
/* This instructions is not executed xol. No need to adjust the PC */
- p->ainsn.restore = 0;
+ p->ainsn.api.restore = 0;
}
static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (p->ainsn.handler)
- p->ainsn.handler((u32)p->opcode, (long)p->addr, regs);
+ if (p->ainsn.api.handler)
+ p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs);
/* single step simulated, now go for post processing */
post_kprobe_handler(kcb, regs);
@@ -98,18 +98,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
case INSN_GOOD_NO_SLOT: /* insn need simulation */
- p->ainsn.insn = NULL;
+ p->ainsn.api.insn = NULL;
break;
case INSN_GOOD: /* instruction uses slot */
- p->ainsn.insn = get_insn_slot();
- if (!p->ainsn.insn)
+ p->ainsn.api.insn = get_insn_slot();
+ if (!p->ainsn.api.insn)
return -ENOMEM;
break;
};
/* prepare the instruction */
- if (p->ainsn.insn)
+ if (p->ainsn.api.insn)
arch_prepare_ss_slot(p);
else
arch_prepare_simulate(p);
@@ -142,9 +142,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- if (p->ainsn.insn) {
- free_insn_slot(p->ainsn.insn, 0);
- p->ainsn.insn = NULL;
+ if (p->ainsn.api.insn) {
+ free_insn_slot(p->ainsn.api.insn, 0);
+ p->ainsn.api.insn = NULL;
}
}
@@ -239,9 +239,9 @@ static void __kprobes setup_singlestep(struct kprobe *p,
}
- if (p->ainsn.insn) {
+ if (p->ainsn.api.insn) {
/* prepare for single stepping */
- slot = (unsigned long)p->ainsn.insn;
+ slot = (unsigned long)p->ainsn.api.insn;
set_ss_context(kcb, slot); /* mark pending ss */
@@ -293,8 +293,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
return;
/* return addr restore if non-branching insn */
- if (cur->ainsn.restore != 0)
- instruction_pointer_set(regs, cur->ainsn.restore);
+ if (cur->ainsn.api.restore != 0)
+ instruction_pointer_set(regs, cur->ainsn.api.restore);
/* restore back original saved kprobe variables and continue */
if (kcb->kprobe_status == KPROBE_REENTER) {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 01f259ec5700..1d76ffb34d40 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -307,7 +307,7 @@ void __init setup_arch(char **cmdline_p)
{
pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
- sprintf(init_utsname()->machine, ELF_PLATFORM);
+ sprintf(init_utsname()->machine, UTS_MACHINE);
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index c39872a7b03c..19f7e1d6fc24 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -220,7 +220,7 @@ realtime:
get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
get_ts_realtime res_sec=x10, res_nsec=x11, \
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
+ clock_gettime_return shift=1
ALIGN
monotonic:
@@ -243,7 +243,7 @@ monotonic:
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
- clock_gettime_return, shift=1
+ clock_gettime_return shift=1
ALIGN
monotonic_raw:
@@ -264,7 +264,7 @@ monotonic_raw:
clock_nsec=x15, nsec_to_sec=x9
add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
+ clock_gettime_return shift=1
ALIGN
realtime_coarse:
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 71c8076bbc60..d0cf2910d808 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -61,7 +61,7 @@ jiffies = jiffies_64;
#define TRAMP_TEXT \
. = ALIGN(PAGE_SIZE); \
VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \
- *(.entry.tramp.text) \
+ KEEP(*(.entry.tramp.text)) \
. = ALIGN(PAGE_SIZE); \
VMLINUX_SYMBOL(__entry_tramp_text_end) = .;
#else
@@ -115,7 +115,8 @@ SECTIONS
*(.discard)
*(.discard.*)
*(.interp .dynamic)
- *(.dynsym .dynstr .hash)
+ *(.dynsym .dynstr .hash .gnu.hash)
+ *(.eh_frame)
}
. = KIMAGE_VADDR + TEXT_OFFSET;
@@ -178,18 +179,18 @@ SECTIONS
. = ALIGN(4);
.altinstructions : {
__alt_instructions = .;
- *(.altinstructions)
+ KEEP(*(.altinstructions))
__alt_instructions_end = .;
}
.altinstr_replacement : {
- *(.altinstr_replacement)
+ KEEP(*(.altinstr_replacement))
}
- .rela : ALIGN(8) {
+ .rela.dyn : ALIGN(8) {
*(.rela .rela*)
}
- __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR);
- __rela_size = SIZEOF(.rela);
+ __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
+ __rela_size = SIZEOF(.rela.dyn);
. = ALIGN(SEGMENT_ALIGN);
__init_end = .;
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a5272c07d1cb..9408780f4479 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -25,7 +25,6 @@ config KVM
depends on !ARM64_16K_PAGES
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 67613937711f..dfedd4ab1a76 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -68,9 +68,8 @@
stp \ptr, \regB, [\regC], \val
.endm
- .weak memcpy
ENTRY(__memcpy)
-ENTRY(memcpy)
+WEAK(memcpy)
#include "copy_template.S"
ret
ENDPIPROC(memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index a5a4459013b1..e3de8f05c21a 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -57,9 +57,8 @@ C_h .req x12
D_l .req x13
D_h .req x14
- .weak memmove
ENTRY(__memmove)
-ENTRY(memmove)
+WEAK(memmove)
cmp dstin, src
b.lo __memcpy
add tmp1, src, count
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index f2670a9f218c..316263c47c00 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -54,9 +54,8 @@ dst .req x8
tmp3w .req w9
tmp3 .req x9
- .weak memset
ENTRY(__memset)
-ENTRY(memset)
+WEAK(memset)
mov dst, dstin /* Preserve return value. */
and A_lw, val, #255
orr A_lw, A_lw, A_lw, lsl #8
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index 2ae12825529f..078ae1a95965 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -18,7 +18,6 @@ config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
select PREEMPT_NOTIFIERS
- select ANON_INODES
select KVM_MMIO
select SRCU
---help---
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index c2024ac9d4e8..93a4b562aa5b 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -19,7 +19,6 @@ if VIRTUALIZATION
config KVM
bool
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_EVENTFD
select SRCU
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index b126ce49ae7b..03b5f6bf2584 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -713,7 +713,7 @@ static void cmm_exit(void)
* Return value:
* 0 on success / other on failure
**/
-static int cmm_set_disable(const char *val, struct kernel_param *kp)
+static int cmm_set_disable(const char *val, const struct kernel_param *kp)
{
int disable = simple_strtoul(val, NULL, 10);
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5fce52cf0e57..6c39e24abe84 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
prompt "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 9cfa2ffaa9d6..9e6ae335f7da 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -51,7 +51,7 @@ enum __force_cpu_type {
};
static int force_cpu_type;
-static int set_cpu_type(const char *str, struct kernel_param *kp)
+static int set_cpu_type(const char *str, const struct kernel_param *kp)
{
if (!strcmp(str, "timer")) {
force_cpu_type = timer;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 51601246461a..38a0643783bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,7 +19,6 @@ config X86
def_bool y
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
- select ANON_INODES
select ARCH_CLOCKSOURCE_DATA
select ARCH_DISCARD_MEMBLOCK
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index e62f4401e792..a63934de4e58 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -383,3 +383,5 @@
374 i386 userfaultfd sys_userfaultfd
375 i386 membarrier sys_membarrier
376 i386 mlock2 sys_mlock2
+424 i386 pidfd_send_signal sys_pidfd_send_signal
+434 i386 pidfd_open sys_pidfd_open
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index aa51cdd2a11a..e0e52d5fcda8 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -332,6 +332,8 @@
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
+424 common pidfd_send_signal sys_pidfd_send_signal
+434 common pidfd_open sys_pidfd_open
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 53b7f53f6207..c88de752cabc 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -26,7 +26,6 @@ config KVM
depends on X86_LOCAL_APIC
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
- select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select IRQ_BYPASS_MANAGER
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f24bd7249536..a8bb10a78bc7 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -614,7 +614,7 @@ enum __force_cpu_type {
static int force_cpu_type;
-static int set_cpu_type(const char *str, struct kernel_param *kp)
+static int set_cpu_type(const char *str, const struct kernel_param *kp)
{
if (!strcmp(str, "timer")) {
force_cpu_type = timer;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 3096c087b732..d5d26464cac8 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1608,7 +1608,8 @@ error:
return -ENODEV;
}
-static int param_set_event_clearing(const char *val, struct kernel_param *kp)
+static int param_set_event_clearing(const char *val,
+ const struct kernel_param *kp)
{
int result = 0;
@@ -1626,7 +1627,8 @@ static int param_set_event_clearing(const char *val, struct kernel_param *kp)
return result;
}
-static int param_get_event_clearing(char *buffer, struct kernel_param *kp)
+static int param_get_event_clearing(char *buffer,
+ const struct kernel_param *kp)
{
switch (ec_event_clearing) {
case ACPI_EC_EVT_TIMING_STATUS:
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 13c308323e94..764047d65d04 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -227,7 +227,8 @@ module_param_cb(trace_method_name, &param_ops_trace_method, &trace_method_name,
module_param_cb(trace_debug_layer, &param_ops_trace_attrib, &acpi_gbl_trace_dbg_layer, 0644);
module_param_cb(trace_debug_level, &param_ops_trace_attrib, &acpi_gbl_trace_dbg_level, 0644);
-static int param_set_trace_state(const char *val, struct kernel_param *kp)
+static int param_set_trace_state(const char *val,
+ const struct kernel_param *kp)
{
acpi_status status;
const char *method = trace_method_name;
@@ -263,7 +264,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp)
return 0;
}
-static int param_get_trace_state(char *buffer, struct kernel_param *kp)
+static int param_get_trace_state(char *buffer, const struct kernel_param *kp)
{
if (!(acpi_gbl_trace_flags & ACPI_TRACE_ENABLED))
return sprintf(buffer, "disable");
@@ -292,7 +293,8 @@ MODULE_PARM_DESC(aml_debug_output,
"To enable/disable the ACPI Debug Object output.");
/* /sys/module/acpi/parameters/acpica_version */
-static int param_get_acpica_version(char *buffer, struct kernel_param *kp)
+static int param_get_acpica_version(char *buffer,
+ const struct kernel_param *kp)
{
int result;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 08e75d72ddff..7c584e2ea476 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -147,7 +147,7 @@ static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait);
static int binder_stop_on_user_error;
static int binder_set_stop_on_user_error(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
int ret;
@@ -1968,6 +1968,18 @@ static int binder_inc_ref_for_node(struct binder_proc *proc,
}
ret = binder_inc_ref_olocked(ref, strong, target_list);
*rdata = ref->data;
+ if (ret && ref == new_ref) {
+ /*
+ * Cleanup the failed reference here as the target
+ * could now be dead and have already released its
+ * references by now. Calling on the new reference
+ * with strong=0 and a tmp_refs will not decrement
+ * the node. The new_ref gets kfree'd below.
+ */
+ binder_cleanup_ref_olocked(new_ref);
+ ref = NULL;
+ }
+
binder_proc_unlock(proc);
if (new_ref && ref != new_ref)
/*
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 602cbb04bee8..a4e8a2d26f6e 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -239,7 +239,6 @@ source "drivers/base/regmap/Kconfig"
config DMA_SHARED_BUFFER
bool
default n
- select ANON_INODES
help
This option enables the framework for buffer-sharing between
multiple drivers. A buffer is associated with a file using driver
diff --git a/drivers/char/adsprpc.c b/drivers/char/adsprpc.c
index b5b239eb6dc3..69bfaa0bc6f4 100644
--- a/drivers/char/adsprpc.c
+++ b/drivers/char/adsprpc.c
@@ -2747,6 +2747,7 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
char *fileinfo = NULL;
char single_line[UL_SIZE] = "----------------";
char title[UL_SIZE] = "=========================";
+ unsigned long irq_flags = 0;
fileinfo = kzalloc(DEBUGFS_SIZE, GFP_KERNEL);
if (!fileinfo)
@@ -2809,6 +2810,7 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%s%s%s%s%s\n", single_line, single_line,
single_line, single_line, single_line);
+ spin_lock_irqsave(&me->hlock, irq_flags);
hlist_for_each_entry_safe(gmaps, n, &me->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20d|0x%-18llX|0x%-18X|0x%-20lX\n\n",
@@ -2816,18 +2818,21 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
(uint32_t)gmaps->size,
gmaps->va);
}
+ spin_unlock_irqrestore(&me->hlock, irq_flags);
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20s|%-20s|%-20s|%-20s\n",
"len", "refs", "raddr", "flags");
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%s%s%s%s%s\n", single_line, single_line,
single_line, single_line, single_line);
+ spin_lock_irqsave(&me->hlock, irq_flags);
hlist_for_each_entry_safe(gmaps, n, &me->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"0x%-18X|%-20d|%-20lu|%-20u\n",
(uint32_t)gmaps->len, gmaps->refs,
gmaps->raddr, gmaps->flags);
}
+ spin_unlock_irqrestore(&me->hlock, irq_flags);
} else {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"\n%s %13s %d\n", "cid", ":", fl->cid);
@@ -2869,6 +2874,7 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
"%s%s%s%s%s\n",
single_line, single_line, single_line,
single_line, single_line);
+ mutex_lock(&fl->map_mutex);
hlist_for_each_entry_safe(map, n, &fl->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"0x%-20lX|0x%-20llX|0x%-20zu\n\n",
@@ -2878,6 +2884,7 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
"%s %d\n\n",
"DEV_MINOR:", fl->dev_minor);
}
+ mutex_unlock(&fl->map_mutex);
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20s|%-20s|%-20s|%-20s\n",
"len", "refs",
@@ -2886,23 +2893,27 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
"%s%s%s%s%s\n",
single_line, single_line, single_line,
single_line, single_line);
+ mutex_lock(&fl->map_mutex);
hlist_for_each_entry_safe(map, n, &fl->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20zu|%-20d|0x%-20lX|%-20d\n\n",
map->len, map->refs, map->raddr,
map->uncached);
}
+ mutex_unlock(&fl->map_mutex);
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20s|%-20s\n", "secure", "attr");
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%s%s%s%s%s\n",
single_line, single_line, single_line,
single_line, single_line);
+ mutex_lock(&fl->map_mutex);
hlist_for_each_entry_safe(map, n, &fl->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20d|0x%-20lX\n\n",
map->secure, map->attr);
}
+ mutex_unlock(&fl->map_mutex);
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"\n%s %s %s\n", title,
" LIST OF PENDING SMQCONTEXTS ", title);
diff --git a/drivers/char/diag/diag_dci.c b/drivers/char/diag/diag_dci.c
index 973bc3b1c5b5..ed52ebbb786a 100644
--- a/drivers/char/diag/diag_dci.c
+++ b/drivers/char/diag/diag_dci.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -1615,7 +1616,6 @@ static int diag_send_dci_pkt(struct diag_cmd_reg_t *entry,
return -EIO;
}
- mutex_lock(&driver->dci_mutex);
/* prepare DCI packet */
header.start = CONTROL_CHAR;
header.version = 1;
@@ -1634,7 +1634,6 @@ static int diag_send_dci_pkt(struct diag_cmd_reg_t *entry,
diag_update_pkt_buffer(driver->apps_dci_buf, write_len,
DCI_PKT_TYPE);
diag_update_sleeping_process(entry->pid, DCI_PKT_TYPE);
- mutex_unlock(&driver->dci_mutex);
return DIAG_DCI_NO_ERROR;
}
@@ -1654,7 +1653,6 @@ static int diag_send_dci_pkt(struct diag_cmd_reg_t *entry,
entry->proc);
status = DIAG_DCI_SEND_DATA_FAIL;
}
- mutex_unlock(&driver->dci_mutex);
return status;
}
@@ -1734,7 +1732,16 @@ static int diag_send_dci_pkt_remote(unsigned char *data, int len, int tag,
write_len += dci_header_size;
*(int *)(buf + write_len) = tag;
write_len += sizeof(int);
- memcpy(buf + write_len, data, len);
+ if ((write_len + len) < DIAG_MDM_BUF_SIZE) {
+ memcpy(buf + write_len, data, len);
+ } else {
+ pr_err("diag: skip writing invalid length packet, token: %d, pkt_len: %d\n",
+ token, (write_len + len));
+ spin_lock_irqsave(&driver->dci_mempool_lock, flags);
+ diagmem_free(driver, buf, dci_ops_tbl[token].mempool);
+ spin_unlock_irqrestore(&driver->dci_mempool_lock, flags);
+ return -EAGAIN;
+ }
write_len += len;
*(buf + write_len) = CONTROL_CHAR; /* End Terminator */
write_len += sizeof(uint8_t);
@@ -2115,8 +2122,11 @@ static int diag_process_dci_pkt_rsp(unsigned char *buf, int len)
if (temp_entry) {
reg_item = container_of(temp_entry, struct diag_cmd_reg_t,
entry);
- ret = diag_send_dci_pkt(reg_item, req_buf, req_len,
+ mutex_lock(&driver->dci_mutex);
+ if (req_entry)
+ ret = diag_send_dci_pkt(reg_item, req_buf, req_len,
req_entry->tag);
+ mutex_unlock(&driver->dci_mutex);
} else {
DIAG_LOG(DIAG_DEBUG_DCI, "Command not found: %02x %02x %02x\n",
reg_entry.cmd_code, reg_entry.subsys_id,
diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c
index 9f2e3be2c5b8..676c910e990f 100644
--- a/drivers/char/ipmi/ipmi_poweroff.c
+++ b/drivers/char/ipmi/ipmi_poweroff.c
@@ -66,7 +66,7 @@ static void (*specific_poweroff_func)(ipmi_user_t user);
/* Holds the old poweroff function so we can restore it on removal. */
static void (*old_poweroff_func)(void);
-static int set_param_ifnum(const char *val, struct kernel_param *kp)
+static int set_param_ifnum(const char *val, const struct kernel_param *kp)
{
int rv = param_set_int(val, kp);
if (rv)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 2f8ff63bbbe4..1ad69e8d6d6d 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1357,7 +1357,7 @@ static unsigned int num_slave_addrs;
#define IPMI_MEM_ADDR_SPACE 1
static char *addr_space_to_str[] = { "i/o", "mem" };
-static int hotmod_handler(const char *val, struct kernel_param *kp);
+static int hotmod_handler(const char *val, const struct kernel_param *kp);
module_param_call(hotmod, hotmod_handler, NULL, NULL, 0200);
MODULE_PARM_DESC(hotmod, "Add and remove interfaces. See"
@@ -1814,7 +1814,7 @@ static struct smi_info *smi_info_alloc(void)
return info;
}
-static int hotmod_handler(const char *val, struct kernel_param *kp)
+static int hotmod_handler(const char *val, const struct kernel_param *kp)
{
char *str = kstrdup(val, GFP_KERNEL);
int rv;
diff --git a/drivers/crypto/msm/qce50.c b/drivers/crypto/msm/qce50.c
index 7740a8c59126..3f8c512e0cd9 100644
--- a/drivers/crypto/msm/qce50.c
+++ b/drivers/crypto/msm/qce50.c
@@ -1,6 +1,6 @@
/* Qualcomm Crypto Engine driver.
*
- * Copyright (c) 2012-2018, 2020 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2012-2018, 2020-2021 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -866,6 +866,11 @@ static int _ce_setup_cipher(struct qce_device *pce_dev, struct qce_req *creq,
break;
case CIPHER_ALG_3DES:
if (creq->mode != QCE_MODE_ECB) {
+ if (ivsize > MAX_IV_LENGTH) {
+ pr_err("%s: error: Invalid length parameter\n",
+ __func__);
+ return -EINVAL;
+ }
_byte_stream_to_net_words(enciv32, creq->iv, ivsize);
pce = cmdlistinfo->encr_cntr_iv;
pce->data = enciv32[0];
@@ -914,6 +919,11 @@ static int _ce_setup_cipher(struct qce_device *pce_dev, struct qce_req *creq,
}
}
if (creq->mode != QCE_MODE_ECB) {
+ if (ivsize > MAX_IV_LENGTH) {
+ pr_err("%s: error: Invalid length parameter\n",
+ __func__);
+ return -EINVAL;
+ }
if (creq->mode == QCE_MODE_XTS)
_byte_stream_swap_to_net_words(enciv32,
creq->iv, ivsize);
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index d459cf4b8579..4aaf54132980 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -50,7 +50,7 @@ unsigned int edac_mc_get_poll_msec(void)
return edac_mc_poll_msec;
}
-static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
+static int edac_set_poll_msec(const char *val, const struct kernel_param *kp)
{
unsigned int i;
int ret;
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 9cb082a19d8a..d93afc585be4 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -19,7 +19,8 @@
#ifdef CONFIG_EDAC_DEBUG
-static int edac_set_debug_level(const char *buf, struct kernel_param *kp)
+static int edac_set_debug_level(const char *buf,
+ const struct kernel_param *kp)
{
unsigned long val;
int ret;
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 832df3c58e2f..b858494a0a55 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -18,6 +18,11 @@
#include "efistub.h"
+bool efi__get___nokaslr(void)
+{
+ return nokaslr();
+}
+
static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg)
{
static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID;
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index e33593ed8f84..1f84f9ac5430 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -38,7 +38,7 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
u64 phys_seed = 0;
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
- if (!nokaslr()) {
+ if (!efi__get___nokaslr()) {
status = efi_get_random_bytes(sys_table_arg,
sizeof(phys_seed),
(u8 *)&phys_seed);
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index a5eaa3ac0a5d..f1907c792a52 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -9,6 +9,8 @@ extern int __pure nokaslr(void);
void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
+bool efi__get___nokaslr(void);
+
efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image,
void **__fh);
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 7c9a801472e5..cd44789da808 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -1,4 +1,5 @@
-/* Copyright (c) 2002,2007-2019, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -1016,6 +1017,8 @@ static int adreno_probe(struct platform_device *pdev)
adreno_debugfs_init(adreno_dev);
adreno_profile_init(adreno_dev);
+ adreno_dev->perfcounter = false;
+
adreno_sysfs_init(adreno_dev);
kgsl_pwrscale_init(&pdev->dev, CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR);
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index f433cdef1827..2af892671ee5 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2008-2018 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2018,2020 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -439,6 +440,12 @@ struct adreno_device {
struct list_head active_list;
spinlock_t active_list_lock;
+
+ /*
+ * @perfcounter: Flag to clear perfcounters across contexts and
+ * controls perfcounter ioctl read
+ */
+ bool perfcounter;
};
/**
diff --git a/drivers/gpu/msm/adreno_compat.c b/drivers/gpu/msm/adreno_compat.c
index 5a8d587d4536..faaec90ba74a 100644
--- a/drivers/gpu/msm/adreno_compat.c
+++ b/drivers/gpu/msm/adreno_compat.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -229,6 +230,14 @@ static long adreno_ioctl_perfcounter_read_compat(
struct kgsl_perfcounter_read_compat *read32 = data;
struct kgsl_perfcounter_read read;
+ /*
+ * When performance counter zapping is enabled, the counters are cleared
+ * across context switches. Reading the counters when they are zapped is
+ * not permitted.
+ */
+ if (!adreno_dev->perfcounter)
+ return -EPERM;
+
read.reads = (struct kgsl_perfcounter_read_group __user *)
(uintptr_t)read32->reads;
read.count = read32->count;
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
index 5209f1b2a2e6..3c804ca09371 100644
--- a/drivers/gpu/msm/adreno_dispatch.c
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2013-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -1149,7 +1150,8 @@ static inline bool _verify_ib(struct kgsl_device_private *dev_priv,
}
/* Make sure that the address is mapped */
- if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr)) {
+ if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr,
+ ib->size)) {
pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n",
context->id, ib->gpuaddr);
return false;
diff --git a/drivers/gpu/msm/adreno_ioctl.c b/drivers/gpu/msm/adreno_ioctl.c
index 0d5e3e094c36..579ba2b61863 100644
--- a/drivers/gpu/msm/adreno_ioctl.c
+++ b/drivers/gpu/msm/adreno_ioctl.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -76,6 +77,14 @@ static long adreno_ioctl_perfcounter_read(struct kgsl_device_private *dev_priv,
struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
struct kgsl_perfcounter_read *read = data;
+ /*
+ * When performance counter zapping is enabled, the counters are cleared
+ * across context switches. Reading the counters when they are zapped is
+ * not permitted.
+ */
+ if (!adreno_dev->perfcounter)
+ return -EPERM;
+
return (long) adreno_perfcounter_read_group(adreno_dev, read->reads,
read->count);
}
diff --git a/drivers/gpu/msm/adreno_sysfs.c b/drivers/gpu/msm/adreno_sysfs.c
index f893b29e816e..a6c179f7c932 100644
--- a/drivers/gpu/msm/adreno_sysfs.c
+++ b/drivers/gpu/msm/adreno_sysfs.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -233,6 +234,31 @@ static unsigned int _lm_show(struct adreno_device *adreno_dev)
return test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag);
}
+static unsigned int _perfcounter_show(struct adreno_device *adreno_dev)
+{
+ return adreno_dev->perfcounter;
+}
+
+static int _perfcounter_store(struct adreno_device *adreno_dev,
+ unsigned int val)
+{
+ struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+ if (adreno_dev->perfcounter == val)
+ return 0;
+
+ mutex_lock(&device->mutex);
+
+ /* Power down the GPU before changing the state */
+ kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND);
+ adreno_dev->perfcounter = val;
+ kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+
+ mutex_unlock(&device->mutex);
+
+ return 0;
+}
+
static ssize_t _sysfs_store_u32(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -327,6 +353,7 @@ static ADRENO_SYSFS_BOOL(lm);
static ADRENO_SYSFS_BOOL(preemption);
static ADRENO_SYSFS_BOOL(hwcg);
static ADRENO_SYSFS_BOOL(throttling);
+static ADRENO_SYSFS_BOOL(perfcounter);
@@ -343,6 +370,7 @@ static const struct device_attribute *_attr_list[] = {
&adreno_attr_preemption.attr,
&adreno_attr_hwcg.attr,
&adreno_attr_throttling.attr,
+ &adreno_attr_perfcounter.attr,
NULL,
};
@@ -499,8 +527,11 @@ int adreno_sysfs_init(struct adreno_device *adreno_dev)
ret = kgsl_create_device_sysfs_files(device->dev, _attr_list);
/* Add the PPD directory and files */
- if (ret == 0)
+ if (ret == 0) {
+ /* Notify userspace */
+ kobject_uevent(&device->dev->kobj, KOBJ_ADD);
ppd_sysfs_init(adreno_dev);
+ }
return 0;
}
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index bda0afed50b6..4b9f779e525e 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2008-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -1273,7 +1274,7 @@ kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr)
if (!private)
return NULL;
- if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr))
+ if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr, 0))
return NULL;
spin_lock(&private->mem_lock);
@@ -2183,6 +2184,15 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, unsigned long useraddr)
ret = sg_alloc_table_from_pages(memdesc->sgt, pages, npages,
0, memdesc->size, GFP_KERNEL);
+
+ if (ret)
+ goto out;
+
+ ret = kgsl_cache_range_op(memdesc, 0, memdesc->size,
+ KGSL_CACHE_OP_FLUSH);
+
+ if (ret)
+ sg_free_table(memdesc->sgt);
out:
if (ret) {
for (i = 0; i < npages; i++)
@@ -2295,10 +2305,27 @@ static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device,
return -EFAULT;
}
- /* Look for the fd that matches this the vma file */
+ /* Look for the fd that matches this vma file */
fd = iterate_fd(current->files, 0, match_file, vma->vm_file);
- if (fd != 0)
+ if (fd) {
dmabuf = dma_buf_get(fd - 1);
+ if (IS_ERR(dmabuf)) {
+ up_read(&current->mm->mmap_sem);
+ return PTR_ERR(dmabuf);
+ }
+ /*
+ * It is possible that the fd obtained from iterate_fd
+ * was closed before passing the fd to dma_buf_get().
+ * Hence dmabuf returned by dma_buf_get() could be
+ * different from vma->vm_file->private_data. Return
+ * failure if this happens.
+ */
+ if (dmabuf != vma->vm_file->private_data) {
+ dma_buf_put(dmabuf);
+ up_read(&current->mm->mmap_sem);
+ return -EBADF;
+ }
+ }
}
if (IS_ERR_OR_NULL(dmabuf)) {
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index 98537730cbc9..31e8a7ea5f65 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2011-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -2525,20 +2526,21 @@ static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable,
}
static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable,
- uint64_t gpuaddr)
+ uint64_t gpuaddr, uint64_t size)
{
struct kgsl_iommu_pt *pt = pagetable->priv;
if (gpuaddr == 0)
return false;
- if (gpuaddr >= pt->va_start && gpuaddr < pt->va_end)
+ if (gpuaddr >= pt->va_start && (gpuaddr + size) < pt->va_end)
return true;
- if (gpuaddr >= pt->compat_va_start && gpuaddr < pt->compat_va_end)
+ if (gpuaddr >= pt->compat_va_start &&
+ (gpuaddr + size) < pt->compat_va_end)
return true;
- if (gpuaddr >= pt->svm_start && gpuaddr < pt->svm_end)
+ if (gpuaddr >= pt->svm_start && (gpuaddr + size) < pt->svm_end)
return true;
return false;
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index d0a45e713d0d..228f3396ae90 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2002,2007-2017,2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -606,10 +607,11 @@ enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device)
EXPORT_SYMBOL(kgsl_mmu_get_mmutype);
bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable,
- uint64_t gpuaddr)
+ uint64_t gpuaddr, uint64_t size)
{
if (PT_OP_VALID(pagetable, addr_in_range))
- return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr);
+ return pagetable->pt_ops->addr_in_range(pagetable,
+ gpuaddr, size);
return false;
}
@@ -645,7 +647,7 @@ EXPORT_SYMBOL(kgsl_mmu_get_qtimer_global_entry);
*/
static bool nommu_gpuaddr_in_range(struct kgsl_pagetable *pagetable,
- uint64_t gpuaddr)
+ uint64_t gpuaddr, uint64_t size)
{
return (gpuaddr != 0) ? true : false;
}
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
index 505fe591a53e..fcf3b188f9f7 100644
--- a/drivers/gpu/msm/kgsl_mmu.h
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -1,4 +1,5 @@
/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -99,7 +100,8 @@ struct kgsl_mmu_pt_ops {
int (*set_svm_region)(struct kgsl_pagetable *, uint64_t, uint64_t);
int (*svm_range)(struct kgsl_pagetable *, uint64_t *, uint64_t *,
uint64_t);
- bool (*addr_in_range)(struct kgsl_pagetable *pagetable, uint64_t);
+ bool (*addr_in_range)(struct kgsl_pagetable *pagetable,
+ uint64_t, uint64_t);
int (*mmu_map_offset)(struct kgsl_pagetable *pt,
uint64_t virtaddr, uint64_t virtoffset,
struct kgsl_memdesc *memdesc, uint64_t physoffset,
@@ -187,7 +189,8 @@ unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr);
unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu,
u64 ttbr0, uint64_t addr);
enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device);
-bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr);
+bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr,
+ uint64_t size);
int kgsl_mmu_get_region(struct kgsl_pagetable *pagetable,
uint64_t gpuaddr, uint64_t size);
diff --git a/drivers/gpu/msm/kgsl_pool.c b/drivers/gpu/msm/kgsl_pool.c
index cd5f8e3efe98..77bb50ded36f 100644
--- a/drivers/gpu/msm/kgsl_pool.c
+++ b/drivers/gpu/msm/kgsl_pool.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -83,6 +84,15 @@ _kgsl_pool_zero_page(struct page *p, unsigned int pool_order)
static void
_kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p)
{
+ /*
+ * Sanity check to make sure we don't re-pool a page that
+ * somebody else has a reference to.
+ */
+ if (WARN_ON_ONCE(unlikely(page_count(p) > 1))) {
+ __free_pages(p, pool->pool_order);
+ return;
+ }
+
_kgsl_pool_zero_page(p, pool->pool_order);
spin_lock(&pool->list_lock);
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index d15e824e39df..de969713ce59 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -34,7 +34,8 @@ module_param(emulate_scroll_wheel, bool, 0644);
MODULE_PARM_DESC(emulate_scroll_wheel, "Emulate a scroll wheel");
static unsigned int scroll_speed = 32;
-static int param_set_scroll_speed(const char *val, struct kernel_param *kp) {
+static int param_set_scroll_speed(const char *val,
+ const struct kernel_param *kp) {
unsigned long speed;
if (!val || kstrtoul(val, 0, &speed) || speed > 63)
return -EINVAL;
diff --git a/drivers/hwtracing/coresight/coresight-event.c b/drivers/hwtracing/coresight/coresight-event.c
index 0bced010d4c5..8e105af3e4e3 100644
--- a/drivers/hwtracing/coresight/coresight-event.c
+++ b/drivers/hwtracing/coresight/coresight-event.c
@@ -20,12 +20,13 @@
#include <trace/events/exception.h>
static int event_abort_enable;
-static int event_abort_set(const char *val, struct kernel_param *kp);
+static int event_abort_set(const char *val, const struct kernel_param *kp);
module_param_call(event_abort_enable, event_abort_set, param_get_int,
&event_abort_enable, 0644);
static int event_abort_early_panic = 1;
-static int event_abort_on_panic_set(const char *val, struct kernel_param *kp);
+static int event_abort_on_panic_set(const char *val,
+ const struct kernel_param *kp);
module_param_call(event_abort_early_panic, event_abort_on_panic_set,
param_get_int, &event_abort_early_panic, 0644);
@@ -96,7 +97,7 @@ static void event_abort_unregister(void)
unregister_trace_unhandled_abort(event_abort_unhandled_abort, NULL);
}
-static int event_abort_set(const char *val, struct kernel_param *kp)
+static int event_abort_set(const char *val, const struct kernel_param *kp)
{
int ret;
@@ -114,7 +115,8 @@ static int event_abort_set(const char *val, struct kernel_param *kp)
return ret;
}
-static int event_abort_on_panic_set(const char *val, struct kernel_param *kp)
+static int event_abort_on_panic_set(const char *val,
+ const struct kernel_param *kp)
{
int ret;
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index f086ef387475..80f5b8c3b004 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -244,7 +244,7 @@ struct chs_geom {
static unsigned int ide_disks;
static struct chs_geom ide_disks_chs[MAX_HWIFS * MAX_DRIVES];
-static int ide_set_disk_chs(const char *str, struct kernel_param *kp)
+static int ide_set_disk_chs(const char *str, const struct kernel_param *kp)
{
int a, b, c = 0, h = 0, s = 0, i, j = 1;
@@ -328,7 +328,7 @@ static void ide_dev_apply_params(ide_drive_t *drive, u8 unit)
static unsigned int ide_ignore_cable;
-static int ide_set_ignore_cable(const char *s, struct kernel_param *kp)
+static int ide_set_ignore_cable(const char *s, const struct kernel_param *kp)
{
int i, j = 1;
diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig
index 66792e707d74..46dd8288e294 100644
--- a/drivers/iio/Kconfig
+++ b/drivers/iio/Kconfig
@@ -4,7 +4,6 @@
menuconfig IIO
tristate "Industrial I/O support"
- select ANON_INODES
help
The industrial I/O subsystem provides a unified framework for
drivers for many different types of embedded sensors using a
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index c151bb625179..0b2ba086d198 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -23,7 +23,6 @@ config INFINIBAND_USER_MAD
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
- select ANON_INODES
---help---
Userspace InfiniBand access support. This enables the
kernel side of userspace verbs and the userspace
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 77cc77ba998f..49691557f798 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -150,7 +150,7 @@ static struct kparam_string kp_txselect = {
.string = txselect_list,
.maxlen = MAX_ATTEN_LEN
};
-static int setup_txselect(const char *, struct kernel_param *);
+static int setup_txselect(const char *, const struct kernel_param *);
module_param_call(txselect, setup_txselect, param_get_string,
&kp_txselect, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(txselect,
@@ -6194,7 +6194,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
}
/* handle the txselect parameter changing */
-static int setup_txselect(const char *str, struct kernel_param *kp)
+static int setup_txselect(const char *str, const struct kernel_param *kp)
{
struct qib_devdata *dd;
unsigned long val;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 4173fe977721..f8ae5d9a7729 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -80,7 +80,7 @@ module_param(srpt_srq_size, int, 0444);
MODULE_PARM_DESC(srpt_srq_size,
"Shared receive queue (SRQ) size.");
-static int srpt_get_u64_x(char *buffer, struct kernel_param *kp)
+static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "0x%016llx", *(u64 *)kp->arg);
}
diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c
index 292991c90c02..f36f4cb3e7df 100644
--- a/drivers/isdn/hardware/mISDN/avmfritz.c
+++ b/drivers/isdn/hardware/mISDN/avmfritz.c
@@ -156,7 +156,7 @@ _set_debug(struct fritzcard *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct fritzcard *card;
diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
index d0b6377b9834..64060429aa50 100644
--- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c
+++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
@@ -244,7 +244,7 @@ _set_debug(struct inf_hw *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct inf_hw *card;
diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
index a74741d28ca8..ec3749184451 100644
--- a/drivers/isdn/hardware/mISDN/netjet.c
+++ b/drivers/isdn/hardware/mISDN/netjet.c
@@ -111,7 +111,7 @@ _set_debug(struct tiger_hw *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct tiger_hw *card;
diff --git a/drivers/isdn/hardware/mISDN/speedfax.c b/drivers/isdn/hardware/mISDN/speedfax.c
index 9815bb4eec9c..1f1446ed8d5f 100644
--- a/drivers/isdn/hardware/mISDN/speedfax.c
+++ b/drivers/isdn/hardware/mISDN/speedfax.c
@@ -94,7 +94,7 @@ _set_debug(struct sfax_hw *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct sfax_hw *card;
diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c
index 741675525b53..796454722610 100644
--- a/drivers/isdn/hardware/mISDN/w6692.c
+++ b/drivers/isdn/hardware/mISDN/w6692.c
@@ -101,7 +101,7 @@ _set_debug(struct w6692_hw *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct w6692_hw *card;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b05afab6eaf2..8aebcf2a30d2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5100,7 +5100,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
return NULL;
}
-static int add_named_array(const char *val, struct kernel_param *kp)
+static int add_named_array(const char *val, const struct kernel_param *kp)
{
/* val must be "md_*" where * is not all digits.
* We allocate an array with a large free minor number, and
@@ -9330,11 +9330,11 @@ static __exit void md_exit(void)
subsys_initcall(md_init);
module_exit(md_exit)
-static int get_ro(char *buffer, struct kernel_param *kp)
+static int get_ro(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "%d", start_readonly);
}
-static int set_ro(const char *val, struct kernel_param *kp)
+static int set_ro(const char *val, const struct kernel_param *kp)
{
return kstrtouint(val, 10, (unsigned int *)&start_readonly);
}
diff --git a/drivers/media/platform/msm/camera_v2/isp/msm_isp32.c b/drivers/media/platform/msm/camera_v2/isp/msm_isp32.c
index caf69af25601..167ea3bc7a7e 100644
--- a/drivers/media/platform/msm/camera_v2/isp/msm_isp32.c
+++ b/drivers/media/platform/msm/camera_v2/isp/msm_isp32.c
@@ -162,12 +162,12 @@ static int32_t msm_vfe32_init_qos_parms(struct vfe_device *vfe_dev,
__func__);
kfree(ds_settings);
kfree(ds_regs);
- } else {
+ } else {
for (i = 0; i < ds_entries; i++)
msm_camera_io_w(ds_settings[i],
vfebase + ds_regs[i]);
- kfree(ds_regs);
- kfree(ds_settings);
+ kfree(ds_regs);
+ kfree(ds_settings);
}
} else {
kfree(ds_regs);
diff --git a/drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c b/drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c
index da42439c60d6..e03635b3a833 100644
--- a/drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c
+++ b/drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c
@@ -2271,8 +2271,8 @@ static int msm_isp_process_done_buf(struct vfe_device *vfe_dev,
MSM_ISP_BUFFER_STATE_PUT_BUF;
buf->buf_debug.put_state_last ^= 1;
rc = vfe_dev->buf_mgr->ops->buf_done(vfe_dev->buf_mgr,
- buf->bufq_handle, buf->buf_idx, time_stamp,
- frame_id, stream_info->runtime_output_format);
+ buf->bufq_handle, buf->buf_idx, time_stamp,
+ frame_id, stream_info->runtime_output_format);
if (rc == -EFAULT) {
msm_isp_halt_send_error(vfe_dev,
ISP_EVENT_BUF_FATAL_ERROR);
diff --git a/drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c b/drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c
index 44a4ad7822cb..6e2eaddbac79 100644
--- a/drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c
+++ b/drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c
@@ -963,7 +963,7 @@ static int msm_eeprom_get_dt_data(struct msm_eeprom_ctrl_t *e_ctrl)
of_node = e_ctrl->pdev->dev.of_node;
if (!of_node) {
- pr_err("%s: %d of_node is NULL\n", __func__ , __LINE__);
+ pr_err("%s: %d of_node is NULL\n", __func__, __LINE__);
return -ENOMEM;
}
rc = msm_camera_get_dt_vreg_data(of_node, &power_info->cam_vreg,
diff --git a/drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c b/drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c
index e0b703321dae..419371a9f318 100644
--- a/drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c
+++ b/drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c
@@ -144,6 +144,7 @@ int msm_camera_fill_vreg_params(struct camera_vreg_t *cam_vreg,
break;
}
}
+ break;
case CAM_V_CUSTOM2:
for (j = 0; j < num_vreg; j++) {
diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c
index def22b7fef9c..2c4933363135 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -2202,7 +2202,7 @@ static int uvc_reset_resume(struct usb_interface *intf)
* Module parameters
*/
-static int uvc_clock_param_get(char *buffer, struct kernel_param *kp)
+static int uvc_clock_param_get(char *buffer, const struct kernel_param *kp)
{
if (uvc_clock_param == CLOCK_MONOTONIC)
return sprintf(buffer, "CLOCK_MONOTONIC");
@@ -2210,7 +2210,7 @@ static int uvc_clock_param_get(char *buffer, struct kernel_param *kp)
return sprintf(buffer, "CLOCK_REALTIME");
}
-static int uvc_clock_param_set(const char *val, struct kernel_param *kp)
+static int uvc_clock_param_set(const char *val, const struct kernel_param *kp)
{
if (strncasecmp(val, "clock_", strlen("clock_")) == 0)
val += strlen("clock_");
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 207370d68c17..06cb6933b0ea 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -99,7 +99,7 @@ module_param(mpt_channel_mapping, int, 0);
MODULE_PARM_DESC(mpt_channel_mapping, " Mapping id's to channels (default=0)");
static int mpt_debug_level;
-static int mpt_set_debug_level(const char *val, struct kernel_param *kp);
+static int mpt_set_debug_level(const char *val, const struct kernel_param *kp);
module_param_call(mpt_debug_level, mpt_set_debug_level, param_get_int,
&mpt_debug_level, 0600);
MODULE_PARM_DESC(mpt_debug_level,
@@ -242,7 +242,7 @@ pci_enable_io_access(struct pci_dev *pdev)
pci_write_config_word(pdev, PCI_COMMAND, command_reg);
}
-static int mpt_set_debug_level(const char *val, struct kernel_param *kp)
+static int mpt_set_debug_level(const char *val, const struct kernel_param *kp)
{
int ret = param_set_int(val, kp);
MPT_ADAPTER *ioc;
diff --git a/drivers/misc/ibmasm/ibmasm.h b/drivers/misc/ibmasm/ibmasm.h
index 9b083448814d..5bd127727d8e 100644
--- a/drivers/misc/ibmasm/ibmasm.h
+++ b/drivers/misc/ibmasm/ibmasm.h
@@ -211,7 +211,7 @@ void ibmasmfs_unregister(void);
void ibmasmfs_add_sp(struct service_processor *sp);
/* uart */
-#ifdef CONFIG_SERIAL_8250
+#if IS_ENABLED(CONFIG_SERIAL_8250)
void ibmasm_register_uart(struct service_processor *sp);
void ibmasm_unregister_uart(struct service_processor *sp);
#else
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index ab2184003c29..71becb5c4dec 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -1127,7 +1127,8 @@ static void kgdbts_put_char(u8 chr)
ts.run_test(0, chr);
}
-static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp)
+static int param_set_kgdbts_var(const char *kmessage,
+ const struct kernel_param *kp)
{
size_t len = strlen(kmessage);
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index e2c0057737e6..b468819bc436 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -431,7 +431,7 @@ static int block2mtd_setup2(const char *val)
}
-static int block2mtd_setup(const char *val, struct kernel_param *kp)
+static int block2mtd_setup(const char *val, const struct kernel_param *kp)
{
#ifdef MODULE
return block2mtd_setup2(val);
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c
index 9734e6903fe6..d312c1b5a78e 100644
--- a/drivers/mtd/devices/phram.c
+++ b/drivers/mtd/devices/phram.c
@@ -269,7 +269,7 @@ error:
return ret;
}
-static int phram_param_call(const char *val, struct kernel_param *kp)
+static int phram_param_call(const char *val, const struct kernel_param *kp)
{
#ifdef MODULE
return phram_setup(val);
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 2af841cb7097..f45394281354 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1456,7 +1456,7 @@ static int __init bytes_str_to_int(const char *str)
* This function returns zero in case of success and a negative error code in
* case of error.
*/
-static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
+static int __init ubi_mtd_param_parse(const char *val, const struct kernel_param *kp)
{
int i, len;
struct mtd_dev_param *p;
diff --git a/drivers/net/wireless/wcnss/wcnss_wlan.c b/drivers/net/wireless/wcnss/wcnss_wlan.c
index fbf2b49c7832..e15d4b5a58eb 100644
--- a/drivers/net/wireless/wcnss/wcnss_wlan.c
+++ b/drivers/net/wireless/wcnss/wcnss_wlan.c
@@ -1655,7 +1655,7 @@ EXPORT_SYMBOL(wcnss_get_wlan_mac_address);
static int enable_wcnss_suspend_notify;
static int enable_wcnss_suspend_notify_set(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
int ret;
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 04bb9b1b14da..12720e807117 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -1,7 +1,7 @@
/*
* Device tree based initialization code for reserved memory.
*
- * Copyright (c) 2013, 2015 The Linux Foundation. All Rights Reserved.
+ * Copyright (c) 2013, 2015, 2017 The Linux Foundation. All Rights Reserved.
* Copyright (c) 2013,2014 Samsung Electronics Co., Ltd.
* http://www.samsung.com
* Author: Marek Szyprowski <m.szyprowski@samsung.com>
@@ -22,7 +22,7 @@
#include <linux/of_reserved_mem.h>
#include <linux/sort.h>
-#define MAX_RESERVED_REGIONS 16
+#define MAX_RESERVED_REGIONS 32
static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
static int reserved_mem_count;
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index a098f8324afd..4f7f3ddcea06 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -772,7 +772,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state)
}
EXPORT_SYMBOL(pci_disable_link_state);
-static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
+static int pcie_aspm_set_policy(const char *val,
+ const struct kernel_param *kp)
{
int i;
struct pcie_link_state *link;
@@ -799,7 +800,7 @@ static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
return 0;
}
-static int pcie_aspm_get_policy(char *buffer, struct kernel_param *kp)
+static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
{
int i, cnt = 0;
for (i = 0; i < ARRAY_SIZE(policy_str); i++)
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index d6d671a925e1..0111dad43c9a 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -1047,7 +1047,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
const struct pmu_probe_info *probe_table)
{
const struct of_device_id *of_id;
- const int (*init_fn)(struct arm_pmu *);
+ int (*init_fn)(struct arm_pmu *);
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
int ret = -ENODEV;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 466a0d0162c3..c345136022bf 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -9247,7 +9247,7 @@ static struct ibm_init_struct ibms_init[] __initdata = {
},
};
-static int __init set_ibm_param(const char *val, struct kernel_param *kp)
+static int __init set_ibm_param(const char *val, const struct kernel_param *kp)
{
unsigned int i;
struct ibm_struct *ibm;
diff --git a/drivers/power/reset/msm-poweroff.c b/drivers/power/reset/msm-poweroff.c
index c14bc93de717..f267ec9d80c0 100644
--- a/drivers/power/reset/msm-poweroff.c
+++ b/drivers/power/reset/msm-poweroff.c
@@ -94,7 +94,7 @@ static void *kaslr_imem_addr;
#endif
static bool scm_dload_supported;
-static int dload_set(const char *val, struct kernel_param *kp);
+static int dload_set(const char *val, const struct kernel_param *kp);
/* interface for exporting attributes */
struct reset_attribute {
struct attribute attr;
@@ -183,7 +183,7 @@ static void enable_emergency_dload_mode(void)
pr_err("Failed to set secure EDLOAD mode: %d\n", ret);
}
-static int dload_set(const char *val, struct kernel_param *kp)
+static int dload_set(const char *val, const struct kernel_param *kp)
{
int ret;
int old_val = download_mode;
diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c
index d7597c08fa11..a70fa0eb3871 100644
--- a/drivers/scsi/fcoe/fcoe_transport.c
+++ b/drivers/scsi/fcoe/fcoe_transport.c
@@ -32,13 +32,13 @@ MODULE_AUTHOR("Open-FCoE.org");
MODULE_DESCRIPTION("FIP discovery protocol and FCoE transport for FCoE HBAs");
MODULE_LICENSE("GPL v2");
-static int fcoe_transport_create(const char *, struct kernel_param *);
-static int fcoe_transport_destroy(const char *, struct kernel_param *);
+static int fcoe_transport_create(const char *, const struct kernel_param *);
+static int fcoe_transport_destroy(const char *, const struct kernel_param *);
static int fcoe_transport_show(char *buffer, const struct kernel_param *kp);
static struct fcoe_transport *fcoe_transport_lookup(struct net_device *device);
static struct fcoe_transport *fcoe_netdev_map_lookup(struct net_device *device);
-static int fcoe_transport_enable(const char *, struct kernel_param *);
-static int fcoe_transport_disable(const char *, struct kernel_param *);
+static int fcoe_transport_enable(const char *, const struct kernel_param *);
+static int fcoe_transport_disable(const char *, const struct kernel_param *);
static int libfcoe_device_notification(struct notifier_block *notifier,
ulong event, void *ptr);
@@ -842,7 +842,8 @@ EXPORT_SYMBOL(fcoe_ctlr_destroy_store);
*
* Returns: 0 for success
*/
-static int fcoe_transport_create(const char *buffer, struct kernel_param *kp)
+static int fcoe_transport_create(const char *buffer,
+ const struct kernel_param *kp)
{
int rc = -ENODEV;
struct net_device *netdev = NULL;
@@ -907,7 +908,8 @@ out_nodev:
*
* Returns: 0 for success
*/
-static int fcoe_transport_destroy(const char *buffer, struct kernel_param *kp)
+static int fcoe_transport_destroy(const char *buffer,
+ const struct kernel_param *kp)
{
int rc = -ENODEV;
struct net_device *netdev = NULL;
@@ -951,7 +953,8 @@ out_nodev:
*
* Returns: 0 for success
*/
-static int fcoe_transport_disable(const char *buffer, struct kernel_param *kp)
+static int fcoe_transport_disable(const char *buffer,
+ const struct kernel_param *kp)
{
int rc = -ENODEV;
struct net_device *netdev = NULL;
@@ -985,7 +988,8 @@ out_nodev:
*
* Returns: 0 for success
*/
-static int fcoe_transport_enable(const char *buffer, struct kernel_param *kp)
+static int fcoe_transport_enable(const char *buffer,
+ const struct kernel_param *kp)
{
int rc = -ENODEV;
struct net_device *netdev = NULL;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 8d52afd1f71d..c8430f428e14 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -100,7 +100,7 @@ _base_get_ioc_facts(struct MPT3SAS_ADAPTER *ioc, int sleep_flag);
*
*/
static int
-_scsih_set_fwfault_debug(const char *val, struct kernel_param *kp)
+_scsih_set_fwfault_debug(const char *val, const struct kernel_param *kp)
{
int ret = param_set_int(val, kp);
struct MPT3SAS_ADAPTER *ioc;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 0e39bb1489ac..f02fc3e504c0 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -283,7 +283,7 @@ struct _scsi_io_transfer {
* Note: The logging levels are defined in mpt3sas_debug.h.
*/
static int
-_scsih_set_debug_level(const char *val, struct kernel_param *kp)
+_scsih_set_debug_level(const char *val, const struct kernel_param *kp)
{
int ret = param_set_int(val, kp);
struct MPT3SAS_ADAPTER *ioc;
diff --git a/drivers/soc/qcom/hab/hab.h b/drivers/soc/qcom/hab/hab.h
index cb7ed52050b2..bdc8f54b08b4 100644
--- a/drivers/soc/qcom/hab/hab.h
+++ b/drivers/soc/qcom/hab/hab.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016-2019, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2019, 2021, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -98,6 +98,12 @@ enum hab_payload_type {
/* make sure concascaded name is less than this value */
#define MAX_VMID_NAME_SIZE 30
+/*
+ * The maximum value of payload_count in struct export_desc
+ * Max u32_t size_bytes from hab_ioctl.h(0xFFFFFFFF) / page size(0x1000)
+ */
+#define MAX_EXP_PAYLOAD_COUNT 0xFFFFF
+
#define HABCFG_FILE_SIZE_MAX 256
#define HABCFG_MMID_AREA_MAX (MM_ID_MAX/100)
diff --git a/drivers/soc/qcom/hab/hab_msg.c b/drivers/soc/qcom/hab/hab_msg.c
index 9f92074665df..4494cfbf0e0b 100644
--- a/drivers/soc/qcom/hab/hab_msg.c
+++ b/drivers/soc/qcom/hab/hab_msg.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016-2019, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2019, 2021, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -11,6 +11,7 @@
*
*/
#include "hab.h"
+#include "hab_grantable.h"
static int hab_rx_queue_empty(struct virtual_channel *vchan)
{
@@ -207,6 +208,8 @@ int hab_msg_recv(struct physical_channel *pchan,
struct export_desc *exp_desc = NULL, exp_ack = {0};
struct timeval tv = {0};
unsigned long long rx_mpm_tv = 0;
+ size_t exp_desc_size_expected = 0;
+ struct compressed_pfns *pfn_table = NULL;
/* get the local virtual channel if it isn't an open message */
if (payload_type != HAB_PAYLOAD_TYPE_INIT &&
@@ -295,8 +298,11 @@ int hab_msg_recv(struct physical_channel *pchan,
break;
case HAB_PAYLOAD_TYPE_EXPORT:
- if (sizebytes > HAB_HEADER_SIZE_MASK) {
- pr_err("%s exp size too large %zd header %zd\n",
+ exp_desc_size_expected = sizeof(struct export_desc)
+ + sizeof(struct compressed_pfns);
+ if (sizebytes > (size_t)HAB_HEADER_SIZE_MASK) ||
+ sizebytes < exp_desc_size_expected) {
+ pr_err("%s exp size too large/small %zu header %zu\n",
pchan->name, sizebytes, sizeof(*exp_desc));
break;
}
@@ -328,6 +334,53 @@ int hab_msg_recv(struct physical_channel *pchan,
hab_export_enqueue(vchan, exp_desc); /* for local use */
hab_send_export_ack(vchan, pchan, &exp_ack); /* ack exporter */
+ /*
+ * We should do all the checks here.
+ * But in order to improve performance, we put the
+ * checks related to exp->payload_count and
+ * pfn_table->region[i].size into function pages_list_create.
+ * So any potential usage of such data from the remote side
+ * after the checks here and before the checks in
+ * pages_list_create needs to add some more checks if necessary.
+ */
+ pfn_table = (struct compressed_pfns *)exp_desc->payload;
+ if (pfn_table->nregions <= 0 ||
+ (pfn_table->nregions >
+ SIZE_MAX / sizeof(struct region)) ||
+ (SIZE_MAX - exp_desc_size_expected <
+ pfn_table->nregions * sizeof(struct region))) {
+ pr_err("%s nregions is too large or negative, nregions:%d!\n",
+ pchan->name, pfn_table->nregions);
+ kfree(exp_desc);
+ break;
+ }
+
+ if (pfn_table->nregions > exp_desc->payload_count) {
+ pr_err("%s nregions %d greater than payload_count %d\n",
+ pchan->name, pfn_table->nregions,
+ exp_desc->payload_count);
+ kfree(exp_desc);
+ break;
+ }
+
+ if (exp_desc->payload_count > MAX_EXP_PAYLOAD_COUNT) {
+ pr_err("payload_count out of range: %d size overflow\n",
+ exp_desc->payload_count);
+ kfree(exp_desc);
+ break;
+ }
+
+ exp_desc_size_expected +=
+ pfn_table->nregions * sizeof(struct region);
+ if (sizebytes != exp_desc_size_expected) {
+ pr_err("%s exp size not equal %zu expect %zu\n",
+ pchan->name, sizebytes, exp_desc_size_expected);
+ kfree(exp_desc);
+ break;
+ }
+
+ hab_export_enqueue(vchan, exp_desc);
+ hab_send_export_ack(vchan, pchan, exp_desc);
break;
case HAB_PAYLOAD_TYPE_EXPORT_ACK:
diff --git a/drivers/soc/qcom/hab/khab_test.c b/drivers/soc/qcom/hab/khab_test.c
index 6add7af0489f..6a89afb3d726 100644
--- a/drivers/soc/qcom/hab/khab_test.c
+++ b/drivers/soc/qcom/hab/khab_test.c
@@ -241,13 +241,13 @@ int hab_perf_test(long testId)
return ret;
}
-static int kick_hab_perf_test(const char *val, struct kernel_param *kp);
-static int get_hab_perf_result(char *buffer, struct kernel_param *kp);
+static int kick_hab_perf_test(const char *val, const struct kernel_param *kp);
+static int get_hab_perf_result(char *buffer, const struct kernel_param *kp);
module_param_call(perf_test, kick_hab_perf_test, get_hab_perf_result,
NULL, S_IRUSR | S_IWUSR);
-static int kick_hab_perf_test(const char *val, struct kernel_param *kp)
+static int kick_hab_perf_test(const char *val, const struct kernel_param *kp)
{
long testId;
int err = kstrtol(val, 10, &testId);
@@ -258,7 +258,7 @@ static int kick_hab_perf_test(const char *val, struct kernel_param *kp)
return hab_perf_test(testId);
}
-static int get_hab_perf_result(char *buffer, struct kernel_param *kp)
+static int get_hab_perf_result(char *buffer, const struct kernel_param *kp)
{
return strlcpy(buffer, g_perf_test_result,
strlen(g_perf_test_result)+1);
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 9f46e0f56b1a..1cda5df5843b 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -1665,10 +1665,10 @@ static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
data.allocation.flags, true);
if (IS_ERR(handle))
return PTR_ERR(handle);
- pass_to_user(handle);
data.allocation.handle = handle->id;
cleanup_handle = handle;
+ pass_to_user(handle);
break;
}
case ION_IOC_FREE:
@@ -1713,11 +1713,12 @@ static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
} else {
+ data.handle.handle = handle->id;
handle = pass_to_user(handle);
- if (IS_ERR(handle))
+ if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- else
- data.handle.handle = handle->id;
+ data.handle.handle = 0;
+ }
}
mutex_unlock(&client->lock);
break;
diff --git a/drivers/staging/android/ion/msm/msm_ion.c b/drivers/staging/android/ion/msm/msm_ion.c
index 1a3aca407c9e..0a61301385f2 100644
--- a/drivers/staging/android/ion/msm/msm_ion.c
+++ b/drivers/staging/android/ion/msm/msm_ion.c
@@ -628,6 +628,9 @@ static int check_vaddr_bounds(unsigned long start, unsigned long end)
struct vm_area_struct *vma;
int ret = 1;
+ if (!start)
+ goto out;
+
if (end < start)
goto out;
@@ -781,20 +784,28 @@ long msm_ion_custom_ioctl(struct ion_client *client,
down_read(&mm->mmap_sem);
- start = (unsigned long)data.flush_data.vaddr +
- data.flush_data.offset;
- end = start + data.flush_data.length;
-
- if (start && check_vaddr_bounds(start, end)) {
- pr_err("%s: virtual address %pK is out of bounds\n",
+ if ((unsigned long)data.flush_data.vaddr >
+ (ULONG_MAX - data.flush_data.offset)) {
+ pr_err("%s: Integer overflow detected for %pK\n",
__func__, data.flush_data.vaddr);
ret = -EINVAL;
} else {
- ret = ion_do_cache_op(
- client, handle, data.flush_data.vaddr,
- data.flush_data.offset,
- data.flush_data.length, cmd);
+ start = (unsigned long)data.flush_data.vaddr +
+ data.flush_data.offset;
+ end = start + data.flush_data.length;
+
+ if (check_vaddr_bounds(start, end)) {
+ pr_err("%s: virtual address %pK is out of bounds\n",
+ __func__, data.flush_data.vaddr);
+ ret = -EINVAL;
+ } else {
+ ret = ion_do_cache_op(
+ client, handle, data.flush_data.vaddr,
+ data.flush_data.offset,
+ data.flush_data.length, cmd);
+ }
}
+
up_read(&mm->mmap_sem);
ion_free_nolock(client, handle);
diff --git a/drivers/staging/rdma/ipath/ipath_init_chip.c b/drivers/staging/rdma/ipath/ipath_init_chip.c
index a5eea199f733..f8338f0e9d3f 100644
--- a/drivers/staging/rdma/ipath/ipath_init_chip.c
+++ b/drivers/staging/rdma/ipath/ipath_init_chip.c
@@ -63,7 +63,7 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
*/
static ushort ipath_kpiobufs;
-static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
+static int ipath_set_kpiobufs(const char *str, const struct kernel_param *kp);
module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
&ipath_kpiobufs, S_IWUSR | S_IRUGO);
@@ -1014,7 +1014,7 @@ done:
return ret;
}
-static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
+static int ipath_set_kpiobufs(const char *str, const struct kernel_param *kp)
{
struct ipath_devdata *dd;
unsigned long flags;
diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index 0314e78e31ff..ec24e82da169 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c
@@ -232,7 +232,8 @@ static void kgdboc_put_char(u8 chr)
kgdb_tty_line, chr);
}
-static int param_set_kgdboc_var(const char *kmessage, struct kernel_param *kp)
+static int param_set_kgdboc_var(const char *kmessage,
+ const struct kernel_param *kp)
{
size_t len = strlen(kmessage);
diff --git a/drivers/usb/chipidea/otg_fsm.h b/drivers/usb/chipidea/otg_fsm.h
index 2689375ae5da..262d6ef8df7c 100644
--- a/drivers/usb/chipidea/otg_fsm.h
+++ b/drivers/usb/chipidea/otg_fsm.h
@@ -62,7 +62,7 @@
/* SSEND time before SRP */
#define TB_SSEND_SRP (1500) /* minimum 1.5 sec, section:5.1.2 */
-#ifdef CONFIG_USB_OTG_FSM
+#if IS_ENABLED(CONFIG_USB_OTG_FSM)
int ci_hdrc_otg_fsm_init(struct ci_hdrc *ci);
int ci_otg_fsm_work(struct ci_hdrc *ci);
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 0a6810ea4107..61c77107e97d 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1942,6 +1942,9 @@ unknown:
if (w_index != 0x5 || (w_value >> 8))
break;
interface = w_value & 0xFF;
+ if (interface >= MAX_CONFIG_INTERFACES ||
+ !os_desc_cfg->interface[interface])
+ break;
buf[6] = w_index;
if (w_length == 0x0A) {
count = count_ext_prop(os_desc_cfg,
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 9ba61939eed6..2c352d21c0cc 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -16,7 +16,7 @@
#include <linux/usb/ch9.h>
#ifdef CONFIG_USB_CONFIGFS_F_ACC
-extern int acc_ctrlrequest(struct usb_composite_dev *cdev,
+extern int acc_ctrlrequest_composite(struct usb_composite_dev *cdev,
const struct usb_ctrlrequest *ctrl);
void acc_disconnect(void);
#endif
@@ -1620,7 +1620,7 @@ static int android_setup(struct usb_gadget *gadget,
#ifdef CONFIG_USB_CONFIGFS_F_ACC
if (value < 0)
- value = acc_ctrlrequest(cdev, c);
+ value = acc_ctrlrequest_composite(cdev, c);
#endif
if (value < 0)
diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c
index b5c1ad06f8be..0bfd486a4414 100644
--- a/drivers/usb/gadget/function/f_accessory.c
+++ b/drivers/usb/gadget/function/f_accessory.c
@@ -1066,6 +1066,26 @@ err:
}
EXPORT_SYMBOL_GPL(acc_ctrlrequest);
+int acc_ctrlrequest_composite(struct usb_composite_dev *cdev,
+ const struct usb_ctrlrequest *ctrl)
+{
+ u16 w_length = le16_to_cpu(ctrl->wLength);
+
+ if (w_length > USB_COMP_EP0_BUFSIZ) {
+ if (ctrl->bRequestType & USB_DIR_IN) {
+ /* Cast away the const, we are going to overwrite on purpose. */
+ __le16 *temp = (__le16 *)&ctrl->wLength;
+
+ *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ);
+ w_length = USB_COMP_EP0_BUFSIZ;
+ } else {
+ return -EINVAL;
+ }
+ }
+ return acc_ctrlrequest(cdev, ctrl);
+}
+EXPORT_SYMBOL_GPL(acc_ctrlrequest_composite);
+
static int
__acc_function_bind(struct usb_configuration *c,
struct usb_function *f, bool configfs)
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index 1d13d79d5070..3046066680d9 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -650,14 +650,18 @@ static int rndis_set_response(struct rndis_params *params,
rndis_set_cmplt_type *resp;
rndis_resp_t *r;
+ BufLength = le32_to_cpu(buf->InformationBufferLength);
+ BufOffset = le32_to_cpu(buf->InformationBufferOffset);
+ if ((BufLength > RNDIS_MAX_TOTAL_SIZE) ||
+ (BufOffset > RNDIS_MAX_TOTAL_SIZE) ||
+ (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE))
+ return -EINVAL;
+
r = rndis_add_response(params, sizeof(rndis_set_cmplt_type));
if (!r)
return -ENOMEM;
resp = (rndis_set_cmplt_type *)r->buf;
- BufLength = le32_to_cpu(buf->InformationBufferLength);
- BufOffset = le32_to_cpu(buf->InformationBufferOffset);
-
#ifdef VERBOSE_DEBUG
pr_debug("%s: Length: %d\n", __func__, BufLength);
pr_debug("%s: Offset: %d\n", __func__, BufOffset);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index f2b4fdd1f49d..a069150d97f3 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1851,8 +1851,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
spin_lock_irq (&dev->lock);
value = -EINVAL;
if (dev->buf) {
+ spin_unlock_irq(&dev->lock);
kfree(kbuf);
- goto fail;
+ return value;
}
dev->buf = kbuf;
@@ -1900,8 +1901,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
value = usb_gadget_probe_driver(&gadgetfs_driver);
if (value != 0) {
- kfree (dev->buf);
- dev->buf = NULL;
+ spin_lock_irq(&dev->lock);
+ goto fail;
} else {
/* at this point "good" hardware has for the first time
* let the USB the host see us. alternatively, if users
@@ -1917,6 +1918,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
return value;
fail:
+ dev->config = NULL;
+ dev->hs_config = NULL;
+ dev->dev = NULL;
spin_unlock_irq (&dev->lock);
pr_debug ("%s: %s fail %Zd, %p\n", shortname, __func__, value, dev);
kfree (dev->buf);
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 2938153fe7b1..9b1963a91ad5 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -269,7 +269,7 @@ static struct console usbcons = {
void usb_serial_console_disconnect(struct usb_serial *serial)
{
- if (serial && serial->port && serial->port[0]
+ if (serial && serial->port[0]
&& serial->port[0] == usbcons_info.port) {
usb_serial_console_exit();
usb_serial_put(serial);
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 850d86ca685b..f08809323762 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -24,7 +24,6 @@ menuconfig VFIO
select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
- select ANON_INODES
help
VFIO provides a framework for secure userspace device drivers.
See Documentation/vfio.txt for more details.
diff --git a/drivers/video/console/dummycon.c b/drivers/video/console/dummycon.c
index b30e7d87804b..04e75d5efb19 100644
--- a/drivers/video/console/dummycon.c
+++ b/drivers/video/console/dummycon.c
@@ -41,12 +41,69 @@ static void dummycon_init(struct vc_data *vc, int init)
vc_resize(vc, DUMMY_COLUMNS, DUMMY_ROWS);
}
-static int dummycon_dummy(void)
+static void dummycon_deinit(struct vc_data *vc)
+{
+}
+
+static void dummycon_clear(struct vc_data *vc, int a, int b, int c, int d)
+{
+}
+
+static void dummycon_putc(struct vc_data *vc, int a, int b, int c)
+{
+}
+
+static void dummycon_putcs(struct vc_data *vc, const unsigned short *s, int a, int b, int c)
+{
+}
+
+static void dummycon_cursor(struct vc_data *vc, int a)
+{
+}
+
+static int dummycon_scroll(struct vc_data *vc, int a, int b, int c, int d)
{
return 0;
}
-#define DUMMY (void *)dummycon_dummy
+static int dummycon_switch(struct vc_data *vc)
+{
+ return 0;
+}
+
+static int dummycon_blank(struct vc_data *vc, int a, int b)
+{
+ return 0;
+}
+
+static int dummycon_font_set(struct vc_data *vc, struct console_font *f, unsigned u)
+{
+ return 0;
+}
+
+static int dummycon_font_default(struct vc_data *vc, struct console_font *f, char *c)
+{
+ return 0;
+}
+
+static int dummycon_font_copy(struct vc_data *vc, int a)
+{
+ return 0;
+}
+
+static void con_bmove(struct vc_data *vc, int a, int b, int c, int d, int e, intf)
+{
+}
+
+static int con_set_palette(struct vc_data *vc, unsigned char *p)
+{
+ return 0;
+}
+
+static int con_scrolldelta(struct vc_data *vc, int x)
+{
+ return 0;
+}
/*
* The console `switch' structure for the dummy console
@@ -58,19 +115,19 @@ const struct consw dummy_con = {
.owner = THIS_MODULE,
.con_startup = dummycon_startup,
.con_init = dummycon_init,
- .con_deinit = DUMMY,
- .con_clear = DUMMY,
- .con_putc = DUMMY,
- .con_putcs = DUMMY,
- .con_cursor = DUMMY,
- .con_scroll = DUMMY,
.con_bmove = DUMMY,
- .con_switch = DUMMY,
- .con_blank = DUMMY,
- .con_font_set = DUMMY,
- .con_font_default = DUMMY,
- .con_font_copy = DUMMY,
.con_set_palette = DUMMY,
.con_scrolldelta = DUMMY,
+ .con_deinit = dummycon_deinit,
+ .con_clear = dummycon_clear,
+ .con_putc = dummycon_putc,
+ .con_putcs = dummycon_putcs,
+ .con_cursor = dummycon_cursor,
+ .con_scroll = dummycon_scroll,
+ .con_switch = dummycon_switch,
+ .con_blank = dummycon_blank,
+ .con_font_set = dummycon_font_set,
+ .con_font_default = dummycon_font_default,
+ .con_font_copy = dummycon_font_copy,
};
EXPORT_SYMBOL_GPL(dummy_con);
diff --git a/drivers/video/fbdev/msm/mdss_dsi_status.c b/drivers/video/fbdev/msm/mdss_dsi_status.c
index 0490bcdfbfad..ef07291d4d9a 100644
--- a/drivers/video/fbdev/msm/mdss_dsi_status.c
+++ b/drivers/video/fbdev/msm/mdss_dsi_status.c
@@ -217,7 +217,8 @@ static int fb_event_callback(struct notifier_block *self,
return 0;
}
-static int param_dsi_status_disable(const char *val, struct kernel_param *kp)
+static int param_dsi_status_disable(const char *val,
+ const struct kernel_param *kp)
{
int ret = 0;
int int_val;
@@ -232,7 +233,7 @@ static int param_dsi_status_disable(const char *val, struct kernel_param *kp)
return ret;
}
-static int param_set_interval(const char *val, struct kernel_param *kp)
+static int param_set_interval(const char *val, const struct kernel_param *kp)
{
int ret = 0;
int int_val;
diff --git a/fs/Makefile b/fs/Makefile
index debf0742b3c9..969caba6b282 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_PROC_FS) += proc_namespace.o
obj-y += notify/
obj-$(CONFIG_EPOLL) += eventpoll.o
-obj-$(CONFIG_ANON_INODES) += anon_inodes.o
+obj-y += anon_inodes.o
obj-$(CONFIG_SIGNALFD) += signalfd.o
obj-$(CONFIG_TIMERFD) += timerfd.o
obj-$(CONFIG_EVENTFD) += eventfd.o
diff --git a/fs/afs/file.c b/fs/afs/file.c
index cf8a07e282a6..5290f6e83605 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -123,11 +123,10 @@ static void afs_file_readpage_read_complete(struct page *page,
/*
* read page from file, directory or symlink, given a key to use
*/
-int afs_page_filler(void *data, struct page *page)
+static int __afs_page_filler(struct key *key, struct page *page)
{
struct inode *inode = page->mapping->host;
struct afs_vnode *vnode = AFS_FS_I(inode);
- struct key *key = data;
size_t len;
off_t offset;
int ret;
@@ -209,6 +208,13 @@ error:
return ret;
}
+int afs_page_filler(struct file *data, struct page *page)
+{
+ struct key *key = (struct key *)data;
+
+ return __afs_page_filler(key, page);
+}
+
/*
* read page from file, directory or symlink, given a file to nominate the key
* to be used
@@ -221,14 +227,14 @@ static int afs_readpage(struct file *file, struct page *page)
if (file) {
key = file->private_data;
ASSERT(key != NULL);
- ret = afs_page_filler(key, page);
+ ret = __afs_page_filler(key, page);
} else {
struct inode *inode = page->mapping->host;
key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
} else {
- ret = afs_page_filler(key, page);
+ ret = __afs_page_filler(key, page);
key_put(key);
}
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1330b2a695ff..64452ba25988 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -499,7 +499,7 @@ extern const struct file_operations afs_file_operations;
extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
-extern int afs_page_filler(void *, struct page *);
+extern int afs_page_filler(struct file *, struct page *);
/*
* flock.c
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index ccd0b212e82a..05ba277e6269 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -202,7 +202,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
/* try and do the mount */
_debug("--- attempting mount %s -o %s ---", devname, options);
- mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
+ mnt = vfs_submount(mntpt, &afs_fs_type, devname, options);
_debug("--- mount result %p ---", mnt);
free_page((unsigned long) devname);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 98198c57370b..acd43792ef82 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -439,8 +439,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
memcpy(&wq->name, &qstr, sizeof(struct qstr));
wq->dev = autofs4_get_dev(sbi);
wq->ino = autofs4_get_ino(sbi);
- wq->uid = current_uid();
- wq->gid = current_gid();
+ wq->uid = current_cred()->uid;
+ wq->gid = current_cred()->gid;
wq->pid = pid;
wq->tgid = tgid;
wq->status = -EINTR; /* Status return if interrupted */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 1ea643faf04b..d8afb402f966 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -241,7 +241,8 @@ compose_mount_options_err:
* @fullpath: full path in UNC format
* @ref: server's referral
*/
-static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
+static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt,
+ struct cifs_sb_info *cifs_sb,
const char *fullpath, const struct dfs_info3_param *ref)
{
struct vfsmount *mnt;
@@ -255,7 +256,7 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
if (IS_ERR(mountdata))
return (struct vfsmount *)mountdata;
- mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata);
+ mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata);
kfree(mountdata);
kfree(devname);
return mnt;
@@ -330,7 +331,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
mnt = ERR_PTR(-EINVAL);
break;
}
- mnt = cifs_dfs_do_refmount(cifs_sb,
+ mnt = cifs_dfs_do_refmount(mntpt, cifs_sb,
full_path, referrals + i);
cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n",
__func__, referrals[i].node_name, mnt);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 3530e1c3ff56..d7111b8ce36a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -186,9 +186,9 @@ static const struct super_operations debugfs_super_operations = {
static struct vfsmount *debugfs_automount(struct path *path)
{
- struct vfsmount *(*f)(void *);
- f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
- return f(d_inode(path->dentry)->i_private);
+ debugfs_automount_t f;
+ f = (debugfs_automount_t)path->dentry->d_fsdata;
+ return f(path->dentry, d_inode(path->dentry)->i_private);
}
static const struct dentry_operations debugfs_dops = {
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
*/
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
- struct vfsmount *(*f)(void *),
+ debugfs_automount_t f,
void *data)
{
struct dentry *dentry = start_creating(name, parent);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 60f03b78914e..9323b9a8bc72 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -377,9 +377,8 @@ err:
* and will start a new collection. Eventually caller must submit the last
* segment if present.
*/
-static int readpage_strip(void *data, struct page *page)
+static int __readpage_strip(struct page_collect *pcol, struct page *page)
{
- struct page_collect *pcol = data;
struct inode *inode = pcol->inode;
struct exofs_i_info *oi = exofs_i(inode);
loff_t i_size = i_size_read(inode);
@@ -470,6 +469,13 @@ fail:
return ret;
}
+static int readpage_strip(struct file *data, struct page *page)
+{
+ struct page_collect *pcol = (struct page_collect *)data;
+
+ return __readpage_strip(pcol, page);
+}
+
static int exofs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
@@ -499,7 +505,7 @@ static int _readpage(struct page *page, bool read_4_write)
_pcol_init(&pcol, 1, page->mapping->host);
pcol.read_4_write = read_4_write;
- ret = readpage_strip(&pcol, page);
+ ret = __readpage_strip(&pcol, page);
if (ret) {
EXOFS_ERR("_readpage => %d\n", ret);
return ret;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 969beec3ff7e..87ab6150343b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -379,14 +379,15 @@ struct flex_groups {
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
- EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+ EXT4_PROJINHERIT_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -1028,6 +1029,7 @@ struct ext4_inode_info {
/* Encryption params */
struct ext4_crypt_info *i_crypt_info;
#endif
+ kprojid_t i_projid;
};
/*
@@ -1070,9 +1072,15 @@ struct ext4_inode_info {
#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
-#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
-#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT4_MOUNT_QUOTA 0x40000 /* Some quota option set */
+#define EXT4_MOUNT_USRQUOTA 0x80000 /* "old" user quota,
+ * enable enforcement for hidden
+ * quota files */
+#define EXT4_MOUNT_GRPQUOTA 0x100000 /* "old" group quota, enable
+ * enforcement for hidden quota
+ * files */
+#define EXT4_MOUNT_PRJQUOTA 0x200000 /* Enable project quota
+ * enforcement */
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
@@ -1283,7 +1291,7 @@ struct ext4_super_block {
#endif
/* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
/*
* fourth extended-fs super-block data in memory
@@ -1801,7 +1809,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
- EXT4_FEATURE_RO_COMPAT_QUOTA)
+ EXT4_FEATURE_RO_COMPAT_QUOTA |\
+ EXT4_FEATURE_RO_COMPAT_PROJECT)
#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1843,6 +1852,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
#define EXT4_DEF_RESUID 0
#define EXT4_DEF_RESGID 0
+/*
+ * Default project ID
+ */
+#define EXT4_DEF_PROJID 0
+
#define EXT4_DEF_INODE_READAHEAD_BLKS 32
/*
@@ -2565,6 +2579,7 @@ extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 685a26e9540f..ea26a0dcb87b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -781,6 +781,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
inode->i_gid = dir->i_gid;
} else
inode_init_owner(inode, dir, mode);
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+ ei->i_projid = EXT4_I(dir)->i_projid;
+ else
+ ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
err = dquot_initialize(inode);
if (err)
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4fbede7d7e2f..b65680c5404b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4300,6 +4300,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
EXT4_I(inode)->i_inline_off = 0;
}
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+ return -EOPNOTSUPP;
+ *projid = EXT4_I(inode)->i_projid;
+ return 0;
+}
+
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ext4_iget_flags flags, const char *function,
unsigned int line)
@@ -4314,6 +4322,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
int block;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
if ((!(flags & EXT4_IGET_SPECIAL) &&
(ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) ||
@@ -4389,12 +4398,20 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+ else
+ i_projid = EXT4_DEF_PROJID;
+
if (!(test_opt(inode->i_sb, NO_UID32))) {
i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
+ ei->i_projid = make_kprojid(&init_user_ns, i_projid);
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
@@ -4706,6 +4723,7 @@ static int ext4_do_update_inode(handle_t *handle,
int need_datasync = 0, set_large_file = 0;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
spin_lock(&ei->i_raw_lock);
@@ -4718,6 +4736,7 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
+ i_projid = from_kprojid(&init_user_ns, ei->i_projid);
if (!(test_opt(inode->i_sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4795,6 +4814,15 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le16(ei->i_extra_isize);
}
}
+
+ BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ i_projid != EXT4_DEF_PROJID);
+
+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ raw_inode->i_projid = cpu_to_le32(i_projid);
+
ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
if (inode->i_sb->s_flags & MS_LAZYTIME)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 313a61626d2d..9c6fe28fe27e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3218,6 +3218,12 @@ static int ext4_link(struct dentry *old_dentry,
if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode))
return -EXDEV;
+
+ if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -3527,6 +3533,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
!ext4_has_encryption_key(new_dir)))
return -ENOKEY;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
@@ -3746,6 +3757,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new.inode)))
return -EXDEV;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)) ||
+ (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(old_dir)->i_projid,
+ EXT4_I(new_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6d3b72c959c8..9f2ca5e2531a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1081,8 +1081,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1115,6 +1115,7 @@ static const struct dquot_operations ext4_quota_operations = {
.write_info = ext4_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
+ .get_projid = ext4_get_projid,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@ -1170,7 +1171,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
@@ -1230,6 +1231,7 @@ static const match_table_t tokens = {
{Opt_noquota, "noquota"},
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
+ {Opt_prjquota, "prjquota"},
{Opt_barrier, "barrier=%u"},
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
@@ -1449,8 +1451,11 @@ static const struct mount_opts {
MOPT_SET | MOPT_Q},
{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
MOPT_SET | MOPT_Q},
+ {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
+ MOPT_SET | MOPT_Q},
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
- EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
+ EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
+ MOPT_CLEAR | MOPT_Q},
{Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
@@ -1735,13 +1740,17 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
#ifdef CONFIG_QUOTA
- if (ext4_has_feature_quota(sb) &&
- (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
- ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota "
- "mount options ignored.");
- clear_opt(sb, USRQUOTA);
- clear_opt(sb, GRPQUOTA);
- } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ /*
+ * We do the test below only for project quotas. 'usrquota' and
+ * 'grpquota' mount options are allowed even without quota feature
+ * to support legacy quotas in quota files.
+ */
+ if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
+ ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
+ "Cannot enable project quota enforcement.");
+ return 0;
+ }
+ if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
clear_opt(sb, USRQUOTA);
@@ -2635,6 +2644,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
"without CONFIG_QUOTA");
return 0;
}
+ if (ext4_has_feature_project(sb) && !readonly) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return 0;
+ }
#endif /* CONFIG_QUOTA */
return 1;
}
@@ -3871,7 +3886,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &ext4_qctl_operations;
- sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
@@ -5020,6 +5035,48 @@ restore_opts:
return err;
}
+#ifdef CONFIG_QUOTA
+static int ext4_statfs_project(struct super_block *sb,
+ kprojid_t projid, struct kstatfs *buf)
+{
+ struct kqid qid;
+ struct dquot *dquot;
+ u64 limit;
+ u64 curblock;
+
+ qid = make_kqid_projid(projid);
+ dquot = dqget(sb, qid);
+ if (IS_ERR(dquot))
+ return PTR_ERR(dquot);
+ spin_lock(&dq_data_lock);
+
+ limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+ dquot->dq_dqb.dqb_bsoftlimit :
+ dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+ if (limit && buf->f_blocks > limit) {
+ curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+ buf->f_blocks = limit;
+ buf->f_bfree = buf->f_bavail =
+ (buf->f_blocks > curblock) ?
+ (buf->f_blocks - curblock) : 0;
+ }
+
+ limit = dquot->dq_dqb.dqb_isoftlimit ?
+ dquot->dq_dqb.dqb_isoftlimit :
+ dquot->dq_dqb.dqb_ihardlimit;
+ if (limit && buf->f_files > limit) {
+ buf->f_files = limit;
+ buf->f_ffree =
+ (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+ (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+ }
+
+ spin_unlock(&dq_data_lock);
+ dqput(dquot);
+ return 0;
+}
+#endif
+
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
@@ -5052,6 +5109,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+#ifdef CONFIG_QUOTA
+ if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+ sb_has_quota_limits_enabled(sb, PRJQUOTA))
+ ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
return 0;
}
@@ -5239,7 +5301,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
struct inode *qf_inode;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
BUG_ON(!ext4_has_feature_quota(sb));
@@ -5270,14 +5333,21 @@ static int ext4_enable_quotas(struct super_block *sb)
int type, err = 0;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
+ };
+ bool quota_mopt[EXT4_MAXQUOTAS] = {
+ test_opt(sb, USRQUOTA),
+ test_opt(sb, GRPQUOTA),
+ test_opt(sb, PRJQUOTA),
};
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
- DQUOT_USAGE_ENABLED);
+ DQUOT_USAGE_ENABLED |
+ (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
for (type--; type >= 0; type--)
dquot_quota_off(sb, type);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9fc6c2597dcc..1907299d4296 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -875,9 +875,9 @@ struct fuse_fill_data {
unsigned nr_pages;
};
-static int fuse_readpages_fill(void *_data, struct page *page)
+static int fuse_readpages_fill(struct file *_data, struct page *page)
{
- struct fuse_fill_data *data = _data;
+ struct fuse_fill_data *data = (struct fuse_fill_data *)_data;
struct fuse_req *req = data->req;
struct inode *inode = data->inode;
struct fuse_conn *fc = get_fuse_conn(inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ca9c492a1885..adecbb7a13fe 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -29,7 +29,7 @@ static struct kmem_cache *fuse_inode_cachep;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
-static int set_global_limit(const char *val, struct kernel_param *kp);
+static int set_global_limit(const char *val, const struct kernel_param *kp);
unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
@@ -811,7 +811,7 @@ static void sanitize_global_limit(unsigned *limit)
*limit = (1 << 16) - 1;
}
-static int set_global_limit(const char *val, struct kernel_param *kp)
+static int set_global_limit(const char *val, const struct kernel_param *kp)
{
int rv;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 582ef53f2104..6e7b6cb3f6cd 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -476,7 +476,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
*
*/
-static int __gfs2_readpage(void *file, struct page *page)
+static int __gfs2_readpage(struct file *file, struct page *page)
{
struct gfs2_inode *ip = GFS2_I(page->mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 21e719199a84..7fb45ee36897 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -740,10 +740,15 @@ err_out:
static void kernfs_release_file(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
- if (!(kn->flags & KERNFS_HAS_RELEASE))
- return;
+ /*
+ * @of is guaranteed to have no other file operations in flight and
+ * we just want to synchronize release and drain paths.
+ * @kernfs_open_file_mutex is enough. @of->mutex can't be used
+ * here because drain path may be called from places which can
+ * cause circular dependency.
+ */
+ lockdep_assert_held(&kernfs_open_file_mutex);
- mutex_lock(&of->mutex);
if (!of->released) {
/*
* A file is never detached without being released and we
@@ -753,7 +758,6 @@ static void kernfs_release_file(struct kernfs_node *kn,
kn->attr.ops->release(of);
of->released = true;
}
- mutex_unlock(&of->mutex);
}
static int kernfs_fop_release(struct inode *inode, struct file *filp)
@@ -761,7 +765,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
struct kernfs_open_file *of = kernfs_of(filp);
- kernfs_release_file(kn, of);
+ if (kn->flags & KERNFS_HAS_RELEASE) {
+ mutex_lock(&kernfs_open_file_mutex);
+ kernfs_release_file(kn, of);
+ mutex_unlock(&kernfs_open_file_mutex);
+ }
+
kernfs_put_open_node(kn, of);
seq_release(inode, filp);
kfree(of->prealloc_buf);
@@ -794,7 +803,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
if (kn->flags & KERNFS_HAS_MMAP)
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
- kernfs_release_file(kn, of);
+ if (kn->flags & KERNFS_HAS_RELEASE)
+ kernfs_release_file(kn, of);
}
mutex_unlock(&kernfs_open_file_mutex);
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index ef34e31a9d57..a71e996d6c55 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -50,7 +50,8 @@ static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
if (scops && scops->show_path)
return scops->show_path(sf, node, root);
- return seq_dentry(sf, dentry, " \t\n\\");
+ seq_dentry(sf, dentry, " \t\n\\");
+ return 0;
}
const struct super_operations kernfs_sops = {
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f038d4ac9aec..7890779f44b5 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -526,7 +526,7 @@ static struct ctl_table nlm_sysctl_root[] = {
*/
#define param_set_min_max(name, type, which_strtol, min, max) \
-static int param_set_##name(const char *val, struct kernel_param *kp) \
+static int param_set_##name(const char *val, const struct kernel_param *kp) \
{ \
char *endp; \
__typeof__(type) num = which_strtol(val, &endp, 0); \
diff --git a/fs/locks.c b/fs/locks.c
index 2c8e1e429cf7..4faeb3f6d3dc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -230,6 +230,7 @@ locks_get_lock_context(struct inode *inode, int type)
ctx = smp_load_acquire(&inode->i_flctx);
}
out:
+ trace_locks_get_lock_context(inode, type, ctx);
return ctx;
}
@@ -934,7 +935,8 @@ out:
return error;
}
-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+ struct file_lock *conflock)
{
struct file_lock *fl, *tmp;
struct file_lock *new_fl = NULL;
@@ -1142,6 +1144,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
+ trace_posix_lock_inode(inode, request, error);
+
return error;
}
@@ -1162,7 +1166,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
- return __posix_lock_file(file_inode(filp), fl, conflock);
+ return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
@@ -1178,7 +1182,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
int error;
might_sleep ();
for (;;) {
- error = __posix_lock_file(inode, fl, NULL);
+ error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1260,7 +1264,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (filp) {
fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (!error)
break;
}
@@ -1268,7 +1272,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (sleep)
fl.fl_flags |= FL_SLEEP;
fl.fl_owner = current->files;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
@@ -2165,6 +2169,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
+ inode = file_inode(filp);
+
/*
* This might block, so we do it before checking the inode.
*/
@@ -2172,8 +2178,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (copy_from_user(&flock, l, sizeof(flock)))
goto out;
- inode = file_inode(filp);
-
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
@@ -2242,6 +2246,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
}
}
out:
+ trace_fcntl_setlk(inode, file_lock, error);
locks_free_lock(file_lock);
return error;
}
@@ -2398,6 +2403,7 @@ out:
*/
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
+ int error;
struct file_lock lock;
struct file_lock_context *ctx;
@@ -2420,10 +2426,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_ops = NULL;
lock.fl_lmops = NULL;
- vfs_lock_file(filp, F_SETLK, &lock, NULL);
+ error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
if (lock.fl_ops && lock.fl_ops->fl_release_private)
lock.fl_ops->fl_release_private(&lock);
+ trace_locks_remove_posix(file_inode(filp), &lock, error);
}
EXPORT_SYMBOL(locks_remove_posix);
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index a709d80c8ebc..9a202f7ed755 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -33,9 +33,9 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
return submit_bio_wait(rw, &bio);
}
-static int bdev_readpage(void *_sb, struct page *page)
+static int bdev_readpage(struct file *_sb, struct page *page)
{
- struct super_block *sb = _sb;
+ struct super_block *sb = (struct super_block *)_sb;
struct block_device *bdev = logfs_super(sb)->s_bdev;
int err;
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 9c501449450d..4ae7d17f96e3 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -122,9 +122,9 @@ static void logfs_mtd_sync(struct super_block *sb)
mtd_sync(mtd);
}
-static int logfs_mtd_readpage(void *_sb, struct page *page)
+static int logfs_mtd_readpage(struct file *_sb, struct page *page)
{
- struct super_block *sb = _sb;
+ struct super_block *sb = (struct super_block *)_sb;
int err;
err = logfs_mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f9b45d46d4c4..48085ab8bcd5 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -174,7 +174,7 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
if (!logfs_exist_block(dir, index))
continue;
page = read_cache_page(dir->i_mapping, index,
- (filler_t *)logfs_readpage, NULL);
+ logfs_readpage, NULL);
if (IS_ERR(page))
return page;
dd = kmap_atomic(page);
@@ -306,7 +306,7 @@ static int logfs_readdir(struct file *file, struct dir_context *ctx)
continue;
}
page = read_cache_page(dir->i_mapping, pos,
- (filler_t *)logfs_readpage, NULL);
+ logfs_readpage, NULL);
if (IS_ERR(page))
return PTR_ERR(page);
dd = kmap(page);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 5f0937609465..96322622870c 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -151,7 +151,7 @@ struct logfs_device_ops {
struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs);
struct page *(*find_last_sb)(struct super_block *sb, u64 *ofs);
int (*write_sb)(struct super_block *sb, struct page *page);
- int (*readpage)(void *_sb, struct page *page);
+ int (*readpage)(struct file *_sb, struct page *page);
void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
int (*erase)(struct super_block *sb, loff_t ofs, size_t len,
int ensure_write);
@@ -485,7 +485,7 @@ static inline int logfs_get_sb_bdev(struct logfs_super *s,
#endif
/* dev_mtd.c */
-#ifdef CONFIG_MTD
+#if IS_ENABLED(CONFIG_MTD)
int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
#else
static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
diff --git a/fs/namei.c b/fs/namei.c
index 8f350e39be96..d6d2d0dec826 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,6 +35,7 @@
#include <linux/fs_struct.h>
#include <linux/posix_acl.h>
#include <linux/hash.h>
+#include <linux/init_task.h>
#include <asm/uaccess.h>
#include "internal.h"
diff --git a/fs/namespace.c b/fs/namespace.c
index 2443cdb9ce86..6f9a6bee9d87 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1015,6 +1015,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
+struct vfsmount *
+vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
+ const char *name, void *data)
+{
+ /* Until it is worked out how to pass the user namespace
+ * through from the parent mount to the submount don't support
+ * unprivileged mounts with submounts.
+ */
+ if (mountpoint->d_sb->s_user_ns != &init_user_ns)
+ return ERR_PTR(-EPERM);
+
+ return vfs_kern_mount(type, MS_SUBMOUNT, name, data);
+}
+EXPORT_SYMBOL_GPL(vfs_submount);
+
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
int flag)
{
@@ -2545,10 +2560,6 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
return -ENODEV;
if (user_ns != &init_user_ns) {
- if (!(type->fs_flags & FS_USERNS_MOUNT)) {
- put_filesystem(type);
- return -EPERM;
- }
/* Only in special cases allow devices from mounts
* created outside the initial user namespace.
*/
@@ -2868,7 +2879,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
- MS_STRICTATIME);
+ MS_STRICTATIME | MS_SUBMOUNT);
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d29ad4e02d33..f1804be48a3a 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -230,7 +230,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
const char *devname,
struct nfs_clone_mount *mountdata)
{
- return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
+ return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
}
/**
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index f592672373cb..b2a7f72eb116 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -279,7 +279,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
mountdata->hostname,
mountdata->mnt_path);
- mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
+ mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
if (!IS_ERR(mnt))
break;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0bb580174cb3..a25985705cd5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -343,7 +343,7 @@ struct nfs_readdesc {
};
static int
-readpage_async_filler(void *data, struct page *page)
+readpage_async_filler(struct file *data, struct page *page)
{
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
struct nfs_page *new;
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index e5f911bd80d2..390827fa82ef 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -1,7 +1,6 @@
config FANOTIFY
bool "Filesystem wide access notification"
select FSNOTIFY
- select ANON_INODES
default n
---help---
Say Y here to enable fanotify support. fanotify is a file access
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index b981fc0c8379..0161c74e76e2 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,6 +1,5 @@
config INOTIFY_USER
bool "Inotify support for userspace"
- select ANON_INODES
select FSNOTIFY
default y
---help---
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b5cf27dcb18a..cb3703393264 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -88,13 +88,13 @@ struct workqueue_struct *user_dlm_worker;
*/
#define DLMFS_CAPABILITIES "bast stackglue"
static int param_set_dlmfs_capabilities(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
printk(KERN_ERR "%s: readonly parameter\n", kp->name);
return -EINVAL;
}
static int param_get_dlmfs_capabilities(char *buffer,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
return strlcpy(buffer, DLMFS_CAPABILITIES,
strlen(DLMFS_CAPABILITIES) + 1);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8da1fc940a86..e16afc80f810 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3202,6 +3202,15 @@ static const struct file_operations proc_tgid_base_operations = {
.llseek = default_llseek,
};
+struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+ if (!d_is_dir(file->f_path.dentry) ||
+ (file->f_op != &proc_tgid_base_operations))
+ return ERR_PTR(-EBADF);
+
+ return proc_pid(file_inode(file));
+}
+
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
return proc_pident_lookup(dir, dentry,
diff --git a/fs/read_write.c b/fs/read_write.c
index 7b175b9134ec..27023e8f531e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -21,9 +21,6 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
-typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
-typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
-
const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
@@ -656,7 +653,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
EXPORT_SYMBOL(iov_shorten);
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, iter_fn_t fn)
+ loff_t *ppos, int type)
{
struct kiocb kiocb;
ssize_t ret;
@@ -664,7 +661,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- ret = fn(&kiocb, iter);
+ if (type == READ)
+ ret = filp->f_op->read_iter(&kiocb, iter);
+ else
+ ret = filp->f_op->write_iter(&kiocb, iter);
BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
@@ -672,7 +672,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, io_fn_t fn)
+ loff_t *ppos, int type)
{
ssize_t ret = 0;
@@ -680,7 +680,13 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
struct iovec iovec = iov_iter_iovec(iter);
ssize_t nr;
- nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
+ if (type == READ) {
+ nr = filp->f_op->read(filp, iovec.iov_base,
+ iovec.iov_len, ppos);
+ } else {
+ nr = filp->f_op->write(filp, iovec.iov_base,
+ iovec.iov_len, ppos);
+ }
if (nr < 0) {
if (!ret)
@@ -783,8 +789,6 @@ static ssize_t do_readv_writev(int type, struct file *file,
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
- io_fn_t fn;
- iter_fn_t iter_fn;
ret = import_iovec(type, uvector, nr_segs,
ARRAY_SIZE(iovstack), &iov, &iter);
@@ -798,19 +802,14 @@ static ssize_t do_readv_writev(int type, struct file *file,
if (ret < 0)
goto out;
- if (type == READ) {
- fn = file->f_op->read;
- iter_fn = file->f_op->read_iter;
- } else {
- fn = (io_fn_t)file->f_op->write;
- iter_fn = file->f_op->write_iter;
+ if (type != READ)
file_start_write(file);
- }
- if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
+ if ((type == READ && file->f_op->read_iter) ||
+ (type == WRITE && file->f_op->write_iter))
+ ret = do_iter_readv_writev(file, &iter, pos, type);
else
- ret = do_loop_readv_writev(file, &iter, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, type);
if (type != READ)
file_end_write(file);
@@ -957,8 +956,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
- io_fn_t fn;
- iter_fn_t iter_fn;
ret = compat_import_iovec(type, uvector, nr_segs,
UIO_FASTIOV, &iov, &iter);
@@ -972,19 +969,14 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
if (ret < 0)
goto out;
- if (type == READ) {
- fn = file->f_op->read;
- iter_fn = file->f_op->read_iter;
- } else {
- fn = (io_fn_t)file->f_op->write;
- iter_fn = file->f_op->write_iter;
+ if (type != READ)
file_start_write(file);
- }
- if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
+ if ((type == READ && file->f_op->read_iter) ||
+ (type == WRITE && file->f_op->write_iter))
+ ret = do_iter_readv_writev(file, &iter, pos, type);
else
- ret = do_loop_readv_writev(file, &iter, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, type);
if (type != READ)
file_end_write(file);
diff --git a/fs/super.c b/fs/super.c
index 34f1dc72064d..55cd8843b49a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -476,6 +476,10 @@ struct super_block *sget_userns(struct file_system_type *type,
struct super_block *old;
int err;
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
+ !(type->fs_flags & FS_USERNS_MOUNT) &&
+ !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
retry:
spin_lock(&sb_lock);
if (test) {
@@ -502,7 +506,7 @@ retry:
}
if (!s) {
spin_unlock(&sb_lock);
- s = alloc_super(type, flags, user_ns);
+ s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
@@ -547,8 +551,15 @@ struct super_block *sget(struct file_system_type *type,
{
struct user_namespace *user_ns = current_user_ns();
+ /* We don't yet pass the user namespace of the parent
+ * mount through to here so always use &init_user_ns
+ * until that changes.
+ */
+ if (flags & MS_SUBMOUNT)
+ user_ns = &init_user_ns;
+
/* Ensure the requestor has permissions over the target filesystem */
- if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
return sget_userns(type, test, set, flags, user_ns, data);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 52ab4aa7f214..2014a1bc8f3d 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -89,7 +89,7 @@
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
#define MCOUNT_REC() . = ALIGN(8); \
VMLINUX_SYMBOL(__start_mcount_loc) = .; \
- *(__mcount_loc) \
+ KEEP(*(__mcount_loc)) \
VMLINUX_SYMBOL(__stop_mcount_loc) = .;
#else
#define MCOUNT_REC()
@@ -123,10 +123,10 @@
#ifdef CONFIG_EVENT_TRACING
#define FTRACE_EVENTS() . = ALIGN(8); \
VMLINUX_SYMBOL(__start_ftrace_events) = .; \
- *(_ftrace_events) \
+ KEEP(*(_ftrace_events)) \
VMLINUX_SYMBOL(__stop_ftrace_events) = .; \
VMLINUX_SYMBOL(__start_ftrace_enum_maps) = .; \
- *(_ftrace_enum_map) \
+ KEEP(*(_ftrace_enum_map)) \
VMLINUX_SYMBOL(__stop_ftrace_enum_maps) = .;
#else
#define FTRACE_EVENTS()
@@ -169,8 +169,8 @@
#define _OF_TABLE_1(name) \
. = ALIGN(8); \
VMLINUX_SYMBOL(__##name##_of_table) = .; \
- *(__##name##_of_table) \
- *(__##name##_of_table_end)
+ KEEP(*(__##name##_of_table)) \
+ KEEP(*(__##name##_of_table_end))
#define CLKSRC_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc)
#define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip)
@@ -196,9 +196,14 @@
*(.dtb.init.rodata) \
VMLINUX_SYMBOL(__dtb_end) = .;
-/* .data section */
+/*
+ * .data section
+ * -fdata-sections generates .data.identifier which needs to be pulled in
+ * with .data, but don't want to pull in .data..stuff which has its own
+ * requirements. Same for bss.
+ */
#define DATA_DATA \
- *(.data) \
+ *(.data .data.[0-9a-zA-Z_]*) \
*(.ref.data) \
*(.data..shared_aligned) /* percpu related */ \
MEM_KEEP(init.data) \
@@ -281,28 +286,28 @@
/* PCI quirks */ \
.pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \
- *(.pci_fixup_early) \
+ KEEP(*(.pci_fixup_early)) \
VMLINUX_SYMBOL(__end_pci_fixups_early) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_header) = .; \
- *(.pci_fixup_header) \
+ KEEP(*(.pci_fixup_header)) \
VMLINUX_SYMBOL(__end_pci_fixups_header) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_final) = .; \
- *(.pci_fixup_final) \
+ KEEP(*(.pci_fixup_final)) \
VMLINUX_SYMBOL(__end_pci_fixups_final) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_enable) = .; \
- *(.pci_fixup_enable) \
+ KEEP(*(.pci_fixup_enable)) \
VMLINUX_SYMBOL(__end_pci_fixups_enable) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_resume) = .; \
- *(.pci_fixup_resume) \
+ KEEP(*(.pci_fixup_resume)) \
VMLINUX_SYMBOL(__end_pci_fixups_resume) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_resume_early) = .; \
- *(.pci_fixup_resume_early) \
+ KEEP(*(.pci_fixup_resume_early)) \
VMLINUX_SYMBOL(__end_pci_fixups_resume_early) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .; \
- *(.pci_fixup_suspend) \
+ KEEP(*(.pci_fixup_suspend)) \
VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_suspend_late) = .; \
- *(.pci_fixup_suspend_late) \
+ KEEP(*(.pci_fixup_suspend_late)) \
VMLINUX_SYMBOL(__end_pci_fixups_suspend_late) = .; \
} \
\
@@ -318,76 +323,76 @@
/* Kernel symbol table: Normal symbols */ \
__ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab) = .; \
- *(SORT(___ksymtab+*)) \
+ KEEP(*(SORT(___ksymtab+*))) \
VMLINUX_SYMBOL(__stop___ksymtab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \
- *(SORT(___ksymtab_gpl+*)) \
+ KEEP(*(SORT(___ksymtab_gpl+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \
- *(SORT(___ksymtab_unused+*)) \
+ KEEP(*(SORT(___ksymtab_unused+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \
- *(SORT(___ksymtab_unused_gpl+*)) \
+ KEEP(*(SORT(___ksymtab_unused_gpl+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \
- *(SORT(___ksymtab_gpl_future+*)) \
+ KEEP(*(SORT(___ksymtab_gpl_future+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \
} \
\
/* Kernel symbol table: Normal symbols */ \
__kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab) = .; \
- *(SORT(___kcrctab+*)) \
+ KEEP(*(SORT(___kcrctab+*))) \
VMLINUX_SYMBOL(__stop___kcrctab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \
- *(SORT(___kcrctab_gpl+*)) \
+ KEEP(*(SORT(___kcrctab_gpl+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \
- *(SORT(___kcrctab_unused+*)) \
+ KEEP(*(SORT(___kcrctab_unused+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \
- *(SORT(___kcrctab_unused_gpl+*)) \
+ KEEP(*(SORT(___kcrctab_unused_gpl+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \
- *(SORT(___kcrctab_gpl_future+*)) \
+ KEEP(*(SORT(___kcrctab_gpl_future+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \
} \
\
/* Kernel symbol table: strings */ \
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
- *(__ksymtab_strings) \
+ KEEP(*(__ksymtab_strings)) \
} \
\
/* __*init sections */ \
@@ -400,7 +405,7 @@
/* Built-in module parameters. */ \
__param : AT(ADDR(__param) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___param) = .; \
- *(__param) \
+ KEEP(*(__param)) \
VMLINUX_SYMBOL(__stop___param) = .; \
} \
\
@@ -422,7 +427,7 @@
#define SECURITY_INIT \
.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__security_initcall_start) = .; \
- *(.security_initcall.init) \
+ KEEP(*(.security_initcall.init)) \
VMLINUX_SYMBOL(__security_initcall_end) = .; \
}
@@ -432,7 +437,7 @@
ALIGN_FUNCTION(); \
*(.text.hot .text.hot.*) \
*(.text .text.fixup) \
- *(.text.unlikely .text.unlikely.*) \
+ *(.text.unlikely .text.unlikely .text.*) \
*(.text.unknown .text.unknown.*) \
*(.ref.text) \
*(.text.asan.* .text.tsan.*) \
@@ -481,7 +486,7 @@
VMLINUX_SYMBOL(__softirqentry_text_end) = .;
/* Section used for early init (in .S files) */
-#define HEAD_TEXT *(.head.text)
+#define HEAD_TEXT KEEP(*(.head.text))
#define HEAD_TEXT_SECTION \
.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { \
@@ -495,7 +500,7 @@
. = ALIGN(align); \
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ex_table) = .; \
- *(__ex_table) \
+ KEEP(*(__ex_table)) \
VMLINUX_SYMBOL(__stop___ex_table) = .; \
}
@@ -521,11 +526,12 @@
/* init and exit section handling */
#define INIT_DATA \
+ KEEP(*(SORT(___kentry+*))) \
*(.init.data) \
MEM_DISCARD(init.data) \
KERNEL_CTORS() \
MCOUNT_REC() \
- *(.init.rodata) \
+ *(.init.rodata .init.rodata.*) \
FTRACE_EVENTS() \
TRACE_SYSCALLS() \
KPROBE_BLACKLIST() \
@@ -543,7 +549,7 @@
EARLYCON_TABLE()
#define INIT_TEXT \
- *(.init.text) \
+ *(.init.text .init.text.*) \
*(.text.startup) \
MEM_DISCARD(init.text)
@@ -560,7 +566,7 @@
MEM_DISCARD(exit.text)
#define EXIT_CALL \
- *(.exitcall.exit)
+ KEEP(*(.exitcall.exit))
/*
* bss (Block Started by Symbol) - uninitialized data
@@ -587,7 +593,7 @@
BSS_FIRST_SECTIONS \
*(.bss..page_aligned) \
*(.dynbss) \
- *(.bss) \
+ *(.bss .bss.[0-9a-zA-Z_]*) \
*(COMMON) \
}
@@ -636,7 +642,7 @@
. = ALIGN(8); \
__bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___bug_table) = .; \
- *(__bug_table) \
+ KEEP(*(__bug_table)) \
VMLINUX_SYMBOL(__stop___bug_table) = .; \
}
#else
@@ -665,17 +671,17 @@
#define INIT_SETUP(initsetup_align) \
. = ALIGN(initsetup_align); \
VMLINUX_SYMBOL(__setup_start) = .; \
- *(.init.setup) \
+ KEEP(*(.init.setup)) \
VMLINUX_SYMBOL(__setup_end) = .;
#define INIT_CALLS_LEVEL(level) \
VMLINUX_SYMBOL(__initcall##level##_start) = .; \
- *(.initcall##level##.init) \
- *(.initcall##level##s.init) \
+ KEEP(*(.initcall##level##.init)) \
+ KEEP(*(.initcall##level##s.init)) \
#define INIT_CALLS \
VMLINUX_SYMBOL(__initcall_start) = .; \
- *(.initcallearly.init) \
+ KEEP(*(.initcallearly.init)) \
INIT_CALLS_LEVEL(0) \
INIT_CALLS_LEVEL(1) \
INIT_CALLS_LEVEL(2) \
@@ -689,21 +695,21 @@
#define CON_INITCALL \
VMLINUX_SYMBOL(__con_initcall_start) = .; \
- *(.con_initcall.init) \
+ KEEP(*(.con_initcall.init)) \
VMLINUX_SYMBOL(__con_initcall_end) = .;
#define SECURITY_INITCALL \
VMLINUX_SYMBOL(__security_initcall_start) = .; \
- *(.security_initcall.init) \
+ KEEP(*(.security_initcall.init)) \
VMLINUX_SYMBOL(__security_initcall_end) = .;
#ifdef CONFIG_BLK_DEV_INITRD
#define INIT_RAM_FS \
. = ALIGN(4); \
VMLINUX_SYMBOL(__initramfs_start) = .; \
- *(.init.ramfs) \
+ KEEP(*(.init.ramfs)) \
. = ALIGN(8); \
- *(.init.ramfs.info)
+ KEEP(*(.init.ramfs.info))
#else
#define INIT_RAM_FS
#endif
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index d169d274e31b..26dd401e12ae 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -1,7 +1,6 @@
#ifndef _BPF_CGROUP_H
#define _BPF_CGROUP_H
-#include <linux/bpf.h>
#include <linux/jump_label.h>
#include <uapi/linux/bpf.h>
@@ -55,6 +54,9 @@ int __cgroup_bpf_run_filter(struct sock *sk,
struct sk_buff *skb,
enum bpf_attach_type type);
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+ enum bpf_attach_type type);
+
/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \
({ \
@@ -78,6 +80,16 @@ int __cgroup_bpf_run_filter(struct sock *sk,
__ret; \
})
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled && sk) { \
+ __ret = __cgroup_bpf_run_filter_sk(sk, \
+ BPF_CGROUP_INET_SOCK_CREATE); \
+ } \
+ __ret; \
+})
+
#else
struct cgroup_bpf {};
@@ -86,6 +98,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#endif /* CONFIG_CGROUP_BPF */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8eb2e503fc3e..77476208f1b0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,7 +36,10 @@ struct bpf_map_ops {
};
struct bpf_map {
- atomic_t refcnt;
+ /* 1st cacheline with read-mostly members of which some
+ * are also accessed in fast-path (e.g. ops, max_entries).
+ */
+ const struct bpf_map_ops *ops ____cacheline_aligned;
enum bpf_map_type map_type;
u32 key_size;
u32 value_size;
@@ -44,10 +47,15 @@ struct bpf_map {
u32 map_flags;
u32 pages;
bool unpriv_array;
- struct user_struct *user;
- const struct bpf_map_ops *ops;
- struct work_struct work;
+ /* 7 bytes hole */
+
+ /* 2nd cacheline with misc members to avoid false sharing
+ * particularly with refcounting.
+ */
+ struct user_struct *user ____cacheline_aligned;
+ atomic_t refcnt;
atomic_t usercnt;
+ struct work_struct work;
#ifdef CONFIG_SECURITY
void *security;
#endif
@@ -299,6 +307,8 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i);
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
+int __bpf_prog_charge(struct user_struct *user, u32 pages);
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
@@ -376,6 +386,16 @@ static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
return ERR_PTR(-EOPNOTSUPP);
}
+
+static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+ return 0;
+}
+
+static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+}
+
static inline int bpf_obj_get_user(const char __user *pathname)
{
return -EOPNOTSUPP;
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index ee2772f769c2..d3036be98027 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -601,6 +601,7 @@ struct sock_cgroup_data {
struct {
u8 is_data : 1;
u8 no_refcnt : 1;
+ u8 unused : 6;
u8 padding;
u16 prioidx;
u32 classid;
@@ -610,6 +611,7 @@ struct sock_cgroup_data {
u32 classid;
u16 prioidx;
u8 padding;
+ u8 unused : 6;
u8 no_refcnt : 1;
u8 is_data : 1;
} __packed;
@@ -642,7 +644,7 @@ static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd)
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
u16 prioidx)
{
- struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) };
+ struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
return;
@@ -659,7 +661,7 @@ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
u32 classid)
{
- struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) };
+ struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
if (sock_cgroup_classid(&skcd_buf) == classid)
return;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 24dd42910d7c..c4bdce045492 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -5,6 +5,8 @@
* syscall compatibility layer.
*/
+#include <linux/types.h>
+
#ifdef CONFIG_COMPAT
#include <linux/stat.h>
@@ -711,9 +713,22 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
int, const char __user *);
+
+/*
+ * For most but not all architectures, "am I in a compat syscall?" and
+ * "am I a compat task?" are the same question. For architectures on which
+ * they aren't the same question, arch code can override in_compat_syscall.
+ */
+
+#ifndef in_compat_syscall
+static inline bool in_compat_syscall(void) { return is_compat_task(); }
+#endif
+
#else
#define is_compat_task() (0)
+static inline bool in_compat_syscall(void) { return false; }
#endif /* CONFIG_COMPAT */
+
#endif /* _LINUX_COMPAT_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index bc8077e5e688..759fb028acc8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -201,6 +201,29 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
# define unreachable() do { } while (1)
#endif
+/*
+ * KENTRY - kernel entry point
+ * This can be used to annotate symbols (functions or data) that are used
+ * without their linker symbol being referenced explicitly. For example,
+ * interrupt vector handlers, or functions in the kernel image that are found
+ * programatically.
+ *
+ * Not required for symbols exported with EXPORT_SYMBOL, or initcalls. Those
+ * are handled in their own way (with KEEP() in linker scripts).
+ *
+ * KENTRY can be avoided if the symbols in question are marked as KEEP() in the
+ * linker script. For example an architecture could KEEP() its entire
+ * boot/exception vector code rather than annotate each function and data.
+ */
+#ifndef KENTRY
+# define KENTRY(sym) \
+ extern typeof(sym) sym; \
+ static const unsigned long __kentry_##sym \
+ __used \
+ __attribute__((section("___kentry" "+" #sym ), used)) \
+ = (unsigned long)&sym;
+#endif
+
#ifndef RELOC_HIDE
# define RELOC_HIDE(ptr, off) \
({ unsigned long __ptr; \
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 19c066dce1da..51362ff607fc 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -60,9 +60,10 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
const char *dest);
+typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
- struct vfsmount *(*f)(void *),
+ debugfs_automount_t f,
void *data);
void debugfs_remove(struct dentry *dentry);
diff --git a/include/linux/export.h b/include/linux/export.h
index 96e45ea463e7..a84c76fdebd8 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -1,5 +1,6 @@
#ifndef _LINUX_EXPORT_H
#define _LINUX_EXPORT_H
+
/*
* Export symbols from the kernel to modules. Forked from module.h
* to reduce the amount of pointless cruft we feed to gcc when only
@@ -42,11 +43,11 @@ extern struct module __this_module;
#ifdef CONFIG_MODVERSIONS
/* Mark the CRC weak since genksyms apparently decides not to
* generate a checksums for some symbols */
-#define __CRC_SYMBOL(sym, sec) \
- extern __visible void *__crc_##sym __attribute__((weak)); \
- static const unsigned long __kcrctab_##sym \
- __used \
- __attribute__((section("___kcrctab" sec "+" #sym), unused)) \
+#define __CRC_SYMBOL(sym, sec) \
+ extern __visible void *__crc_##sym __attribute__((weak)); \
+ static const unsigned long __kcrctab_##sym \
+ __used \
+ __attribute__((section("___kcrctab" sec "+" #sym), used)) \
= (unsigned long) &__crc_##sym;
#else
#define __CRC_SYMBOL(sym, sec)
@@ -59,8 +60,7 @@ extern struct module __this_module;
static const char __kstrtab_##sym[] \
__attribute__((section("__ksymtab_strings"), aligned(1))) \
= VMLINUX_SYMBOL_STR(sym); \
- extern const struct kernel_symbol __ksymtab_##sym; \
- __visible const struct kernel_symbol __ksymtab_##sym \
+ static const struct kernel_symbol __ksymtab_##sym \
__used \
__attribute__((section("___ksymtab" sec "+" #sym), unused)) \
= { (unsigned long)&sym, __kstrtab_##sym }
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a85d7b71e329..c0b08bbe85b2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -244,8 +244,16 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
return *this_cpu_ptr(ops->disabled);
}
+#ifdef CONFIG_CFI_CLANG
+/* Use a C stub with the correct type for CFI */
+static inline void ftrace_stub(unsigned long a0, unsigned long a1,
+ struct ftrace_ops *op, struct pt_regs *regs)
+{
+}
+#else
extern void ftrace_stub(unsigned long a0, unsigned long a1,
struct ftrace_ops *op, struct pt_regs *regs);
+#endif
#else /* !CONFIG_FUNCTION_TRACER */
/*
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a5f251d76018..858031d58899 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -535,7 +535,7 @@ extern void free_kmem_pages(unsigned long addr, unsigned int order);
void page_alloc_init(void);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
-void drain_local_pages(struct zone *zone);
+void drain_local_pages(void *zone);
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
void page_alloc_init_late(void);
diff --git a/include/linux/init.h b/include/linux/init.h
index 3561ea30ed8c..9ae494724b1b 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -163,24 +163,8 @@ extern bool initcall_debug;
#ifndef __ASSEMBLY__
-#ifdef CONFIG_LTO
-/* Work around a LTO gcc problem: when there is no reference to a variable
- * in a module it will be moved to the end of the program. This causes
- * reordering of initcalls which the kernel does not like.
- * Add a dummy reference function to avoid this. The function is
- * deleted by the linker.
- */
-#define LTO_REFERENCE_INITCALL(x) \
- ; /* yes this is needed */ \
- static __used __exit void *reference_##x(void) \
- { \
- return &x; \
- }
-#else
-#define LTO_REFERENCE_INITCALL(x)
-#endif
-
-/* initcalls are now grouped by functionality into separate
+/*
+ * initcalls are now grouped by functionality into separate
* subsections. Ordering inside the subsections is determined
* by link order.
* For backwards compatibility, initcall() puts the call in
@@ -188,12 +172,16 @@ extern bool initcall_debug;
*
* The `id' arg to __define_initcall() is needed so that multiple initcalls
* can point at the same handler without causing duplicate-symbol build errors.
+ *
+ * Initcalls are run by placing pointers in initcall sections that the
+ * kernel iterates at runtime. The linker can do dead code / data elimination
+ * and remove that completely, so the initcall sections have to be marked
+ * as KEEP() in the linker script.
*/
#define __define_initcall(fn, id) \
static initcall_t __initcall_##fn##id __used \
- __attribute__((__section__(".initcall" #id ".init"))) = fn; \
- LTO_REFERENCE_INITCALL(__initcall_##fn##id)
+ __attribute__((__section__(".initcall" #id ".init"))) = fn;
/*
* Early initcalls run before initializing SMP.
@@ -229,15 +217,15 @@ extern bool initcall_debug;
#define __initcall(fn) device_initcall(fn)
-#define __exitcall(fn) \
+#define __exitcall(fn) \
static exitcall_t __exitcall_##fn __exit_call = fn
-#define console_initcall(fn) \
- static initcall_t __initcall_##fn \
+#define console_initcall(fn) \
+ static initcall_t __initcall_##fn \
__used __section(.con_initcall.init) = fn
-#define security_initcall(fn) \
- static initcall_t __initcall_##fn \
+#define security_initcall(fn) \
+ static initcall_t __initcall_##fn \
__used __section(.security_initcall.init) = fn
struct obs_kernel_param {
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 52666d90ca94..060db5c327be 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -225,19 +225,11 @@ struct kparam_array
VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } }
/* Obsolete - use module_param_cb() */
-#define module_param_call(name, set, get, arg, perm) \
- static const struct kernel_param_ops __param_ops_##name = \
- { .flags = 0, (void *)set, (void *)get }; \
+#define module_param_call(name, _set, _get, arg, perm) \
+ static const struct kernel_param_ops __param_ops_##name = \
+ { .flags = 0, .set = _set, .get = _get }; \
__module_param_call(MODULE_PARAM_PREFIX, \
- name, &__param_ops_##name, arg, \
- (perm) + sizeof(__check_old_set_param(set))*0, -1, 0)
-
-/* We don't get oldget: it's often a new-style param_get_uint, etc. */
-static inline int
-__check_old_set_param(int (*oldset)(const char *, struct kernel_param *))
-{
- return 0;
-}
+ name, &__param_ops_##name, arg, perm, -1, 0)
#ifdef CONFIG_SYSFS
extern void kernel_param_lock(struct module *mod);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index b606d8f57adf..bbe48613cc80 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -90,6 +90,9 @@ struct file_system_type;
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
int flags, const char *name,
void *data);
+extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
+ struct file_system_type *type,
+ const char *name, void *data);
extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d2f4a732b3e8..ca7b7cbcb23a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -243,7 +243,7 @@ static inline struct page *page_cache_alloc_readahead(struct address_space *x)
__GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
}
-typedef int filler_t(void *, struct page *);
+typedef int filler_t(struct file *, struct page *);
pgoff_t page_cache_next_hole(struct address_space *mapping,
pgoff_t index, unsigned long max_scan);
@@ -392,7 +392,7 @@ extern int read_cache_pages(struct address_space *mapping,
static inline struct page *read_mapping_page(struct address_space *mapping,
pgoff_t index, void *data)
{
- filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+ filler_t *filler = mapping->a_ops->readpage;
return read_cache_page(mapping, index, filler, data);
}
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 97b745ddece5..1d847d3de245 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -2,6 +2,7 @@
#define _LINUX_PID_H
#include <linux/rcupdate.h>
+#include <linux/wait.h>
enum pid_type
{
@@ -62,6 +63,8 @@ struct pid
unsigned int level;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
+ /* wait queue for pidfd notifications */
+ wait_queue_head_t wait_pidfd;
struct rcu_head rcu;
struct upid numbers[1];
};
@@ -74,6 +77,8 @@ struct pid_link
struct pid *pid;
};
+extern const struct file_operations pidfd_fops;
+
static inline struct pid *get_pid(struct pid *pid)
{
if (pid)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index b326d0a0cace..de3f442b7ab6 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -41,6 +41,7 @@ extern void *proc_get_parent_data(const struct inode *);
extern void proc_remove(struct proc_dir_entry *);
extern void remove_proc_entry(const char *, struct proc_dir_entry *);
extern int remove_proc_subtree(const char *, struct proc_dir_entry *);
+extern struct pid *tgid_pidfd_to_pid(const struct file *file);
#else /* CONFIG_PROC_FS */
@@ -72,6 +73,11 @@ static inline void proc_remove(struct proc_dir_entry *de) {}
#define remove_proc_entry(name, parent) do {} while (0)
static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; }
+static inline struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+ return ERR_PTR(-EBADF);
+}
+
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_PROC_UID
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 19bac77f81b1..8020dca6c61f 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -98,6 +98,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
if (!is_a_nulls(first))
WRITE_ONCE(first->pprev, &n->next);
}
+
/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4f2933707b13..0c62b1d98ea0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -222,6 +222,11 @@ struct sk_buff;
#endif
extern int sysctl_max_skb_frags;
+/* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to
+ * segment using its current segmentation instead.
+ */
+#define GSO_BY_FRAGS 0xFFFF
+
typedef struct skb_frag_struct skb_frag_t;
struct skb_frag_struct {
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5d2779aa4bbe..cf569d676909 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -879,6 +879,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd,
struct file_handle __user *handle,
int flags);
asmlinkage long sys_setns(int fd, int nstype);
+asmlinkage long sys_pidfd_open(pid_t pid, unsigned int flags);
asmlinkage long sys_process_vm_readv(pid_t pid,
const struct iovec __user *lvec,
unsigned long liovcnt,
@@ -909,4 +910,8 @@ asmlinkage long sys_membarrier(int cmd, int flags);
asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
+asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
+ siginfo_t __user *info,
+ unsigned int flags);
+
#endif
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 87c094961bd5..32342754643a 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
return udp_sk(sk)->no_check6_rx;
}
-#define udp_portaddr_for_each_entry(__sk, node, list) \
- hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
+#define udp_portaddr_for_each_entry(__sk, list) \
+ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
-#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
- hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
+#define udp_portaddr_for_each_entry_rcu(__sk, list) \
+ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 07aed691fe00..fa06e7418f41 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -90,6 +90,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
+int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard);
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 7ff588ca6817..28332bdac333 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -53,6 +53,7 @@ struct sock *__inet6_lookup_established(struct net *net,
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
@@ -60,6 +61,7 @@ struct sock *inet6_lookup_listener(struct net *net,
static inline struct sock *__inet6_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
@@ -71,12 +73,12 @@ static inline struct sock *__inet6_lookup(struct net *net,
if (sk)
return sk;
- return inet6_lookup_listener(net, hashinfo, saddr, sport,
+ return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
daddr, hnum, dif);
}
static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
- struct sk_buff *skb,
+ struct sk_buff *skb, int doff,
const __be16 sport,
const __be16 dport,
int iif)
@@ -86,16 +88,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
if (sk)
return sk;
- return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo,
- &ipv6_hdr(skb)->saddr, sport,
+ return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+ doff, &ipv6_hdr(skb)->saddr, sport,
&ipv6_hdr(skb)->daddr, ntohs(dport),
iif);
}
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
const int dif);
+
+int inet6_hash(struct sock *sk);
#endif /* IS_ENABLED(CONFIG_IPV6) */
#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index de2e3ade6102..50f635c2c536 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -207,12 +207,16 @@ void inet_hashinfo_init(struct inet_hashinfo *h);
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
-void __inet_hash(struct sock *sk, struct sock *osk);
-void inet_hash(struct sock *sk);
+int __inet_hash(struct sock *sk, struct sock *osk,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard));
+int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr,
const unsigned short hnum,
@@ -220,10 +224,11 @@ struct sock *__inet_lookup_listener(struct net *net,
static inline struct sock *inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
- return __inet_lookup_listener(net, hashinfo, saddr, sport,
+ return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
daddr, ntohs(dport), dif);
}
@@ -299,6 +304,7 @@ static inline struct sock *
static inline struct sock *__inet_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
const int dif)
@@ -307,12 +313,13 @@ static inline struct sock *__inet_lookup(struct net *net,
struct sock *sk = __inet_lookup_established(net, hashinfo,
saddr, sport, daddr, hnum, dif);
- return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport,
- daddr, hnum, dif);
+ return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
+ sport, daddr, hnum, dif);
}
static inline struct sock *inet_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
const int dif)
@@ -320,7 +327,8 @@ static inline struct sock *inet_lookup(struct net *net,
struct sock *sk;
local_bh_disable();
- sk = __inet_lookup(net, hashinfo, saddr, sport, daddr, dport, dif);
+ sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
+ dport, dif);
local_bh_enable();
return sk;
@@ -328,6 +336,7 @@ static inline struct sock *inet_lookup(struct net *net,
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
+ int doff,
const __be16 sport,
const __be16 dport)
{
@@ -337,8 +346,8 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
if (sk)
return sk;
else
- return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo,
- iph->saddr, sport,
+ return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+ doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb));
}
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 3d4fb0e2a3bf..798e5f95494b 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -374,6 +374,17 @@ static inline void ip_tunnel_unneed_metadata(void)
{
}
+static inline void ip_tunnel_info_opts_get(void *to,
+ const struct ip_tunnel_info *info)
+{
+}
+
+static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
+ const void *from, int len)
+{
+ info->options_len = 0;
+}
+
#endif /* CONFIG_INET */
#endif /* __NET_IP_TUNNELS_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index e3f73fd1d53a..a42d65ca0c27 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -286,7 +286,7 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
struct kernel_param;
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
extern unsigned int nf_conntrack_htable_size;
extern unsigned int nf_conntrack_max;
extern unsigned int nf_conntrack_hash_rnd;
diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index 68e509750caa..039cc29cb4a8 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -51,7 +51,7 @@ void pn_sock_init(void);
struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa);
void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb);
void phonet_get_local_port_range(int *min, int *max);
-void pn_sock_hash(struct sock *sk);
+int pn_sock_hash(struct sock *sk);
void pn_sock_unhash(struct sock *sk);
int pn_sock_get_port(struct sock *sk, unsigned short sport);
diff --git a/include/net/ping.h b/include/net/ping.h
index ac80cb45e630..5fd7cc244833 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -65,7 +65,7 @@ struct pingfakehdr {
};
int ping_get_port(struct sock *sk, unsigned short ident);
-void ping_hash(struct sock *sk);
+int ping_hash(struct sock *sk);
void ping_unhash(struct sock *sk);
int ping_init_sock(struct sock *sk);
diff --git a/include/net/raw.h b/include/net/raw.h
index 6a40c6562dd2..3e789008394d 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -57,7 +57,7 @@ int raw_seq_open(struct inode *ino, struct file *file,
#endif
-void raw_hash_sk(struct sock *sk);
+int raw_hash_sk(struct sock *sk);
void raw_unhash_sk(struct sock *sk);
struct raw_sock {
diff --git a/include/net/sock.h b/include/net/sock.h
index c8d332b75a40..4c417486a1b8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -193,7 +193,7 @@ struct sock_common {
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
- struct hlist_nulls_node skc_portaddr_node;
+ struct hlist_node skc_portaddr_node;
};
struct proto *skc_prot;
possible_net_t skc_net;
@@ -699,18 +699,18 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry(__sk, list, sk_bind_node)
/**
- * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset
+ * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @head: the head for your list.
* @offset: offset of hlist_node within the struct.
*
*/
-#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \
- for (pos = (head)->first; \
- (!is_a_nulls(pos)) && \
+#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \
+ for (pos = rcu_dereference((head)->first); \
+ pos != NULL && \
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
- pos = pos->next)
+ pos = rcu_dereference(pos->next))
static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
@@ -1019,7 +1019,7 @@ struct proto {
void (*release_cb)(struct sock *sk);
/* Keeping track of sk's, looking them up, and port selection methods. */
- void (*hash)(struct sock *sk);
+ int (*hash)(struct sock *sk);
void (*unhash)(struct sock *sk);
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
@@ -1348,10 +1348,10 @@ static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
/* With per-bucket locks this operation is not-atomic, so that
* this version is not worse.
*/
-static inline void __sk_prot_rehash(struct sock *sk)
+static inline int __sk_prot_rehash(struct sock *sk)
{
sk->sk_prot->unhash(sk);
- sk->sk_prot->hash(sk);
+ return sk->sk_prot->hash(sk);
}
void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 7dda3d7adba8..aecd30308d50 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -16,7 +16,7 @@ struct sock_reuseport {
};
extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
extern void reuseport_detach_sock(struct sock *sk);
extern struct sock *reuseport_select_sock(struct sock *sk,
u32 hash,
diff --git a/include/net/udp.h b/include/net/udp.h
index 13bd641be453..3f52b18ea12e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -59,7 +59,7 @@ struct udp_skb_cb {
* @lock: spinlock protecting changes to head/count
*/
struct udp_hslot {
- struct hlist_nulls_head head;
+ struct hlist_head head;
int count;
spinlock_t lock;
} __attribute__((aligned(2 * sizeof(long))));
@@ -177,9 +177,10 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
}
/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
-static inline void udp_lib_hash(struct sock *sk)
+static inline int udp_lib_hash(struct sock *sk)
{
BUG();
+ return 0;
}
void udp_lib_unhash(struct sock *sk);
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index c72f2dc01d0b..63a7680347cb 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -34,6 +34,83 @@
{ F_WRLCK, "F_WRLCK" }, \
{ F_UNLCK, "F_UNLCK" })
+TRACE_EVENT(locks_get_lock_context,
+ TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx),
+
+ TP_ARGS(inode, type, ctx),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(unsigned char, type)
+ __field(struct file_lock_context *, ctx)
+ ),
+
+ TP_fast_assign(
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->i_ino = inode->i_ino;
+ __entry->type = type;
+ __entry->ctx = ctx;
+ ),
+
+ TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p",
+ MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+ __entry->i_ino, show_fl_type(__entry->type), __entry->ctx)
+);
+
+DECLARE_EVENT_CLASS(filelock_lock,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+
+ TP_ARGS(inode, fl, ret),
+
+ TP_STRUCT__entry(
+ __field(struct file_lock *, fl)
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(struct file_lock *, fl_next)
+ __field(fl_owner_t, fl_owner)
+ __field(unsigned int, fl_pid)
+ __field(unsigned int, fl_flags)
+ __field(unsigned char, fl_type)
+ __field(loff_t, fl_start)
+ __field(loff_t, fl_end)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->fl = fl ? fl : NULL;
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->i_ino = inode->i_ino;
+ __entry->fl_next = fl ? fl->fl_next : NULL;
+ __entry->fl_owner = fl ? fl->fl_owner : NULL;
+ __entry->fl_pid = fl ? fl->fl_pid : 0;
+ __entry->fl_flags = fl ? fl->fl_flags : 0;
+ __entry->fl_type = fl ? fl->fl_type : 0;
+ __entry->fl_start = fl ? fl->fl_start : 0;
+ __entry->fl_end = fl ? fl->fl_end : 0;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d",
+ __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+ __entry->i_ino, __entry->fl_next, __entry->fl_owner,
+ __entry->fl_pid, show_fl_flags(__entry->fl_flags),
+ show_fl_type(__entry->fl_type),
+ __entry->fl_start, __entry->fl_end, __entry->ret)
+);
+
+DEFINE_EVENT(filelock_lock, posix_lock_inode,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, fcntl_setlk,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, locks_remove_posix,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
DECLARE_EVENT_CLASS(filelock_lease,
TP_PROTO(struct inode *inode, struct file_lock *fl),
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 1324b0292ec2..a6b339e1d70e 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -715,9 +715,13 @@ __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
__SYSCALL(__NR_membarrier, sys_membarrier)
#define __NR_mlock2 284
__SYSCALL(__NR_mlock2, sys_mlock2)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_pidfd_open 434
+__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#undef __NR_syscalls
-#define __NR_syscalls 285
+#define __NR_syscalls 435
/*
* All syscalls below here should go away really,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b8effe6bf417..bbbf36a963e6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -99,11 +99,13 @@ enum bpf_prog_type {
BPF_PROG_TYPE_XDP,
BPF_PROG_TYPE_PERF_EVENT,
BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
};
enum bpf_attach_type {
BPF_CGROUP_INET_INGRESS,
BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
__MAX_BPF_ATTACH_TYPE
};
@@ -627,6 +629,10 @@ struct bpf_tunnel_key {
__u32 tunnel_label;
};
+struct bpf_sock {
+ __u32 bound_dev_if;
+};
+
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index c66b52303114..51caeb7797d0 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -93,6 +93,7 @@ struct inodes_stat_t {
#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT (1<<26)
#define MS_NOSEC (1<<28)
#define MS_BORN (1<<29)
#define MS_ACTIVE (1<<30)
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 5f0fe019a720..ed6e31d9c195 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -9,6 +9,7 @@
#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
diff --git a/init/Kconfig b/init/Kconfig
index af433ff5356f..a5ae0470b7b2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1462,9 +1462,6 @@ endchoice
config SYSCTL
bool
-config ANON_INODES
- bool
-
config HAVE_UID16
bool
@@ -1654,14 +1651,12 @@ config HAVE_FUTEX_CMPXCHG
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
- select ANON_INODES
help
Disabling this option will cause the kernel to be built without
support for epoll family of system calls.
config SIGNALFD
bool "Enable signalfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the signalfd() system call that allows to receive signals
@@ -1671,7 +1666,6 @@ config SIGNALFD
config TIMERFD
bool "Enable timerfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the timerfd() system call that allows to receive timer
@@ -1681,7 +1675,6 @@ config TIMERFD
config EVENTFD
bool "Enable eventfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the eventfd() system call that allows to receive both
@@ -1692,7 +1685,6 @@ config EVENTFD
# syscall, maps, verifier
config BPF_SYSCALL
bool "Enable bpf() system call"
- select ANON_INODES
select BPF
default n
help
@@ -1747,7 +1739,6 @@ config BPF_UNPRIV_DEFAULT_OFF
config USERFAULTFD
bool "Enable userfaultfd() system call"
- select ANON_INODES
depends on MMU
help
Enable the userfaultfd() system call that allows to intercept and
@@ -1799,7 +1790,6 @@ config PERF_EVENTS
bool "Kernel performance events and counters"
default y if PROFILING
depends on HAVE_PERF_EVENTS
- select ANON_INODES
select IRQ_WORK
select SRCU
help
diff --git a/init/Makefile b/init/Makefile
index 243f61de2cba..d210b235c5d7 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -2,6 +2,8 @@
# Makefile for the linux kernel.
#
+ccflags-y := -fno-function-sections -fno-data-sections
+
obj-y := main.o version.o mounts.o
obj-y += noinitramfs.o
obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 357ce8355d57..8210c7dd7532 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -424,3 +424,36 @@ int __cgroup_bpf_run_filter(struct sock *sk,
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter);
+
+/**
+ * __cgroup_bpf_run_filter_sk() - Run a program on a sock
+ * @sk: sock structure to manipulate
+ * @type: The type of program to be exectuted
+ *
+ * socket is passed is expected to be of type INET or INET6.
+ *
+ * The program type passed in via @type must be suitable for sock
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if any if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+ enum bpf_attach_type type)
+{
+ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ struct bpf_prog *prog;
+ int ret = 0;
+
+
+ rcu_read_lock();
+
+ prog = rcu_dereference(cgrp->bpf.effective[type]->progs[0]);
+ if (prog)
+ ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
+
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 95ffe1fac0bf..2b1a925489cf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -60,11 +60,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
{
u8 *ptr = NULL;
- if (k >= SKF_NET_OFF)
+ if (k >= SKF_NET_OFF) {
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
- else if (k >= SKF_LL_OFF)
+ } else if (k >= SKF_LL_OFF) {
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ return NULL;
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
+ }
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
@@ -105,19 +107,29 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
gfp_extra_flags;
struct bpf_prog *fp;
+ u32 pages, delta;
+ int ret;
BUG_ON(fp_old == NULL);
size = round_up(size, PAGE_SIZE);
- if (size <= fp_old->pages * PAGE_SIZE)
+ pages = size / PAGE_SIZE;
+ if (pages <= fp_old->pages)
return fp_old;
+ delta = pages - fp_old->pages;
+ ret = __bpf_prog_charge(fp_old->aux->user, delta);
+ if (ret)
+ return NULL;
+
fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
- if (fp != NULL) {
+ if (fp == NULL) {
+ __bpf_prog_uncharge(fp_old->aux->user, delta);
+ } else {
kmemcheck_annotate_bitfield(fp, meta);
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
- fp->pages = size / PAGE_SIZE;
+ fp->pages = pages;
fp->aux->prog = fp;
/* We keep fp->aux from fp_old around in the new
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a73a056ccd88..f65bd2dc07f8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -652,19 +652,39 @@ static void free_used_maps(struct bpf_prog_aux *aux)
kfree(aux->used_maps);
}
+int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ unsigned long user_bufs;
+
+ if (user) {
+ user_bufs = atomic_long_add_return(pages, &user->locked_vm);
+ if (user_bufs > memlock_limit) {
+ atomic_long_sub(pages, &user->locked_vm);
+ return -EPERM;
+ }
+ }
+
+ return 0;
+}
+
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+ if (user)
+ atomic_long_sub(pages, &user->locked_vm);
+}
+
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = get_current_user();
- unsigned long memlock_limit;
-
- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ int ret;
- atomic_long_add(prog->pages, &user->locked_vm);
- if (atomic_long_read(&user->locked_vm) > memlock_limit) {
- atomic_long_sub(prog->pages, &user->locked_vm);
+ ret = __bpf_prog_charge(user, prog->pages);
+ if (ret) {
free_uid(user);
- return -EPERM;
+ return ret;
}
+
prog->aux->user = user;
return 0;
}
@@ -673,7 +693,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = prog->aux->user;
- atomic_long_sub(prog->pages, &user->locked_vm);
+ __bpf_prog_uncharge(user, prog->pages);
free_uid(user);
}
@@ -933,7 +953,24 @@ static int bpf_prog_attach(const union bpf_attr *attr)
bpf_prog_put(prog);
cgroup_put(cgrp);
break;
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ prog = bpf_prog_get_type(attr->attach_bpf_fd,
+ BPF_PROG_TYPE_CGROUP_SOCK);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp)) {
+ bpf_prog_put(prog);
+ return PTR_ERR(cgrp);
+ }
+ ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+ attr->attach_flags);
+ if (ret)
+ bpf_prog_put(prog);
+ cgroup_put(cgrp);
+ break;
default:
return -EINVAL;
}
@@ -961,7 +998,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_INET_EGRESS:
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
-
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCK;
+ break;
default:
return -EINVAL;
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d36c4f914a1e..c2508ca442b7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1544,6 +1544,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
struct cgroup *dcgrp = &dst_root->cgrp;
struct cgroup_subsys *ss;
int ssid, i, ret;
+ u16 dfl_disable_ss_mask = 0;
lockdep_assert_held(&cgroup_mutex);
@@ -1560,8 +1561,28 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
/* can't move between two non-dummy roots either */
if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
return -EBUSY;
+
+ /*
+ * Collect ssid's that need to be disabled from default
+ * hierarchy.
+ */
+ if (ss->root == &cgrp_dfl_root)
+ dfl_disable_ss_mask |= 1 << ssid;
+
} while_each_subsys_mask();
+ if (dfl_disable_ss_mask) {
+ struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
+
+ /*
+ * Controllers from default hierarchy that need to be rebound
+ * are all disabled together in one go.
+ */
+ cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
+ WARN_ON(cgroup_apply_control(scgrp));
+ cgroup_finalize_control(scgrp, 0);
+ }
+
do_each_subsys_mask(ss, ssid, ss_mask) {
struct cgroup_root *src_root = ss->root;
struct cgroup *scgrp = &src_root->cgrp;
@@ -1570,10 +1591,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
WARN_ON(!css || cgroup_css(dcgrp, ss));
- /* disable from the source */
- src_root->subsys_mask &= ~(1 << ssid);
- WARN_ON(cgroup_apply_control(scgrp));
- cgroup_finalize_control(scgrp, 0);
+ if (src_root != &cgrp_dfl_root) {
+ /* disable from the source */
+ src_root->subsys_mask &= ~(1 << ssid);
+ WARN_ON(cgroup_apply_control(scgrp));
+ cgroup_finalize_control(scgrp, 0);
+ }
/* rebind */
RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
diff --git a/kernel/exit.c b/kernel/exit.c
index babbc3c0a181..052aa52de331 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -616,6 +616,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
if (group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
+ tsk->exit_state = EXIT_ZOMBIE;
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
thread_group_empty(tsk) &&
diff --git a/kernel/fork.c b/kernel/fork.c
index 92a0df862115..a21adc0155b9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -11,6 +11,7 @@
* management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
*/
+#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
@@ -38,6 +39,7 @@
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/seccomp.h>
+#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
@@ -1294,6 +1296,84 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
task->pids[type].pid = pid;
}
+static int pidfd_release(struct inode *inode, struct file *file)
+{
+ struct pid *pid = file->private_data;
+
+ file->private_data = NULL;
+ put_pid(pid);
+ return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+ struct pid_namespace *ns = file_inode(m->file)->i_sb->s_fs_info;
+ struct pid *pid = f->private_data;
+
+ seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns));
+ seq_putc(m, '\n');
+}
+#endif
+
+/*
+ * Poll support for process exit notification.
+ */
+static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
+{
+ struct task_struct *task;
+ struct pid *pid = file->private_data;
+ int poll_flags = 0;
+
+ poll_wait(file, &pid->wait_pidfd, pts);
+
+ rcu_read_lock();
+ task = pid_task(pid, PIDTYPE_PID);
+ /*
+ * Inform pollers only when the whole thread group exits.
+ * If the thread group leader exits before all other threads in the
+ * group, then poll(2) should block, similar to the wait(2) family.
+ */
+ if (!task || (task->exit_state && thread_group_empty(task)))
+ poll_flags = POLLIN | POLLRDNORM;
+ rcu_read_unlock();
+
+ return poll_flags;
+}
+
+const struct file_operations pidfd_fops = {
+ .release = pidfd_release,
+ .poll = pidfd_poll,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = pidfd_show_fdinfo,
+#endif
+};
+
+/**
+ * pidfd_create() - Create a new pid file descriptor.
+ *
+ * @pid: struct pid that the pidfd will reference
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set.
+ *
+ * Note, that this function can only be called after the fd table has
+ * been unshared to avoid leaking the pidfd to the new process.
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+static int pidfd_create(struct pid *pid)
+{
+ int fd;
+
+ fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
+ O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ put_pid(pid);
+
+ return fd;
+}
+
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
@@ -1305,13 +1385,14 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
+ int __user *parent_tidptr,
int __user *child_tidptr,
struct pid *pid,
int trace,
unsigned long tls,
int node)
{
- int retval;
+ int pidfd = -1, retval;
struct task_struct *p;
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
@@ -1356,6 +1437,31 @@ static struct task_struct *copy_process(unsigned long clone_flags,
return ERR_PTR(-EINVAL);
}
+ if (clone_flags & CLONE_PIDFD) {
+ int reserved;
+
+ /*
+ * - CLONE_PARENT_SETTID is useless for pidfds and also
+ * parent_tidptr is used to return pidfds.
+ * - CLONE_DETACHED is blocked so that we can potentially
+ * reuse it later for CLONE_PIDFD.
+ * - CLONE_THREAD is blocked until someone really needs it.
+ */
+ if (clone_flags &
+ (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Verify that parent_tidptr is sane so we can potentially
+ * reuse it later.
+ */
+ if (get_user(reserved, parent_tidptr))
+ return ERR_PTR(-EFAULT);
+
+ if (reserved != 0)
+ return ERR_PTR(-EINVAL);
+ }
+
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;
@@ -1538,6 +1644,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
}
+ /*
+ * This has to happen after we've potentially unshared the file
+ * descriptor table (so that the pidfd doesn't leak into the child
+ * if the fd table isn't shared).
+ */
+ if (clone_flags & CLONE_PIDFD) {
+ retval = pidfd_create(pid);
+ if (retval < 0)
+ goto bad_fork_free_pid;
+
+ pidfd = retval;
+ retval = put_user(pidfd, parent_tidptr);
+ if (retval)
+ goto bad_fork_put_pidfd;
+ }
+
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
@@ -1587,7 +1709,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
*/
retval = cgroup_can_fork(p);
if (retval)
- goto bad_fork_free_pid;
+ goto bad_fork_cgroup_threadgroup_change_end;
/*
* From this point on we must avoid any synchronous user-space
@@ -1698,8 +1820,12 @@ bad_fork_cancel_cgroup:
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
cgroup_cancel_fork(p);
+bad_fork_cgroup_threadgroup_change_end:
+ cgroup_threadgroup_change_end(current);
+bad_fork_put_pidfd:
+ if (clone_flags & CLONE_PIDFD)
+ sys_close(pidfd);
bad_fork_free_pid:
- threadgroup_change_end(current);
if (pid != &init_struct_pid)
free_pid(pid);
bad_fork_cleanup_io:
@@ -1754,7 +1880,7 @@ static inline void init_idle_pids(struct pid_link *links)
struct task_struct *fork_idle(int cpu)
{
struct task_struct *task;
- task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
+ task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0,
cpu_to_node(cpu));
if (!IS_ERR(task)) {
init_idle_pids(task->pids);
@@ -1799,7 +1925,7 @@ long _do_fork(unsigned long clone_flags,
trace = 0;
}
- p = copy_process(clone_flags, stack_start, stack_size,
+ p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
/*
* Do this prior waking up the new thread - the thread pointer
diff --git a/kernel/pid.c b/kernel/pid.c
index ccfdb56321c6..9bc06f3a2b54 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -38,6 +38,8 @@
#include <linux/syscalls.h>
#include <linux/proc_ns.h>
#include <linux/proc_fs.h>
+#include <linux/anon_inodes.h>
+//#include <linux/sched/signal.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -331,6 +333,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
+ init_waitqueue_head(&pid->wait_pidfd);
+
upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
@@ -564,6 +568,76 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
return pid;
}
+/**
+ * pidfd_create() - Create a new pid file descriptor.
+ *
+ * @pid: struct pid that the pidfd will reference
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set.
+ *
+ * Note, that this function can only be called after the fd table has
+ * been unshared to avoid leaking the pidfd to the new process.
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+static int pidfd_create(struct pid *pid)
+{
+ int fd;
+
+ fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
+ O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ put_pid(pid);
+
+ return fd;
+}
+
+/**
+ * pidfd_open() - Open new pid file descriptor.
+ *
+ * @pid: pid for which to retrieve a pidfd
+ * @flags: flags to pass
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set for
+ * the process identified by @pid. Currently, the process identified by
+ * @pid must be a thread-group leader. This restriction currently exists
+ * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
+ * be used with CLONE_THREAD) and pidfd polling (only supports thread group
+ * leaders).
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
+{
+ int fd, ret;
+ struct pid *p;
+ struct task_struct *tsk;
+
+ if (flags)
+ return -EINVAL;
+
+ if (pid <= 0)
+ return -EINVAL;
+
+ p = find_get_pid(pid);
+ if (!p)
+ return -ESRCH;
+
+ ret = 0;
+ rcu_read_lock();
+ tsk = pid_task(p, PIDTYPE_PID);
+ /* Check that pid belongs to a group leader task */
+ if (!tsk || !thread_group_leader(tsk))
+ ret = -EINVAL;
+ rcu_read_unlock();
+
+ fd = ret ?: pidfd_create(p);
+ put_pid(p);
+ return fd;
+}
+
/*
* The pid hash table is scaled according to the amount of memory in the
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
diff --git a/kernel/signal.c b/kernel/signal.c
index a699055ebfe8..a9697e189a58 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -14,7 +14,9 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/proc_fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
@@ -1632,6 +1634,14 @@ ret:
return ret;
}
+static void do_notify_pidfd(struct task_struct *task)
+{
+ struct pid *pid;
+
+ pid = task_pid(task);
+ wake_up_all(&pid->wait_pidfd);
+}
+
/*
* Let a parent know about the death of a child.
* For a stopped/continued status change, use do_notify_parent_cldstop instead.
@@ -1655,6 +1665,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
BUG_ON(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
+ /* Wake up all pidfd waiters */
+ do_notify_pidfd(tsk);
+
if (sig != SIGCHLD) {
/*
* This is only possible if parent == real_parent.
@@ -2922,6 +2935,15 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
return ret;
}
+static inline void prepare_kill_siginfo(int sig, struct siginfo *info)
+{
+ info->si_signo = sig;
+ info->si_errno = 0;
+ info->si_code = SI_USER;
+ info->si_pid = task_tgid_vnr(current);
+ info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
+}
+
/**
* sys_kill - send a signal to a process
* @pid: the PID of the process
@@ -2931,15 +2953,125 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct siginfo info;
- info.si_signo = sig;
- info.si_errno = 0;
- info.si_code = SI_USER;
- info.si_pid = task_tgid_vnr(current);
- info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+ prepare_kill_siginfo(sig, &info);
return kill_something_info(sig, &info, pid);
}
+/*
+ * Verify that the signaler and signalee either are in the same pid namespace
+ * or that the signaler's pid namespace is an ancestor of the signalee's pid
+ * namespace.
+ */
+static bool access_pidfd_pidns(struct pid *pid)
+{
+ struct pid_namespace *active = task_active_pid_ns(current);
+ struct pid_namespace *p = ns_of_pid(pid);
+
+ for (;;) {
+ if (!p)
+ return false;
+ if (p == active)
+ break;
+ p = p->parent;
+ }
+
+ return true;
+}
+
+static struct pid *pidfd_to_pid(const struct file *file)
+{
+ if (file->f_op == &pidfd_fops)
+ return file->private_data;
+
+ return tgid_pidfd_to_pid(file);
+}
+
+static int copy_siginfo_from_user_any(siginfo_t *kinfo, siginfo_t __user *info)
+{
+#ifdef CONFIG_COMPAT
+ /*
+ * Avoid hooking up compat syscalls and instead handle necessary
+ * conversions here. Note, this is a stop-gap measure and should not be
+ * considered a generic solution.
+ */
+ if (in_compat_syscall())
+ return copy_siginfo_from_user32(
+ kinfo, (struct compat_siginfo __user *)info);
+#endif
+ return copy_from_user(kinfo, info, sizeof(siginfo_t));
+}
+
+/**
+ * sys_pidfd_send_signal - Signal a process through a pidfd
+ * @pidfd: file descriptor of the process
+ * @sig: signal to send
+ * @info: signal info
+ * @flags: future flags
+ *
+ * The syscall currently only signals via PIDTYPE_PID which covers
+ * kill(<positive-pid>, <signal>. It does not signal threads or process
+ * groups.
+ * In order to extend the syscall to threads and process groups the @flags
+ * argument should be used. In essence, the @flags argument will determine
+ * what is signaled and not the file descriptor itself. Put in other words,
+ * grouping is a property of the flags argument not a property of the file
+ * descriptor.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
+ siginfo_t __user *, info, unsigned int, flags)
+{
+ int ret;
+ struct fd f;
+ struct pid *pid;
+ siginfo_t kinfo;
+
+ /* Enforce flags be set to 0 until we add an extension. */
+ if (flags)
+ return -EINVAL;
+
+ f = fdget(pidfd);
+ if (!f.file)
+ return -EBADF;
+
+ /* Is this a pidfd? */
+ pid = pidfd_to_pid(f.file);
+ if (IS_ERR(pid)) {
+ ret = PTR_ERR(pid);
+ goto err;
+ }
+
+ ret = -EINVAL;
+ if (!access_pidfd_pidns(pid))
+ goto err;
+
+ if (info) {
+ ret = copy_siginfo_from_user_any(&kinfo, info);
+ if (unlikely(ret))
+ goto err;
+
+ ret = -EINVAL;
+ if (unlikely(sig != kinfo.si_signo))
+ goto err;
+
+ /* Only allow sending arbitrary signals to yourself. */
+ ret = -EPERM;
+ if ((task_pid(current) != pid) &&
+ (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
+ goto err;
+ } else {
+ prepare_kill_siginfo(sig, &kinfo);
+ }
+
+ ret = kill_pid_info(sig, &kinfo, pid);
+
+err:
+ fdput(f);
+ return ret;
+}
+
static int
do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
{
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c5484723abda..74fa302d9a68 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -128,8 +128,9 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs);
#else
/* See comment below, where ftrace_ops_list_func is defined */
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
-#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *regs);
+#define ftrace_ops_list_func ftrace_ops_no_ops
#endif
/*
@@ -5229,7 +5230,8 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
}
NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *regs)
{
__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
}
@@ -5677,14 +5679,17 @@ void ftrace_graph_graph_time_control(bool enable)
fgraph_graph_time = enable;
}
+void ftrace_graph_return_stub(struct ftrace_graph_ret *trace)
+{
+}
+
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
{
return 0;
}
/* The callbacks that hook a function */
-trace_func_graph_ret_t ftrace_graph_return =
- (trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ret_t ftrace_graph_return = ftrace_graph_return_stub;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -5912,7 +5917,7 @@ void unregister_ftrace_graph(void)
goto out;
ftrace_graph_active--;
- ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
+ ftrace_graph_return = ftrace_graph_return_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
__ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a67f792fb950..17996354c745 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7026,7 +7026,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
}
-static struct vfsmount *trace_automount(void *ingore)
+static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
{
struct vfsmount *mnt;
struct file_system_type *type;
@@ -7039,7 +7039,7 @@ static struct vfsmount *trace_automount(void *ingore)
type = get_fs_type("tracefs");
if (!type)
return NULL;
- mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
+ mnt = vfs_submount(mntpt, type, "tracefs", NULL);
put_filesystem(type);
if (IS_ERR(mnt))
return NULL;
diff --git a/mm/filemap.c b/mm/filemap.c
index 8309b5b773cd..1466db70dc42 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2245,7 +2245,7 @@ static struct page *wait_on_page_read(struct page *page)
static struct page *do_read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *, struct page *),
+ int (*filler)(struct file *, struct page *),
void *data,
gfp_t gfp)
{
@@ -2360,7 +2360,7 @@ out:
*/
struct page *read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *, struct page *),
+ int (*filler)(struct file *, struct page *),
void *data)
{
return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
@@ -2382,7 +2382,7 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
pgoff_t index,
gfp_t gfp)
{
- filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+ filler_t *filler = mapping->a_ops->readpage;
return do_read_cache_page(mapping, index, filler, NULL, gfp);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bd0c4bc00941..c7126875d8dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2087,8 +2087,9 @@ static void drain_pages(unsigned int cpu)
* The CPU has to be pinned. When zone parameter is non-NULL, spill just
* the single zone's pages.
*/
-void drain_local_pages(struct zone *zone)
+void drain_local_pages(void *z)
{
+ struct zone *zone = (struct zone *)z;
int cpu = smp_processor_id();
if (zone)
@@ -2148,8 +2149,7 @@ void drain_all_pages(struct zone *zone)
else
cpumask_clear_cpu(cpu, &cpus_with_pcps);
}
- on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
- zone, 1);
+ on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, zone, 1);
}
#ifdef CONFIG_HIBERNATION
diff --git a/mm/readahead.c b/mm/readahead.c
index fb1d210dbf05..3e25601ce14b 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -81,7 +81,7 @@ static void read_cache_pages_invalidate_pages(struct address_space *mapping,
* Hides the details of the LRU cache etc from the filesystems.
*/
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
- int (*filler)(void *, struct page *), void *data)
+ int (*filler)(struct file *, struct page *), void *data)
{
struct page *page;
int ret = 0;
diff --git a/net/compat.c b/net/compat.c
index 14459a87fdbc..53e58bfe0dc6 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -356,7 +356,8 @@ static int do_set_sock_timeout(struct socket *sock, int level,
static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
- if (optname == SO_ATTACH_FILTER)
+ if (optname == SO_ATTACH_FILTER ||
+ optname == SO_ATTACH_REUSEPORT_CBPF)
return do_set_attach_filter(sock, level, optname,
optval, optlen);
if (!COMPAT_USE_64BIT_TIME &&
diff --git a/net/core/filter.c b/net/core/filter.c
index fa6ec1310621..ebf7754810f0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1398,7 +1398,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
@@ -1433,7 +1433,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
{
void *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
goto err_clear;
ptr = skb_header_pointer(skb, offset, len, to);
@@ -2594,6 +2594,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto;
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
default:
return NULL;
}
@@ -2719,6 +2721,29 @@ static bool sk_filter_is_valid_access(int off, int size,
return __is_valid_access(off, size, type);
}
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ if (off < 0 || off + size > sizeof(struct bpf_sock))
+ return false;
+
+ /* The verifier guarantees that size > 0. */
+ if (off % size != 0)
+ return false;
+
+ return true;
+}
+
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
{
@@ -2977,6 +3002,30 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
return insn - insn_buf;
}
+static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
+ int dst_reg, int src_reg,
+ int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn_buf,
@@ -3050,6 +3099,12 @@ static const struct bpf_verifier_ops cg_skb_ops = {
.convert_ctx_access = sk_filter_convert_ctx_access,
};
+static const struct bpf_verifier_ops cg_sock_ops = {
+ .get_func_proto = sk_filter_func_proto,
+ .is_valid_access = sock_filter_is_valid_access,
+ .convert_ctx_access = sock_filter_convert_ctx_access,
+};
+
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
.ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3075,6 +3130,11 @@ static struct bpf_prog_type_list cg_skb_type __read_mostly = {
.type = BPF_PROG_TYPE_CGROUP_SKB,
};
+static struct bpf_prog_type_list cg_sock_type __read_mostly = {
+ .ops = &cg_sock_ops,
+ .type = BPF_PROG_TYPE_CGROUP_SOCK
+};
+
static int __init register_sk_filter_ops(void)
{
bpf_register_prog_type(&sk_filter_type);
@@ -3082,6 +3142,7 @@ static int __init register_sk_filter_ops(void)
bpf_register_prog_type(&sched_act_type);
bpf_register_prog_type(&xdp_type);
bpf_register_prog_type(&cg_skb_type);
+ bpf_register_prog_type(&cg_sock_type);
return 0;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b7734f91abd6..15a7a2667e1e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3105,6 +3105,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int pos;
int dummy;
+ if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
+ (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
+ /* gso_size is untrusted, and we have a frag_list with a linear
+ * non head_frag head.
+ *
+ * (we assume checking the first list_skb member suffices;
+ * i.e if either of the list_skb members have non head_frag
+ * head, then the first one has too).
+ *
+ * If head_skb's headlen does not fit requested gso_size, it
+ * means that the frag_list members do NOT terminate on exact
+ * gso_size boundaries. Hence we cannot perform skb_frag_t page
+ * sharing. Therefore we must fallback to copying the frag_list
+ * skbs; we do so by disabling SG.
+ */
+ if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
+ features &= ~NETIF_F_SG;
+ }
+
__skb_push(head_skb, doffset);
proto = skb_network_protocol(head_skb, &dummy);
if (unlikely(!proto))
@@ -3122,9 +3141,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int hsize;
int size;
- len = head_skb->len - offset;
- if (len > mss)
- len = mss;
+ if (unlikely(mss == GSO_BY_FRAGS)) {
+ len = list_skb->len;
+ } else {
+ len = head_skb->len - offset;
+ if (len > mss)
+ len = mss;
+ }
hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index ae0969c0fc2e..2ba3ae7720a8 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk)
* soft irq of receive path or setsockopt from process context
*/
spin_lock_bh(&reuseport_lock);
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "multiple allocations for the same socket");
+
+ /* Allocation attempts can occur concurrently via the setsockopt path
+ * and the bind/hash path. Nothing to do when we lose the race.
+ */
+ if (rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)))
+ goto out;
+
reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
@@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk)
reuse->num_socks = 1;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+out:
spin_unlock_bh(&reuseport_lock);
return 0;
@@ -87,22 +93,42 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
return more_reuse;
}
+static void reuseport_free_rcu(struct rcu_head *head)
+{
+ struct sock_reuseport *reuse;
+
+ reuse = container_of(head, struct sock_reuseport, rcu);
+ if (reuse->prog)
+ bpf_prog_destroy(reuse->prog);
+ kfree(reuse);
+}
+
/**
* reuseport_add_sock - Add a socket to the reuseport group of another.
* @sk: New socket to add to the group.
* @sk2: Socket belonging to the existing reuseport group.
* May return ENOMEM and not add socket to group under memory pressure.
*/
-int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2)
{
- struct sock_reuseport *reuse;
+ struct sock_reuseport *old_reuse, *reuse;
+
+ if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+ int err = reuseport_alloc(sk2);
+
+ if (err)
+ return err;
+ }
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "socket already in reuseport group");
+ lockdep_is_held(&reuseport_lock));
+ old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ if (old_reuse && old_reuse->num_socks != 1) {
+ spin_unlock_bh(&reuseport_lock);
+ return -EBUSY;
+ }
if (reuse->num_socks == reuse->max_socks) {
reuse = reuseport_grow(reuse);
@@ -120,20 +146,12 @@ int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
spin_unlock_bh(&reuseport_lock);
+ if (old_reuse)
+ call_rcu(&old_reuse->rcu, reuseport_free_rcu);
return 0;
}
EXPORT_SYMBOL(reuseport_add_sock);
-static void reuseport_free_rcu(struct rcu_head *head)
-{
- struct sock_reuseport *reuse;
-
- reuse = container_of(head, struct sock_reuseport, rcu);
- if (reuse->prog)
- bpf_prog_destroy(reuse->prog);
- kfree(reuse);
-}
-
void reuseport_detach_sock(struct sock *sk)
{
struct sock_reuseport *reuse;
@@ -173,7 +191,7 @@ static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
/* temporarily advance data past protocol header */
if (!pskb_pull(skb, hdr_len)) {
- consume_skb(nskb);
+ kfree_skb(nskb);
return NULL;
}
index = bpf_prog_run_save_cb(prog, skb);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 3f51280374f0..ac0e8152c0f1 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -804,7 +804,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
}
lookup:
- sk = __inet_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index bb1a7405dc0e..0958721e4d3d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -684,7 +684,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
lookup:
- sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport,
inet6_iif(skb));
if (!sk) {
@@ -1010,7 +1010,7 @@ static struct proto dccp_v6_prot = {
.sendmsg = dccp_sendmsg,
.recvmsg = dccp_recvmsg,
.backlog_rcv = dccp_v6_do_rcv,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.accept = inet_csk_accept,
.get_port = inet_csk_get_port,
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 42ab1b61b513..6383627b783e 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -182,12 +182,14 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
static HLIST_HEAD(raw_head);
static DEFINE_RWLOCK(raw_lock);
-static void raw_hash(struct sock *sk)
+static int raw_hash(struct sock *sk)
{
write_lock_bh(&raw_lock);
sk_add_node(sk, &raw_head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&raw_lock);
+
+ return 0;
}
static void raw_unhash(struct sock *sk)
@@ -462,12 +464,14 @@ static inline struct dgram_sock *dgram_sk(const struct sock *sk)
return container_of(sk, struct dgram_sock, sk);
}
-static void dgram_hash(struct sock *sk)
+static int dgram_hash(struct sock *sk)
{
write_lock_bh(&dgram_lock);
sk_add_node(sk, &dgram_head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&dgram_lock);
+
+ return 0;
}
static void dgram_unhash(struct sock *sk)
@@ -1034,8 +1038,13 @@ static int ieee802154_create(struct net *net, struct socket *sock,
/* Checksums on by default */
sock_set_flag(sk, SOCK_ZAPPED);
- if (sk->sk_prot->hash)
- sk->sk_prot->hash(sk);
+ if (sk->sk_prot->hash) {
+ rc = sk->sk_prot->hash(sk);
+ if (rc) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
if (sk->sk_prot->init) {
rc = sk->sk_prot->init(sk);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 39eeb5c2d23d..a35c252235ae 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -387,13 +387,27 @@ lookup_protocol:
*/
inet->inet_sport = htons(inet->inet_num);
/* Add to protocol hash chains. */
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err)
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
sk_common_release(sk);
+ goto out;
+ }
}
out:
return err;
@@ -1159,8 +1173,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
* Besides that, it does not check for connection
* uniqueness. Wait for troubles.
*/
- __sk_prot_rehash(sk);
- return 0;
+ return __sk_prot_rehash(sk);
}
int inet_sk_rebuild_header(struct sock *sk)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4b50bcbc6100..f6f3ee843d12 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,6 +24,7 @@
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
+#include <net/sock_reuseport.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
- (sk2->sk_state != TCP_TIME_WAIT &&
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@ -157,6 +159,7 @@ again:
sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(tb->fastuid, uid))) &&
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
@@ -225,15 +228,18 @@ tb_found:
if (((tb->fastreuse > 0 &&
sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size == -1) {
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) && smallest_size == -1) {
goto success;
} else {
ret = 1;
if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) &&
smallest_size != -1 && --attempts >= 0) {
spin_unlock(&head->lock);
goto again;
@@ -755,6 +761,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ int err = -EADDRINUSE;
reqsk_queue_alloc(&icsk->icsk_accept_queue);
@@ -772,13 +779,14 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
inet->inet_sport = htons(inet->inet_num);
sk_dst_reset(sk);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
- return 0;
+ if (likely(!err))
+ return 0;
}
sk->sk_state = TCP_CLOSE;
- return -EADDRINUSE;
+ return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fcb83b2a61f0..1582fe5b04c3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -370,18 +370,18 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
struct sock *sk;
if (req->sdiag_family == AF_INET)
- sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
+ sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6) {
if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
- sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
+ sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
req->id.idiag_dport, req->id.idiag_src[3],
req->id.idiag_sport, req->id.idiag_if);
else
- sk = inet6_lookup(net, hashinfo,
+ sk = inet6_lookup(net, hashinfo, NULL, 0,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
(struct in6_addr *)req->id.idiag_src,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 738cd5c822b1..b947e4c9be18 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -20,10 +20,12 @@
#include <linux/wait.h>
#include <linux/vmalloc.h>
+#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
+#include <net/sock_reuseport.h>
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
const __u16 lport, const __be32 faddr,
@@ -206,6 +208,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
const __be32 daddr, const unsigned short hnum,
const int dif)
@@ -215,6 +218,7 @@ struct sock *__inet_lookup_listener(struct net *net,
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
int score, hiscore, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 phash = 0;
rcu_read_lock();
@@ -230,6 +234,15 @@ begin:
if (reuseport) {
phash = inet_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+ sk2 = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (sk2) {
+ result = sk2;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == hiscore && reuseport) {
@@ -247,11 +260,13 @@ begin:
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, daddr,
dif) < hiscore)) {
sock_put(result);
+ select_ok = false;
goto begin;
}
}
@@ -450,32 +465,73 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
}
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
-void __inet_hash(struct sock *sk, struct sock *osk)
+static int inet_reuseport_add_sock(struct sock *sk,
+ struct inet_listen_hashbucket *ilb,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
+{
+ struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
+ struct sock *sk2;
+ struct hlist_nulls_node *node;
+ kuid_t uid = sock_i_uid(sk);
+
+ sk_nulls_for_each_rcu(sk2, node, &ilb->head) {
+ if (sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+ sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
+ inet_csk(sk2)->icsk_bind_hash == tb &&
+ sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
+ saddr_same(sk, sk2, false))
+ return reuseport_add_sock(sk, sk2);
+ }
+
+ return reuseport_alloc(sk);
+}
+
+int __inet_hash(struct sock *sk, struct sock *osk,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
+ int err = 0;
if (sk->sk_state != TCP_LISTEN) {
inet_ehash_nolisten(sk, osk);
- return;
+ return 0;
}
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
spin_lock(&ilb->lock);
+ if (sk->sk_reuseport) {
+ err = inet_reuseport_add_sock(sk, ilb, saddr_same);
+ if (err)
+ goto unlock;
+ }
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+unlock:
spin_unlock(&ilb->lock);
+
+ return err;
}
EXPORT_SYMBOL(__inet_hash);
-void inet_hash(struct sock *sk)
+int inet_hash(struct sock *sk)
{
+ int err = 0;
+
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- __inet_hash(sk, NULL);
+ err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal);
local_bh_enable();
}
+
+ return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
@@ -494,6 +550,8 @@ void inet_unhash(struct sock *sk)
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
spin_lock_bh(lock);
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
done = __sk_nulls_del_node_init_rcu(sk);
if (done)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c3f808602b65..f56e803c07f9 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -145,10 +145,12 @@ fail:
}
EXPORT_SYMBOL_GPL(ping_get_port);
-void ping_hash(struct sock *sk)
+int ping_hash(struct sock *sk)
{
pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
BUG(); /* "Please do not press this button again." */
+
+ return 0;
}
void ping_unhash(struct sock *sk)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 81ade975db5f..b75a24adb580 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ static struct raw_hashinfo raw_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
};
-void raw_hash_sk(struct sock *sk)
+int raw_hash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_head *head;
@@ -104,6 +104,8 @@ void raw_hash_sk(struct sock *sk)
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&h->lock);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(raw_hash_sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e373dcdab5e2..4a9d8c117794 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -643,8 +643,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = __inet_lookup_listener(net,
- &tcp_hashinfo, ip_hdr(skb)->saddr,
+ sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
+ ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
ntohs(th->source), inet_iif(skb));
/* don't send rst if it can't find key */
@@ -1631,7 +1631,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->sacked = 0;
lookup:
- sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
+ sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+ th->dest);
if (!sk)
goto no_tcp_socket;
@@ -1754,7 +1755,8 @@ do_time_wait:
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
- &tcp_hashinfo,
+ &tcp_hashinfo, skb,
+ __tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
inet_iif(skb));
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0a0d44c53373..4976a1c9835f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -143,10 +143,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
unsigned int log)
{
struct sock *sk2;
- struct hlist_nulls_node *node;
kuid_t uid = sock_i_uid(sk);
- sk_nulls_for_each(sk2, node, &hslot->head) {
+ sk_for_each(sk2, &hslot->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
(bitmap || udp_sk(sk2)->udp_port_hash == num) &&
@@ -177,12 +176,11 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
bool match_wildcard))
{
struct sock *sk2;
- struct hlist_nulls_node *node;
kuid_t uid = sock_i_uid(sk);
int res = 0;
spin_lock(&hslot2->lock);
- udp_portaddr_for_each_entry(sk2, node, &hslot2->head) {
+ udp_portaddr_for_each_entry(sk2, &hslot2->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
(udp_sk(sk2)->udp_port_hash == num) &&
@@ -207,11 +205,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
bool match_wildcard))
{
struct net *net = sock_net(sk);
- struct hlist_nulls_node *node;
kuid_t uid = sock_i_uid(sk);
struct sock *sk2;
- sk_nulls_for_each(sk2, node, &hslot->head) {
+ sk_for_each(sk2, &hslot->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
sk2->sk_family == sk->sk_family &&
@@ -224,10 +221,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
}
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
/**
@@ -338,17 +332,23 @@ found:
goto fail_unlock;
}
- sk_nulls_add_node_rcu(sk, &hslot->head);
+ sk_add_node_rcu(sk, &hslot->head);
hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
spin_lock(&hslot2->lock);
- hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
- &hslot2->head);
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+ hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+ &hslot2->head);
+ else
+ hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+ &hslot2->head);
hslot2->count++;
spin_unlock(&hslot2->lock);
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
error = 0;
fail_unlock:
spin_unlock_bh(&hslot->lock);
@@ -361,8 +361,8 @@ EXPORT_SYMBOL(udp_lib_get_port);
* match_wildcard == false: addresses must be exactly the same, i.e.
* 0.0.0.0 only equals to 0.0.0.0
*/
-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
- bool match_wildcard)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
+ bool match_wildcard)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -498,34 +498,31 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
-begin:
result = NULL;
badness = 0;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score2(sk, net, saddr, sport,
daddr, hnum, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, NULL, 0);
- if (sk2) {
- result = sk2;
- goto found;
- }
+ result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (result)
+ return result;
matches = 1;
}
+ badness = score;
+ result = sk;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -533,23 +530,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot2)
- goto begin;
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score2(result, net, saddr, sport,
- daddr, hnum, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
return result;
}
@@ -561,14 +541,12 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
int dif, struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
- rcu_read_lock();
if (hslot->count > 10) {
hash2 = udp4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
@@ -578,7 +556,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
@@ -588,33 +566,29 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
- rcu_read_unlock();
return result;
}
begin:
result = NULL;
badness = 0;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+ sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, hnum, sport,
daddr, dport, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, skb,
+ result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- goto found;
- }
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -622,25 +596,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score(result, net, saddr, hnum, sport,
- daddr, dport, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
@@ -656,13 +611,24 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
udptable, skb);
}
+/* Must be called under rcu_read_lock().
+ * Does increment socket refcount.
+ */
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
- return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif,
- &udp_table, NULL);
+ struct sock *sk;
+
+ sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ dif, &udp_table, NULL);
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ return sk;
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
+#endif
static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
__be16 loc_port, __be32 loc_addr,
@@ -764,7 +730,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
void udp_err(struct sk_buff *skb, u32 info)
@@ -1482,13 +1448,13 @@ void udp_lib_unhash(struct sock *sk)
spin_lock_bh(&hslot->lock);
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_detach_sock(sk);
- if (sk_nulls_del_node_init_rcu(sk)) {
+ if (sk_del_node_init_rcu(sk)) {
hslot->count--;
inet_sk(sk)->inet_num = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
spin_lock(&hslot2->lock);
- hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+ hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
hslot2->count--;
spin_unlock(&hslot2->lock);
}
@@ -1521,12 +1487,12 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
if (hslot2 != nhslot2) {
spin_lock(&hslot2->lock);
- hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+ hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
hslot2->count--;
spin_unlock(&hslot2->lock);
spin_lock(&nhslot2->lock);
- hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+ hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
&nhslot2->head);
nhslot2->count++;
spin_unlock(&nhslot2->lock);
@@ -1704,35 +1670,6 @@ drop:
return -1;
}
-static void flush_stack(struct sock **stack, unsigned int count,
- struct sk_buff *skb, unsigned int final)
-{
- unsigned int i;
- struct sk_buff *skb1 = NULL;
- struct sock *sk;
-
- for (i = 0; i < count; i++) {
- sk = stack[i];
- if (likely(!skb1))
- skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
-
- if (!skb1) {
- atomic_inc(&sk->sk_drops);
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- }
-
- if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0)
- skb1 = NULL;
-
- sock_put(sk);
- }
- if (unlikely(skb1))
- kfree_skb(skb1);
-}
-
/* For TCP sockets, sk_rx_dst is protected by socket lock
* For UDP, we use xchg() to guard against concurrent changes.
*/
@@ -1756,14 +1693,14 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udp_table *udptable,
int proto)
{
- struct sock *sk, *stack[256 / sizeof(struct sock *)];
- struct hlist_nulls_node *node;
+ struct sock *sk, *first = NULL;
unsigned short hnum = ntohs(uh->dest);
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
- int dif = skb->dev->ifindex;
- unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
- bool inner_flushed = false;
+ unsigned int offset = offsetof(typeof(*sk), sk_node);
+ int dif = skb->dev->ifindex;
+ struct hlist_node *node;
+ struct sk_buff *nskb;
if (use_hash2) {
hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
@@ -1774,23 +1711,28 @@ start_lookup:
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
- spin_lock(&hslot->lock);
- sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
- if (__udp_is_mcast_sock(net, sk,
- uh->dest, daddr,
- uh->source, saddr,
- dif, hnum)) {
- if (unlikely(count == ARRAY_SIZE(stack))) {
- flush_stack(stack, count, skb, ~0);
- inner_flushed = true;
- count = 0;
- }
- stack[count++] = sk;
- sock_hold(sk);
+ sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
+ if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
+ uh->source, saddr, dif, hnum))
+ continue;
+
+ if (!first) {
+ first = sk;
+ continue;
}
- }
+ nskb = skb_clone(skb, GFP_ATOMIC);
- spin_unlock(&hslot->lock);
+ if (unlikely(!nskb)) {
+ atomic_inc(&sk->sk_drops);
+ UDP_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ UDP_INC_STATS_BH(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ continue;
+ }
+ if (udp_queue_rcv_skb(sk, nskb) > 0)
+ consume_skb(nskb);
+ }
/* Also lookup *:port if we are using hash2 and haven't done so yet. */
if (use_hash2 && hash2 != hash2_any) {
@@ -1798,16 +1740,13 @@ start_lookup:
goto start_lookup;
}
- /*
- * do the slow work with no lock held
- */
- if (count) {
- flush_stack(stack, count, skb, count - 1);
+ if (first) {
+ if (udp_queue_rcv_skb(first, skb) > 0)
+ consume_skb(skb);
} else {
- if (!inner_flushed)
- UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
- consume_skb(skb);
+ kfree_skb(skb);
+ UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -1912,7 +1851,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
inet_compute_pseudo);
ret = udp_queue_rcv_skb(sk, skb);
- sock_put(sk);
/* a return value > 0 means to resubmit the input, but
* it wants the return to be -protocol, or 0
@@ -1973,49 +1911,24 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
int dif)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(loc_port);
- unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask);
+ unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
struct udp_hslot *hslot = &udp_table.hash[slot];
/* Do not bother scanning a too big list */
if (hslot->count > 10)
return NULL;
- rcu_read_lock();
-begin:
- count = 0;
result = NULL;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
- if (__udp_is_mcast_sock(net, sk,
- loc_port, loc_addr,
- rmt_port, rmt_addr,
- dif, hnum)) {
+ sk_for_each_rcu(sk, &hslot->head) {
+ if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
+ rmt_port, rmt_addr, dif, hnum)) {
+ if (result)
+ return NULL;
result = sk;
- ++count;
- }
- }
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
- if (count != 1 ||
- unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(!__udp_is_mcast_sock(net, result,
- loc_port, loc_addr,
- rmt_port, rmt_addr,
- dif, hnum))) {
- sock_put(result);
- result = NULL;
}
}
- rcu_read_unlock();
+
return result;
}
@@ -2028,37 +1941,22 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
__be16 rmt_port, __be32 rmt_addr,
int dif)
{
- struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+ struct sock *sk;
- rcu_read_lock();
- result = NULL;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
- if (INET_MATCH(sk, net, acookie,
- rmt_addr, loc_addr, ports, dif))
- result = sk;
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
+ if (INET_MATCH(sk, net, acookie, rmt_addr,
+ loc_addr, ports, dif))
+ return sk;
/* Only check first socket in chain */
break;
}
-
- if (result) {
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(!INET_MATCH(sk, net, acookie,
- rmt_addr, loc_addr,
- ports, dif))) {
- sock_put(result);
- result = NULL;
- }
- }
- rcu_read_unlock();
- return result;
+ return NULL;
}
void udp_v4_early_demux(struct sk_buff *skb)
@@ -2066,7 +1964,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
const struct iphdr *iph;
const struct udphdr *uh;
- struct sock *sk;
+ struct sock *sk = NULL;
struct dst_entry *dst;
int dif = skb->dev->ifindex;
int ours;
@@ -2098,11 +1996,9 @@ void udp_v4_early_demux(struct sk_buff *skb)
} else if (skb->pkt_type == PACKET_HOST) {
sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
uh->source, iph->saddr, dif);
- } else {
- return;
}
- if (!sk)
+ if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
return;
skb->sk = sk;
@@ -2395,7 +2291,6 @@ struct proto udp_prot = {
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
@@ -2417,14 +2312,13 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
for (state->bucket = start; state->bucket <= state->udp_table->mask;
++state->bucket) {
- struct hlist_nulls_node *node;
struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
- if (hlist_nulls_empty(&hslot->head))
+ if (hlist_empty(&hslot->head))
continue;
spin_lock_bh(&hslot->lock);
- sk_nulls_for_each(sk, node, &hslot->head) {
+ sk_for_each(sk, &hslot->head) {
if (!net_eq(sock_net(sk), net))
continue;
if (sk->sk_family == state->family)
@@ -2443,7 +2337,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
struct net *net = seq_file_net(seq);
do {
- sk = sk_nulls_next(sk);
+ sk = sk_next(sk);
} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
if (!sk) {
@@ -2658,12 +2552,12 @@ void __init udp_table_init(struct udp_table *table, const char *name)
table->hash2 = table->hash + (table->mask + 1);
for (i = 0; i <= table->mask; i++) {
- INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
+ INIT_HLIST_HEAD(&table->hash[i].head);
table->hash[i].count = 0;
spin_lock_init(&table->hash[i].lock);
}
for (i = 0; i <= table->mask; i++) {
- INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i);
+ INIT_HLIST_HEAD(&table->hash2[i].head);
table->hash2[i].count = 0;
spin_lock_init(&table->hash2[i].lock);
}
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 6510e961aaa6..9a89c10a55f0 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -36,10 +36,11 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req)
{
int err = -EINVAL;
- struct sock *sk;
+ struct sock *sk = NULL;
struct sk_buff *rep;
struct net *net = sock_net(in_skb->sk);
+ rcu_read_lock();
if (req->sdiag_family == AF_INET)
sk = __udp4_lib_lookup(net,
req->id.idiag_src[0], req->id.idiag_sport,
@@ -54,9 +55,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
req->id.idiag_dport,
req->id.idiag_if, tbl, NULL);
#endif
- else
- goto out_nosk;
-
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ rcu_read_unlock();
err = -ENOENT;
if (!sk)
goto out_nosk;
@@ -97,25 +98,24 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
- int num, s_num, slot, s_slot;
- struct net *net = sock_net(skb->sk);
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct net *net = sock_net(skb->sk);
+ int num, s_num, slot, s_slot;
s_slot = cb->args[0];
num = s_num = cb->args[1];
for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) {
- struct sock *sk;
- struct hlist_nulls_node *node;
struct udp_hslot *hslot = &table->hash[slot];
+ struct sock *sk;
num = 0;
- if (hlist_nulls_empty(&hslot->head))
+ if (hlist_empty(&hslot->head))
continue;
spin_lock_bh(&hslot->lock);
- sk_nulls_for_each(sk, node, &hslot->head) {
+ sk_for_each(sk, &hslot->head) {
struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 78766b32b78b..705d9fbf0bcf 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -55,7 +55,6 @@ struct proto udplite_prot = {
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
.obj_size = sizeof(struct udp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8e38e058a66c..3cd9ad455910 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -250,7 +250,11 @@ lookup_protocol:
* creation time automatically shares.
*/
inet->inet_sport = htons(inet->inet_num);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
@@ -259,6 +263,14 @@ lookup_protocol:
goto out;
}
}
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
out:
return err;
out_rcu_unlock:
@@ -277,6 +289,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
+ bool saved_ipv6only;
int addr_type = 0;
int err = 0;
@@ -396,19 +409,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
+ saved_ipv6only = sk->sk_ipv6only;
+ if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
+ sk->sk_ipv6only = 1;
+
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
}
- if (addr_type != IPV6_ADDR_ANY) {
+ if (addr_type != IPV6_ADDR_ANY)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
- if (addr_type != IPV6_ADDR_MAPPED)
- sk->sk_ipv6only = 1;
- }
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
inet->inet_sport = htons(inet->inet_num);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 908525ae5d19..c7ce585ac17e 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -26,6 +26,7 @@
#include <net/ip6_route.h>
#include <net/sock.h>
#include <net/inet6_connection_sock.h>
+#include <net/sock_reuseport.h>
int inet6_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax)
@@ -48,6 +49,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid,
sock_i_uid((struct sock *)sk2))))) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 21ace5a2bf7c..c27207563d10 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -17,11 +17,13 @@
#include <linux/module.h>
#include <linux/random.h>
+#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
+#include <net/sock_reuseport.h>
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
@@ -121,7 +123,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
}
struct sock *inet6_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif)
{
@@ -129,6 +133,7 @@ struct sock *inet6_lookup_listener(struct net *net,
const struct hlist_nulls_node *node;
struct sock *result;
int score, hiscore, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 phash = 0;
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -146,6 +151,15 @@ begin:
if (reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+ sk2 = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (sk2) {
+ result = sk2;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == hiscore && reuseport) {
@@ -163,11 +177,13 @@ begin:
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, daddr,
dif) < hiscore)) {
sock_put(result);
+ select_ok = false;
goto begin;
}
}
@@ -177,6 +193,7 @@ begin:
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
const int dif)
@@ -184,7 +201,8 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
struct sock *sk;
local_bh_disable();
- sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+ sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
+ ntohs(dport), dif);
local_bh_enable();
return sk;
@@ -274,3 +292,61 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
__inet6_check_established);
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
+
+int inet6_hash(struct sock *sk)
+{
+ int err = 0;
+
+ if (sk->sk_state != TCP_CLOSE) {
+ local_bh_disable();
+ err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
+ local_bh_enable();
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(inet6_hash);
+
+/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
+ * only, and any IPv4 addresses if not IPv6 only
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
+ */
+int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+ int sk2_ipv6only = inet_v6_ipv6only(sk2);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+ /* if both are mapped, treat as IPv4 */
+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
+ if (!sk2_ipv6only) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+ }
+
+ if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
+ return 1;
+
+ if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
+ !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (sk2_rcv_saddr6 &&
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e2fe07f7ded9..50091116fb55 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -895,7 +895,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
* no RST generated if md5 hash doesn't match.
*/
sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, &ipv6h->saddr,
+ &tcp_hashinfo, NULL, 0,
+ &ipv6h->saddr,
th->source, &ipv6h->daddr,
ntohs(th->source), tcp_v6_iif(skb));
if (!sk1)
@@ -1420,8 +1421,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
lookup:
- sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
- inet6_iif(skb));
+ sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+ th->source, th->dest, inet6_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1552,6 +1553,7 @@ do_time_wait:
struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest), tcp_v6_iif(skb));
@@ -1921,7 +1923,7 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b345b37edd4f..ce3a100cbf68 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -37,6 +37,7 @@
#include <linux/slab.h>
#include <asm/uaccess.h>
+#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
@@ -78,49 +79,6 @@ static u32 udp6_ehashfn(const struct net *net,
udp_ipv6_hash_secret + net_hash_mix(net));
}
-/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
- * only, and any IPv4 addresses if not IPv6 only
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
- * and 0.0.0.0 equals to 0.0.0.0 only
- */
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
-{
- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
-
- /* if both are mapped, treat as IPv4 */
- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
- if (!sk2_ipv6only) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
- return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
- return match_wildcard;
- }
- return 0;
- }
-
- if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
- return 1;
-
- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
-
- if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
-
- if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
- return 1;
-
- return 0;
-}
-
static u32 udp6_portaddr_hash(const struct net *net,
const struct in6_addr *addr6,
unsigned int port)
@@ -252,34 +210,32 @@ static inline int compute_score2(struct sock *sk, struct net *net,
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
-begin:
result = NULL;
badness = -1;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score2(sk, net, saddr, sport,
daddr, hnum, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, NULL, 0);
- if (sk2) {
- result = sk2;
- goto found;
- }
+
+ result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -287,27 +243,10 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot2)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score2(result, net, saddr, sport,
- daddr, hnum, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
return result;
}
+/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
@@ -315,14 +254,12 @@ struct sock *__udp6_lib_lookup(struct net *net,
struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
- rcu_read_lock();
if (hslot->count > 10) {
hash2 = udp6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
@@ -332,7 +269,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
@@ -342,32 +279,28 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
- rcu_read_unlock();
return result;
}
begin:
result = NULL;
badness = -1;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+ sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, skb,
+ result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- goto found;
- }
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -375,25 +308,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score(result, net, hnum, saddr, sport,
- daddr, dport, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
@@ -413,12 +327,24 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
udptable, skb);
}
+/* Must be called under rcu_read_lock().
+ * Does increment socket refcount.
+ */
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
- return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL);
+ struct sock *sk;
+
+ sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ dif, &udp_table, NULL);
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ return sk;
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
+#endif
/*
* This should be easy, if there is something there we
@@ -584,7 +510,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct net *net = dev_net(skb->dev);
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), udptable, skb);
+ inet6_iif(skb), udptable, NULL);
if (!sk) {
ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -615,7 +541,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -775,33 +701,6 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
return true;
}
-static void flush_stack(struct sock **stack, unsigned int count,
- struct sk_buff *skb, unsigned int final)
-{
- struct sk_buff *skb1 = NULL;
- struct sock *sk;
- unsigned int i;
-
- for (i = 0; i < count; i++) {
- sk = stack[i];
- if (likely(!skb1))
- skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
- if (!skb1) {
- atomic_inc(&sk->sk_drops);
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- }
-
- if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
- skb1 = NULL;
- sock_put(sk);
- }
- if (unlikely(skb1))
- kfree_skb(skb1);
-}
-
static void udp6_csum_zero_error(struct sk_buff *skb)
{
/* RFC 2460 section 8.1 says that we SHOULD log
@@ -820,15 +719,15 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
const struct in6_addr *saddr, const struct in6_addr *daddr,
struct udp_table *udptable, int proto)
{
- struct sock *sk, *stack[256 / sizeof(struct sock *)];
+ struct sock *sk, *first = NULL;
const struct udphdr *uh = udp_hdr(skb);
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(uh->dest);
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
- int dif = inet6_iif(skb);
- unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int offset = offsetof(typeof(*sk), sk_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
- bool inner_flushed = false;
+ int dif = inet6_iif(skb);
+ struct hlist_node *node;
+ struct sk_buff *nskb;
if (use_hash2) {
hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
@@ -839,27 +738,32 @@ start_lookup:
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
- spin_lock(&hslot->lock);
- sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
- if (__udp_v6_is_mcast_sock(net, sk,
- uh->dest, daddr,
- uh->source, saddr,
- dif, hnum) &&
- /* If zero checksum and no_check is not on for
- * the socket then skip it.
- */
- (uh->check || udp_sk(sk)->no_check6_rx)) {
- if (unlikely(count == ARRAY_SIZE(stack))) {
- flush_stack(stack, count, skb, ~0);
- inner_flushed = true;
- count = 0;
- }
- stack[count++] = sk;
- sock_hold(sk);
+ sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
+ if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
+ uh->source, saddr, dif, hnum))
+ continue;
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ continue;
+ if (!first) {
+ first = sk;
+ continue;
+ }
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ atomic_inc(&sk->sk_drops);
+ UDP6_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ continue;
}
- }
- spin_unlock(&hslot->lock);
+ if (udpv6_queue_rcv_skb(sk, nskb) > 0)
+ consume_skb(nskb);
+ }
/* Also lookup *:port if we are using hash2 and haven't done so yet. */
if (use_hash2 && hash2 != hash2_any) {
@@ -867,13 +771,13 @@ start_lookup:
goto start_lookup;
}
- if (count) {
- flush_stack(stack, count, skb, count - 1);
+ if (first) {
+ if (udpv6_queue_rcv_skb(first, skb) > 0)
+ consume_skb(skb);
} else {
- if (!inner_flushed)
- UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
- consume_skb(skb);
+ kfree_skb(skb);
+ UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -881,10 +785,10 @@ start_lookup:
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
+ const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
- struct sock *sk;
struct udphdr *uh;
- const struct in6_addr *saddr, *daddr;
+ struct sock *sk;
u32 ulen = 0;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -938,7 +842,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int ret;
if (!uh->check && !udp_sk(sk)->no_check6_rx) {
- sock_put(sk);
udp6_csum_zero_error(skb);
goto csum_error;
}
@@ -948,7 +851,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ip6_compute_pseudo);
ret = udpv6_queue_rcv_skb(sk, skb);
- sock_put(sk);
/* a return value > 0 means to resubmit the input */
if (ret > 0)
@@ -995,7 +897,6 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
int dif)
{
struct sock *sk;
- struct hlist_nulls_node *hnode;
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
@@ -1003,7 +904,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
- udp_portaddr_for_each_entry_rcu(sk, hnode, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
return sk;
@@ -1648,7 +1549,6 @@ struct proto udpv6_prot = {
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index d1eaeeaa34d2..af2895c77ed6 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -50,7 +50,6 @@ struct proto udplitev6_prot = {
.unhash = udp_lib_unhash,
.get_port = udp_v6_get_port,
.obj_size = sizeof(struct udp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index aed0c7350988..3a31370d568a 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -212,13 +212,14 @@ discard:
return 0;
}
-static void l2tp_ip_hash(struct sock *sk)
+static int l2tp_ip_hash(struct sock *sk)
{
if (sk_unhashed(sk)) {
write_lock_bh(&l2tp_ip_lock);
sk_add_node(sk, &l2tp_ip_table);
write_unlock_bh(&l2tp_ip_lock);
}
+ return 0;
}
static void l2tp_ip_unhash(struct sock *sk)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index bcb7bce86d98..8cc6a554b6f8 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -225,13 +225,14 @@ discard:
return 0;
}
-static void l2tp_ip6_hash(struct sock *sk)
+static int l2tp_ip6_hash(struct sock *sk)
{
if (sk_unhashed(sk)) {
write_lock_bh(&l2tp_ip6_lock);
sk_add_node(sk, &l2tp_ip6_table);
write_unlock_bh(&l2tp_ip6_lock);
}
+ return 0;
}
static void l2tp_ip6_unhash(struct sock *sk)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2ed50ba6c1af..d32a3fd4528e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1600,7 +1600,7 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
{
int i, bucket, rc;
unsigned int hashsize, old_size;
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index e84a578dbe35..d76afafdc699 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -134,7 +134,7 @@ static int __init nf_nat_ftp_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 1fb2258c3535..8039bcd60758 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -107,7 +107,7 @@ static int __init nf_nat_irc_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 3ab591e73ec0..7f4414d26a66 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -105,19 +105,24 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
* belonging to established connections going through that one.
*/
static inline struct sock *
-nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
+ ip_hdrlen(skb) +
+ __tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
in->ifindex);
@@ -169,19 +174,23 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
#ifdef XT_TPROXY_HAVE_IPV6
static inline struct sock *
-nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet6_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
+ thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
in->ifindex);
@@ -267,7 +276,7 @@ tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -305,7 +314,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -321,7 +330,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -429,7 +438,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
@@ -472,7 +481,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,8 +496,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
- &iph->saddr, laddr,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
+ tproto, &iph->saddr, laddr,
hp->source, lport,
par->in, NFT_LOOKUP_LISTENER);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 939821821fcb..c5d930670cf0 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -112,14 +112,15 @@ extract_icmp4_fields(const struct sk_buff *skb,
* box.
*/
static struct sock *
-xt_socket_get_sock_v4(struct net *net, const u8 protocol,
+xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return __inet_lookup(net, &tcp_hashinfo,
+ return __inet_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -148,6 +149,8 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net,
const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
struct sock *sk = skb->sk;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
@@ -173,6 +176,10 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net,
sport = hp->source;
daddr = iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = iph->protocol == IPPROTO_TCP ?
+ ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
+ ip_hdrlen(skb) + sizeof(*hp);
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
@@ -205,9 +212,9 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net,
if (sk)
atomic_inc(&sk->sk_refcnt);
else
- sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
- saddr, daddr, sport, dport,
- indev);
+ sk = xt_socket_get_sock_v4(dev_net(skb->dev), data_skb, doff,
+ protocol, saddr, daddr, sport,
+ dport, indev);
return sk;
}
@@ -328,14 +335,15 @@ extract_icmp6_fields(const struct sk_buff *skb,
}
static struct sock *
-xt_socket_get_sock_v6(struct net *net, const u8 protocol,
+xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return inet6_lookup(net, &tcp_hashinfo,
+ return inet6_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -354,6 +362,8 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net,
__be16 uninitialized_var(dport), uninitialized_var(sport);
const struct in6_addr *daddr = NULL, *saddr = NULL;
struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
int thoff = 0, tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
@@ -375,6 +385,10 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net,
sport = hp->source;
daddr = &iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = tproto == IPPROTO_TCP ?
+ thoff + __tcp_hdrlen((struct tcphdr *)hp) :
+ thoff + sizeof(*hp);
} else if (tproto == IPPROTO_ICMPV6) {
struct ipv6hdr ipv6_var;
@@ -389,8 +403,8 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net,
if (sk)
atomic_inc(&sk->sk_refcnt);
else
- sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
- saddr, daddr, sport, dport,
+ sk = xt_socket_get_sock_v6(dev_net(skb->dev), data_skb, doff,
+ tproto, saddr, daddr, sport, dport,
indev);
return sk;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index d575ef4e9aa6..ffd5f2297584 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -140,13 +140,15 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
rcu_read_unlock();
}
-void pn_sock_hash(struct sock *sk)
+int pn_sock_hash(struct sock *sk)
{
struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
mutex_lock(&pnsocks.lock);
sk_add_node_rcu(sk, hlist);
mutex_unlock(&pnsocks.lock);
+
+ return 0;
}
EXPORT_SYMBOL(pn_sock_hash);
@@ -200,7 +202,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
pn->resource = spn->spn_resource;
/* Enable RX on the socket */
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
out_port:
mutex_unlock(&port_mutex);
out:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fcac88f1774b..4d60f1e42f42 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6171,9 +6171,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
return retval;
}
-static void sctp_hash(struct sock *sk)
+static int sctp_hash(struct sock *sk)
{
/* STUB */
+ return 0;
}
static void sctp_unhash(struct sock *sk)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 41f6e964fe91..1a7d930a867c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map);
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
static int
-param_set_pool_mode(const char *val, struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
struct svc_pool_map *m = &svc_pool_map;
@@ -80,7 +80,7 @@ out:
}
static int
-param_get_pool_mode(char *buf, struct kernel_param *kp)
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 2e69bf6da207..80cfd227c984 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -37,7 +37,9 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
(void *) BPF_FUNC_clone_redirect;
static int (*bpf_redirect)(int ifindex, int flags) =
(void *) BPF_FUNC_redirect;
-static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+ unsigned long long flags, void *data,
+ int size) =
(void *) BPF_FUNC_perf_event_output;
static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
(void *) BPF_FUNC_get_stackid;
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 1112dd94b440..8a491e6b542c 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -148,6 +148,37 @@ cc-disable-warning = $(call try-run,\
# Expands to either gcc or clang
cc-name = $(shell $(CC) -v 2>&1 | grep -q "clang version" && echo clang || echo gcc)
+# __cc-version
+# Returns compiler version
+__cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/$(cc-name)-version.sh $(CC))
+
+# __cc-fullversion
+# Returns full compiler version
+__cc-fullversion = $(shell $(CONFIG_SHELL) \
+ $(srctree)/scripts/$(cc-name)-version.sh -p $(CC))
+
+# __cc-ifversion
+# Matches compiler name and version
+# Usage: EXTRA_CFLAGS += $(call cc-if-name-version, gcc, -lt, 0402, -O1)
+__cc-ifversion = $(shell [ $(cc-name) = $(1) ] && [ $(__cc-version) $(2) $(3) ] && echo $(4) || echo $(5))
+
+# __cc-if-fullversion
+# Matches compiler name and full version
+# Usage: EXTRA_CFLAGS += $(call cc-if-name-fullversion, gcc, -lt, 040502, -O1)
+__cc-if-fullversion = $(shell [ $(cc-name) = $(1) ] && [ $(__cc-fullversion) $(2) $(3) ] && echo $(4) || echo $(5))
+
+# gcc-ifversion
+gcc-ifversion = $(call __cc-ifversion, gcc, $(1), $(2), $(3), $(4))
+
+# gcc-if-fullversion
+gcc-if-fullversion = (call __cc-if-fullversion, gcc, $(1), $(2), $(3), $(4))
+
+# clang-ifversion
+clang-ifversion = $(call __cc-ifversion, clang, $(1), $(2), $(3), $(4))
+
+# clang-if-fullversion
+clang-if-fullversion = (call __cc-if-fullversion, clang, $(1), $(2), $(3), $(4))
+
# cc-version
cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
@@ -155,9 +186,9 @@ cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
cc-fullversion = $(shell $(CONFIG_SHELL) \
$(srctree)/scripts/gcc-version.sh -p $(CC))
-# cc-ifversion
-# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
-cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4))
+# backward compatibility
+cc-ifversion = $(gcc-ifversion)
+cc-if-fullversion = $(gcc-if-fullversion)
# cc-ldoption
# Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
@@ -165,14 +196,18 @@ cc-ldoption = $(call try-run,\
$(CC) $(1) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
# ld-option
-# Usage: LDFLAGS += $(call ld-option, -X)
-ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2))
+# Usage: LDFLAGS += $(call ld-option, -X, -Y)
+ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2),$(3))
# ar-option
# Usage: KBUILD_ARFLAGS := $(call ar-option,D)
# Important: no spaces around options
ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2))
+# ld-name
+# Expands to either bfd or gold
+ld-name = $(shell $(LD) -v 2>&1 | grep -q "GNU gold" && echo gold || echo bfd)
+
# ld-version
# Note this is mainly for HJ Lu's 3 number binutil versions
ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
@@ -181,6 +216,18 @@ ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
# Usage: $(call ld-ifversion, -ge, 22252, y)
ld-ifversion = $(shell [ $(ld-version) $(1) $(2) ] && echo $(3) || echo $(4))
+# __ld-ifversion
+# Usage: $(call __ld-ifversion, gold, -ge, 112000000, y)
+__ld-ifversion = $(shell [ $(ld-name) = $(1) ] && [ $(ld-version) $(2) $(3) ] && echo $(4) || echo $(5))
+
+# bfd-ifversion
+# Usage: $(call bfd-ifversion, -ge, 227000000, y)
+bfd-ifversion = $(call __ld-ifversion, bfd, $(1), $(2), $(3), $(4))
+
+# gold-ifversion
+# Usage: $(call gold-ifversion, -ge, 112000000, y)
+gold-ifversion = $(call __ld-ifversion, gold, $(1), $(2), $(3), $(4))
+
######
###
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 726d1b3f6759..23d49459ee66 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -280,6 +280,13 @@ define rule_as_o_S
mv -f $(dot-target).tmp $(dot-target).cmd
endef
+# List module undefined symbols (or empty line if not enabled)
+ifdef CONFIG_TRIM_UNUSED_KSYMS
+cmd_undef_syms = $(NM) $@ | sed -n 's/^ \+U //p' | xargs echo
+else
+cmd_undef_syms = echo
+endif
+
# Built-in and composite module parts
$(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
$(call cmd,force_checksrc)
@@ -290,7 +297,8 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
$(call cmd,force_checksrc)
$(call if_changed_rule,cc_o_c)
- @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod)
+ @{ echo $(@:.o=.ko); echo $@; \
+ $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
quiet_cmd_cc_lst_c = MKLST $@
cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \
@@ -415,12 +423,22 @@ $(sort $(subdir-obj-y)): $(subdir-ym) ;
# Rule to compile a set of .o files into one .o file
#
ifdef builtin-target
-quiet_cmd_link_o_target = LD $@
+
+ifdef CONFIG_THIN_ARCHIVES
+ cmd_make_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
+ cmd_make_empty_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
+ quiet_cmd_link_o_target = AR $@
+else
+ cmd_make_builtin = $(LD) $(ld_flags) -r -o
+ cmd_make_empty_builtin = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS)
+ quiet_cmd_link_o_target = LD $@
+endif
+
# If the list of objects to link is empty, just create an empty built-in.o
cmd_link_o_target = $(if $(strip $(obj-y)),\
- $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^) \
+ $(cmd_make_builtin) $@ $(filter $(obj-y), $^) \
$(cmd_secanalysis),\
- rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@)
+ $(cmd_make_empty_builtin) $@)
$(builtin-target): $(obj-y) FORCE
$(call if_changed,link_o_target)
@@ -446,7 +464,12 @@ $(modorder-target): $(subdir-ym) FORCE
#
ifdef lib-target
quiet_cmd_link_l_target = AR $@
-cmd_link_l_target = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@ $(lib-y)
+
+ifdef CONFIG_THIN_ARCHIVES
+ cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)
+else
+ cmd_link_l_target = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@ $(lib-y)
+endif
$(lib-target): $(lib-y) FORCE
$(call if_changed,link_l_target)
@@ -461,16 +484,27 @@ endif
# <composite-object>-objs := <list of .o files>
# or
# <composite-object>-y := <list of .o files>
+# or
+# <composite-object>-m := <list of .o files>
+# The -m syntax only works if <composite object> is a module
link_multi_deps = \
$(filter $(addprefix $(obj)/, \
$($(subst $(obj)/,,$(@:.o=-objs))) \
-$($(subst $(obj)/,,$(@:.o=-y)))), $^)
+$($(subst $(obj)/,,$(@:.o=-y))) \
+$($(subst $(obj)/,,$(@:.o=-m)))), $^)
-quiet_cmd_link_multi-y = LD $@
-cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
+cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
+
+ifdef CONFIG_THIN_ARCHIVES
+ quiet_cmd_link_multi-y = AR $@
+ cmd_link_multi-y = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps)
+else
+ quiet_cmd_link_multi-y = LD $@
+ cmd_link_multi-y = $(cmd_link_multi-link)
+endif
quiet_cmd_link_multi-m = LD [M] $@
-cmd_link_multi-m = $(cmd_link_multi-y)
+cmd_link_multi-m = $(cmd_link_multi-link)
$(multi-used-y): FORCE
$(call if_changed,link_multi-y)
@@ -478,8 +512,9 @@ $(call multi_depend, $(multi-used-y), .o, -objs -y)
$(multi-used-m): FORCE
$(call if_changed,link_multi-m)
- @{ echo $(@:.o=.ko); echo $(link_multi_deps); } > $(MODVERDIR)/$(@F:.o=.mod)
-$(call multi_depend, $(multi-used-m), .o, -objs -y)
+ @{ echo $(@:.o=.ko); echo $(link_multi_deps); \
+ $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
+$(call multi_depend, $(multi-used-m), .o, -objs -y -m)
targets += $(multi-used-y) $(multi-used-m)
diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index 133edfae5b8a..a89a0c5d43b2 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host
@@ -85,7 +85,7 @@ hostcxx_flags = -Wp,-MD,$(depfile) $(__hostcxx_flags)
# Create executable from a single .c file
# host-csingle -> Executable
quiet_cmd_host-csingle = HOSTCC $@
- cmd_host-csingle = $(HOSTCC) $(hostc_flags) -o $@ $< \
+ cmd_host-csingle = $(HOSTCC) $(hostc_flags) $(HOSTLDFLAGS) -o $@ $< \
$(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
$(host-csingle): $(obj)/%: $(src)/%.c FORCE
$(call if_changed_dep,host-csingle)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 5a93712ea570..2daf8d6e49a1 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -48,7 +48,7 @@ subdir-ym := $(sort $(subdir-y) $(subdir-m))
# if $(foo-objs) exists, foo.o is a composite object
multi-used-y := $(sort $(foreach m,$(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m))))
-multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m))))
+multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m))), $(m))))
multi-used := $(multi-used-y) $(multi-used-m)
single-used-m := $(sort $(filter-out $(multi-used-m),$(obj-m)))
@@ -67,7 +67,7 @@ obj-dirs := $(dir $(multi-objs) $(obj-y))
# Replace multi-part objects by their individual parts, look at local dir only
real-objs-y := $(foreach m, $(filter-out $(subdir-obj-y), $(obj-y)), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))),$($(m:.o=-objs)) $($(m:.o=-y)),$(m))) $(extra-y)
-real-objs-m := $(foreach m, $(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))),$($(m:.o=-objs)) $($(m:.o=-y)),$(m)))
+real-objs-m := $(foreach m, $(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m))),$($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m)),$(m)))
# Add subdir path
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 7718a64b1cd1..8cb7971b3f25 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -115,14 +115,18 @@ $(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE
targets += $(modules:.ko=.mod.o)
-# Step 6), final link of the modules
+ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
+
+# Step 6), final link of the modules with optional arch pass after final link
quiet_cmd_ld_ko_o = LD [M] $@
- cmd_ld_ko_o = $(LD) -r $(LDFLAGS) \
- $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
- -o $@ $(filter-out FORCE,$^)
+ cmd_ld_ko_o = \
+ $(LD) -r $(LDFLAGS) \
+ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
+ -o $@ $(filter-out FORCE,$^) ; \
+ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
$(modules): %.ko :%.o %.mod.o FORCE
- $(call if_changed,ld_ko_o)
+ +$(call if_changed,ld_ko_o)
targets += $(modules)
diff --git a/scripts/clang-version.sh b/scripts/clang-version.sh
new file mode 100755
index 000000000000..9780efa56980
--- /dev/null
+++ b/scripts/clang-version.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-version [-p] clang-command
+#
+# Prints the compiler version of `clang-command' in a canonical 4-digit form
+# such as `0500' for clang-5.0 etc.
+#
+# With the -p option, prints the patchlevel as well, for example `050001' for
+# clang-5.0.1 etc.
+#
+
+if [ "$1" = "-p" ] ; then
+ with_patchlevel=1;
+ shift;
+fi
+
+compiler="$*"
+
+if [ ${#compiler} -eq 0 ]; then
+ echo "Error: No compiler specified."
+ printf "Usage:\n\t$0 <clang-command>\n"
+ exit 1
+fi
+
+MAJOR=$(echo __clang_major__ | $compiler -E -x c - | tail -n 1)
+MINOR=$(echo __clang_minor__ | $compiler -E -x c - | tail -n 1)
+if [ "x$with_patchlevel" != "x" ] ; then
+ PATCHLEVEL=$(echo __clang_patchlevel__ | $compiler -E -x c - | tail -n 1)
+ printf "%02d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
+else
+ printf "%02d%02d\\n" $MAJOR $MINOR
+fi
diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c
index b071bf476fea..dd1a4bd706a2 100644
--- a/scripts/extract-cert.c
+++ b/scripts/extract-cert.c
@@ -23,6 +23,13 @@
#include <openssl/err.h>
#include <openssl/engine.h>
+/*
+ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API.
+ *
+ * Remove this if/when that API is no longer used
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
#define PKEY_ID_PKCS7 2
static __attribute__((noreturn))
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index ba6c34ea5429..0a08006d600a 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -3,9 +3,12 @@
# link vmlinux
#
# vmlinux is linked from the objects selected by $(KBUILD_VMLINUX_INIT) and
-# $(KBUILD_VMLINUX_MAIN). Most are built-in.o files from top-level directories
-# in the kernel tree, others are specified in arch/$(ARCH)/Makefile.
-# Ordering when linking is important, and $(KBUILD_VMLINUX_INIT) must be first.
+# $(KBUILD_VMLINUX_MAIN) and $(KBUILD_VMLINUX_LIBS). Most are built-in.o files
+# from top-level directories in the kernel tree, others are specified in
+# arch/$(ARCH)/Makefile. Ordering when linking is important, and
+# $(KBUILD_VMLINUX_INIT) must be first. $(KBUILD_VMLINUX_LIBS) are archives
+# which are linked conditionally (not within --whole-archive), and do not
+# require symbol indexes added.
#
# vmlinux
# ^
@@ -16,6 +19,9 @@
# +--< $(KBUILD_VMLINUX_MAIN)
# | +--< drivers/built-in.o mm/built-in.o + more
# |
+# +--< $(KBUILD_VMLINUX_LIBS)
+# | +--< lib/lib.a + more
+# |
# +-< ${kallsymso} (see description in KALLSYMS section)
#
# vmlinux version (uname -v) cannot be updated during normal
@@ -37,12 +43,47 @@ info()
fi
}
+# Thin archive build here makes a final archive with symbol table and indexes
+# from vmlinux objects INIT and MAIN, which can be used as input to linker.
+# KBUILD_VMLINUX_LIBS archives should already have symbol table and indexes
+# added.
+#
+# Traditional incremental style of link does not require this step
+#
+# built-in.o output file
+#
+archive_builtin()
+{
+ if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+ info AR built-in.o
+ rm -f built-in.o;
+ ${AR} rcsTP${KBUILD_ARFLAGS} built-in.o \
+ ${KBUILD_VMLINUX_INIT} \
+ ${KBUILD_VMLINUX_MAIN}
+ fi
+}
+
# Link of vmlinux.o used for section mismatch analysis
# ${1} output file
modpost_link()
{
- ${LD} ${LDFLAGS} -r -o ${1} ${KBUILD_VMLINUX_INIT} \
- --start-group ${KBUILD_VMLINUX_MAIN} --end-group
+ local objects
+
+ if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+ objects="--whole-archive \
+ built-in.o \
+ --no-whole-archive \
+ --start-group \
+ ${KBUILD_VMLINUX_LIBS} \
+ --end-group"
+ else
+ objects="${KBUILD_VMLINUX_INIT} \
+ --start-group \
+ ${KBUILD_VMLINUX_MAIN} \
+ ${KBUILD_VMLINUX_LIBS} \
+ --end-group"
+ fi
+ ${LD} ${LDFLAGS} -r -o ${1} ${objects}
}
# Link of vmlinux
@@ -51,18 +92,50 @@ modpost_link()
vmlinux_link()
{
local lds="${objtree}/${KBUILD_LDS}"
+ local objects
if [ "${SRCARCH}" != "um" ]; then
- ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
- -T ${lds} ${KBUILD_VMLINUX_INIT} \
- --start-group ${KBUILD_VMLINUX_MAIN} --end-group ${1}
+ if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+ objects="--whole-archive \
+ built-in.o \
+ --no-whole-archive \
+ --start-group \
+ ${KBUILD_VMLINUX_LIBS} \
+ --end-group \
+ ${1}"
+ else
+ objects="${KBUILD_VMLINUX_INIT} \
+ --start-group \
+ ${KBUILD_VMLINUX_MAIN} \
+ ${KBUILD_VMLINUX_LIBS} \
+ --end-group \
+ ${1}"
+ fi
+
+ ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
+ -T ${lds} ${objects}
else
- ${CC} ${CFLAGS_vmlinux} -o ${2} \
- -Wl,-T,${lds} ${KBUILD_VMLINUX_INIT} \
- -Wl,--start-group \
- ${KBUILD_VMLINUX_MAIN} \
- -Wl,--end-group \
- -lutil -lrt -lpthread ${1}
+ if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+ objects="-Wl,--whole-archive \
+ built-in.o \
+ -Wl,--no-whole-archive \
+ -Wl,--start-group \
+ ${KBUILD_VMLINUX_LIBS} \
+ -Wl,--end-group \
+ ${1}"
+ else
+ objects="${KBUILD_VMLINUX_INIT} \
+ -Wl,--start-group \
+ ${KBUILD_VMLINUX_MAIN} \
+ ${KBUILD_VMLINUX_LIBS} \
+ -Wl,--end-group \
+ ${1}"
+ fi
+
+ ${CC} ${CFLAGS_vmlinux} -o ${2} \
+ -Wl,-T,${lds} \
+ ${objects} \
+ -lutil -lrt -lpthread
rm -f linux
fi
}
@@ -118,6 +191,7 @@ cleanup()
rm -f .tmp_kallsyms*
rm -f .tmp_version
rm -f .tmp_vmlinux*
+ rm -f built-in.o
rm -f System.map
rm -f vmlinux
rm -f vmlinux.o
@@ -161,13 +235,6 @@ case "${KCONFIG_CONFIG}" in
. "./${KCONFIG_CONFIG}"
esac
-#link vmlinux.o
-info LD vmlinux.o
-modpost_link vmlinux.o
-
-# modpost vmlinux.o to check for section mismatches
-${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
-
# Update version
info GEN .version
if [ ! -r .version ]; then
@@ -181,6 +248,15 @@ fi;
# final build of init/
${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
+archive_builtin
+
+#link vmlinux.o
+info LD vmlinux.o
+modpost_link vmlinux.o
+
+# modpost vmlinux.o to check for section mismatches
+${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
+
kallsymso=""
kallsyms_vmlinux=""
if [ -n "${CONFIG_KALLSYMS}" ]; then
diff --git a/scripts/sign-file.c b/scripts/sign-file.c
index 250a7a645033..b57ade78b24a 100755
--- a/scripts/sign-file.c
+++ b/scripts/sign-file.c
@@ -28,6 +28,13 @@
#include <openssl/engine.h>
/*
+ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API.
+ *
+ * Remove this if/when that API is no longer used
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
+/*
* Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to
* assume that it's not available and its header file is missing and that we
* should use PKCS#7 instead. Switching to the older PKCS#7 format restricts
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index a95b6c98d460..6087f210c93a 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -677,11 +677,11 @@ static const struct kernel_param_ops param_ops_aalockpolicy = {
.get = param_get_aalockpolicy
};
-static int param_set_audit(const char *val, struct kernel_param *kp);
-static int param_get_audit(char *buffer, struct kernel_param *kp);
+static int param_set_audit(const char *val, const struct kernel_param *kp);
+static int param_get_audit(char *buffer, const struct kernel_param *kp);
-static int param_set_mode(const char *val, struct kernel_param *kp);
-static int param_get_mode(char *buffer, struct kernel_param *kp);
+static int param_set_mode(const char *val, const struct kernel_param *kp);
+static int param_get_mode(char *buffer, const struct kernel_param *kp);
/* Flag values, also controllable via /sys/module/apparmor/parameters
* We define special types as we want to do additional mediation.
@@ -789,7 +789,7 @@ static int param_get_aauint(char *buffer, const struct kernel_param *kp)
return param_get_uint(buffer, kp);
}
-static int param_get_audit(char *buffer, struct kernel_param *kp)
+static int param_get_audit(char *buffer, const struct kernel_param *kp)
{
if (!policy_view_capable())
return -EPERM;
@@ -800,7 +800,7 @@ static int param_get_audit(char *buffer, struct kernel_param *kp)
return sprintf(buffer, "%s", audit_mode_names[aa_g_audit]);
}
-static int param_set_audit(const char *val, struct kernel_param *kp)
+static int param_set_audit(const char *val, const struct kernel_param *kp)
{
int i;
if (!policy_admin_capable())
@@ -822,7 +822,7 @@ static int param_set_audit(const char *val, struct kernel_param *kp)
return -EINVAL;
}
-static int param_get_mode(char *buffer, struct kernel_param *kp)
+static int param_get_mode(char *buffer, const struct kernel_param *kp)
{
if (!policy_admin_capable())
return -EPERM;
@@ -833,7 +833,7 @@ static int param_get_mode(char *buffer, struct kernel_param *kp)
return sprintf(buffer, "%s", aa_profile_mode_names[aa_g_profile_mode]);
}
-static int param_set_mode(const char *val, struct kernel_param *kp)
+static int param_set_mode(const char *val, const struct kernel_param *kp)
{
int i;
if (!policy_admin_capable())
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b967dcadcb4d..3d24f86b423f 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2774,7 +2774,7 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
return rc;
/* Allow all mounts performed by the kernel */
- if (flags & MS_KERNMOUNT)
+ if (flags & (MS_KERNMOUNT | MS_SUBMOUNT))
return 0;
ad.type = LSM_AUDIT_DATA_DENTRY;
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index e27b71a5d8d5..d1ee3dfb2555 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -105,7 +105,8 @@ struct security_class_mapping secclass_map[] = {
{ COMMON_IPC_PERMS, NULL } },
{ "netlink_route_socket",
{ COMMON_SOCK_PERMS,
- "nlmsg_read", "nlmsg_write", "nlmsg_readpriv", NULL } },
+ "nlmsg_read", "nlmsg_write", "nlmsg_readpriv", "nlmsg_getneigh",
+ NULL } },
{ "netlink_tcpdiag_socket",
{ COMMON_SOCK_PERMS,
"nlmsg_read", "nlmsg_write", NULL } },
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index b45a3a72c161..efb852e076b6 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -79,6 +79,7 @@ enum {
#define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
extern int selinux_android_netlink_route;
+extern int selinux_android_netlink_getneigh;
extern int selinux_policycap_netpeer;
extern int selinux_policycap_openperm;
extern int selinux_policycap_alwaysnetwork;
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 78a8c420b1f5..c321361af6bf 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -192,12 +192,12 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
return err;
}
-static void nlmsg_set_getlink_perm(u32 perm)
+static void nlmsg_set_perm_for_type(u32 perm, u16 type)
{
int i;
for (i = 0; i < ARRAY_SIZE(nlmsg_route_perms); i++) {
- if (nlmsg_route_perms[i].nlmsg_type == RTM_GETLINK) {
+ if (nlmsg_route_perms[i].nlmsg_type == type) {
nlmsg_route_perms[i].perm = perm;
break;
}
@@ -207,11 +207,27 @@ static void nlmsg_set_getlink_perm(u32 perm)
/**
* Use nlmsg_readpriv as the permission for RTM_GETLINK messages if the
* netlink_route_getlink policy capability is set. Otherwise use nlmsg_read.
+ * Similarly, use nlmsg_getneigh for RTM_GETNEIGH and RTM_GETNEIGHTBL if the
+ * netlink_route_getneigh policy capability is set. Otherwise use nlmsg_read.
*/
void selinux_nlmsg_init(void)
{
if (selinux_android_netlink_route)
- nlmsg_set_getlink_perm(NETLINK_ROUTE_SOCKET__NLMSG_READPRIV);
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_READPRIV,
+ RTM_GETLINK);
else
- nlmsg_set_getlink_perm(NETLINK_ROUTE_SOCKET__NLMSG_READ);
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_READ,
+ RTM_GETLINK);
+
+ if (selinux_android_netlink_getneigh) {
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_GETNEIGH,
+ RTM_GETNEIGH);
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_GETNEIGH,
+ RTM_GETNEIGHTBL);
+ } else {
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_READ,
+ RTM_GETNEIGH);
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_READ,
+ RTM_GETNEIGHTBL);
+ }
}
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 5ee23e3a3678..c5b2940f4dab 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -2333,6 +2333,10 @@ int policydb_read(struct policydb *p, void *fp)
p->android_netlink_route = 1;
}
+ if ((le32_to_cpu(buf[1]) & POLICYDB_CONFIG_ANDROID_NETLINK_GETNEIGH)) {
+ p->android_netlink_getneigh = 1;
+ }
+
if (p->policyvers >= POLICYDB_VERSION_POLCAP) {
rc = ebitmap_read(&p->policycaps, fp);
if (rc)
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index 0d511cf3c1e9..45698f754766 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -228,6 +228,7 @@ struct genfs {
struct policydb {
int mls_enabled;
int android_netlink_route;
+ int android_netlink_getneigh;
/* symbol tables */
struct symtab symtab[SYM_NUM];
@@ -315,6 +316,7 @@ extern int policydb_write(struct policydb *p, void *fp);
#define POLICYDB_CONFIG_MLS 1
#define POLICYDB_CONFIG_ANDROID_NETLINK_ROUTE (1 << 31)
+#define POLICYDB_CONFIG_ANDROID_NETLINK_GETNEIGH (1 << 30)
/* the config flags related to unknown classes/perms are bits 2 and 3 */
#define REJECT_UNKNOWN 0x00000002
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 52cf500a8418..b615627d1abc 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -71,6 +71,7 @@
#include "audit.h"
int selinux_android_netlink_route;
+int selinux_android_netlink_getneigh;
int selinux_policycap_netpeer;
int selinux_policycap_openperm;
int selinux_policycap_alwaysnetwork;
@@ -2000,6 +2001,7 @@ static void security_load_policycaps(void)
POLICYDB_CAPABILITY_ALWAYSNETWORK);
selinux_android_netlink_route = policydb.android_netlink_route;
+ selinux_android_netlink_getneigh = policydb.android_netlink_getneigh;
selinux_nlmsg_init();
}
diff --git a/sound/core/control.c b/sound/core/control.c
index 43c8eac250b8..04b939df3768 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -877,24 +877,18 @@ static int snd_ctl_elem_read(struct snd_card *card,
struct snd_kcontrol *kctl;
struct snd_kcontrol_volatile *vd;
unsigned int index_offset;
- int result;
- down_read(&card->controls_rwsem);
kctl = snd_ctl_find_id(card, &control->id);
- if (kctl == NULL) {
- result = -ENOENT;
- } else {
- index_offset = snd_ctl_get_ioff(kctl, &control->id);
- vd = &kctl->vd[index_offset];
- if ((vd->access & SNDRV_CTL_ELEM_ACCESS_READ) &&
- kctl->get != NULL) {
- snd_ctl_build_ioff(&control->id, kctl, index_offset);
- result = kctl->get(kctl, control);
- } else
- result = -EPERM;
- }
- up_read(&card->controls_rwsem);
- return result;
+ if (kctl == NULL)
+ return -ENOENT;
+
+ index_offset = snd_ctl_get_ioff(kctl, &control->id);
+ vd = &kctl->vd[index_offset];
+ if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL)
+ return -EPERM;
+
+ snd_ctl_build_ioff(&control->id, kctl, index_offset);
+ return kctl->get(kctl, control);
}
static int snd_ctl_elem_read_user(struct snd_card *card,
@@ -909,8 +903,11 @@ static int snd_ctl_elem_read_user(struct snd_card *card,
snd_power_lock(card);
result = snd_power_wait(card, SNDRV_CTL_POWER_D0);
- if (result >= 0)
+ if (result >= 0) {
+ down_read(&card->controls_rwsem);
result = snd_ctl_elem_read(card, control);
+ up_read(&card->controls_rwsem);
+ }
snd_power_unlock(card);
if (result >= 0)
if (copy_to_user(_control, control, sizeof(*control)))
@@ -927,30 +924,28 @@ static int snd_ctl_elem_write(struct snd_card *card, struct snd_ctl_file *file,
unsigned int index_offset;
int result;
- down_read(&card->controls_rwsem);
kctl = snd_ctl_find_id(card, &control->id);
- if (kctl == NULL) {
- result = -ENOENT;
- } else {
- index_offset = snd_ctl_get_ioff(kctl, &control->id);
- vd = &kctl->vd[index_offset];
- if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_WRITE) ||
- kctl->put == NULL ||
- (file && vd->owner && vd->owner != file)) {
- result = -EPERM;
- } else {
- snd_ctl_build_ioff(&control->id, kctl, index_offset);
- result = kctl->put(kctl, control);
- }
- if (result > 0) {
- struct snd_ctl_elem_id id = control->id;
- up_read(&card->controls_rwsem);
- snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &id);
- return 0;
- }
+ if (kctl == NULL)
+ return -ENOENT;
+
+ index_offset = snd_ctl_get_ioff(kctl, &control->id);
+ vd = &kctl->vd[index_offset];
+ if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_WRITE) || kctl->put == NULL ||
+ (file && vd->owner && vd->owner != file)) {
+ return -EPERM;
}
- up_read(&card->controls_rwsem);
- return result;
+
+ snd_ctl_build_ioff(&control->id, kctl, index_offset);
+ result = kctl->put(kctl, control);
+ if (result < 0)
+ return result;
+
+ if (result > 0) {
+ struct snd_ctl_elem_id id = control->id;
+ snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &id);
+ }
+
+ return 0;
}
static int snd_ctl_elem_write_user(struct snd_ctl_file *file,
@@ -967,8 +962,11 @@ static int snd_ctl_elem_write_user(struct snd_ctl_file *file,
card = file->card;
snd_power_lock(card);
result = snd_power_wait(card, SNDRV_CTL_POWER_D0);
- if (result >= 0)
+ if (result >= 0) {
+ down_write(&card->controls_rwsem);
result = snd_ctl_elem_write(card, file, control);
+ up_write(&card->controls_rwsem);
+ }
snd_power_unlock(card);
if (result >= 0)
if (copy_to_user(_control, control, sizeof(*control)))
diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
index 3fd7f67e701e..f1df25922bd7 100644
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c
@@ -320,8 +320,11 @@ static int ctl_elem_read_user(struct snd_card *card,
snd_power_lock(card);
err = snd_power_wait(card, SNDRV_CTL_POWER_D0);
- if (err >= 0)
+ if (err >= 0) {
+ down_read(&card->controls_rwsem);
err = snd_ctl_elem_read(card, data);
+ up_read(&card->controls_rwsem);
+ }
snd_power_unlock(card);
if (err >= 0)
err = copy_ctl_value_to_user(userdata, valuep, data,
@@ -349,8 +352,11 @@ static int ctl_elem_write_user(struct snd_ctl_file *file,
snd_power_lock(card);
err = snd_power_wait(card, SNDRV_CTL_POWER_D0);
- if (err >= 0)
+ if (err >= 0) {
+ down_write(&card->controls_rwsem);
err = snd_ctl_elem_write(card, file, data);
+ up_write(&card->controls_rwsem);
+ }
snd_power_unlock(card);
if (err >= 0)
err = copy_ctl_value_to_user(userdata, valuep, data,
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 00326629d4af..69bb3fc38fb2 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,3 +1,5 @@
socket
psock_fanout
psock_tpacket
+reuseport_bpf
+reuseport_bpf_cpu
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index fac4782c51d8..c658792d47b4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g
CFLAGS += -I../../../../usr/include/
-NET_PROGS = socket psock_fanout psock_tpacket
+NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu
all: $(NET_PROGS)
%: %.c
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
new file mode 100644
index 000000000000..b5277106df1f
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -0,0 +1,641 @@
+/*
+ * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
+ * a BPF program (both classic and extended) to read the first word from an
+ * incoming packet (expected to be in network byte-order), calculate a modulus
+ * of that number, and then dispatch the packet to the Nth socket using the
+ * result. These tests are run for each supported address family and protocol.
+ * Additionally, a few edge cases in the implementation are tested.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+struct test_params {
+ int recv_family;
+ int send_family;
+ int protocol;
+ size_t recv_socks;
+ uint16_t recv_port;
+ uint16_t send_port_min;
+};
+
+static size_t sockaddr_size(void)
+{
+ return sizeof(struct sockaddr_storage);
+}
+
+static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
+{
+ struct sockaddr_storage *addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ addr = malloc(sizeof(struct sockaddr_storage));
+ memset(addr, 0, sizeof(struct sockaddr_storage));
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(port);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+ return (struct sockaddr *)addr;
+}
+
+static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
+{
+ struct sockaddr *addr = new_any_sockaddr(family, port);
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_addr = in6addr_loopback;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+ return addr;
+}
+
+static void attach_ebpf(int fd, uint16_t mod)
+{
+ static char bpf_log_buf[65536];
+ static const char bpf_license[] = "GPL";
+
+ int bpf_fd;
+ const struct bpf_insn prog[] = {
+ /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
+ { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
+ /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
+ { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
+ /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
+ { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
+ /* BPF_EXIT_INSN() */
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insn_cnt = ARRAY_SIZE(prog);
+ attr.insns = (unsigned long) &prog;
+ attr.license = (unsigned long) &bpf_license;
+ attr.log_buf = (unsigned long) &bpf_log_buf;
+ attr.log_size = sizeof(bpf_log_buf);
+ attr.log_level = 1;
+ attr.kern_version = 0;
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+ close(bpf_fd);
+}
+
+static void attach_cbpf(int fd, uint16_t mod)
+{
+ struct sock_filter code[] = {
+ /* A = (uint32_t)skb[0] */
+ { BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 },
+ /* A = A % mod */
+ { BPF_ALU | BPF_MOD, 0, 0, mod },
+ /* return A */
+ { BPF_RET | BPF_A, 0, 0, 0 },
+ };
+ struct sock_fprog p = {
+ .len = ARRAY_SIZE(code),
+ .filter = code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
+ void (*attach_bpf)(int, uint16_t))
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ int i, opt;
+
+ for (i = 0; i < p.recv_socks; ++i) {
+ fd[i] = socket(p.recv_family, p.protocol, 0);
+ if (fd[i] < 0)
+ error(1, errno, "failed to create recv %d", i);
+
+ opt = 1;
+ if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on %d", i);
+
+ if (i == 0)
+ attach_bpf(fd[i], mod);
+
+ if (bind(fd[i], addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket %d", i);
+
+ if (p.protocol == SOCK_STREAM) {
+ opt = 4;
+ if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
+ sizeof(opt)))
+ error(1, errno,
+ "failed to set TCP_FASTOPEN on %d", i);
+ if (listen(fd[i], p.recv_socks * 10))
+ error(1, errno, "failed to listen on socket");
+ }
+ }
+ free(addr);
+}
+
+static void send_from(struct test_params p, uint16_t sport, char *buf,
+ size_t len)
+{
+ struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
+ struct sockaddr * const daddr =
+ new_loopback_sockaddr(p.send_family, p.recv_port);
+ const int fd = socket(p.send_family, p.protocol, 0), one = 1;
+
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
+ error(1, errno, "failed to set reuseaddr");
+
+ if (bind(fd, saddr, sockaddr_size()))
+ error(1, errno, "failed to bind send socket");
+
+ if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+ free(saddr);
+ free(daddr);
+}
+
+static void test_recv_order(const struct test_params p, int fd[], int mod)
+{
+ char recv_buf[8], send_buf[8];
+ struct msghdr msg;
+ struct iovec recv_io = { recv_buf, 8 };
+ struct epoll_event ev;
+ int epfd, conn, i, sport, expected;
+ uint32_t data, ndata;
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (i = 0; i < p.recv_socks; ++i) {
+ ev.events = EPOLLIN;
+ ev.data.fd = fd[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
+ error(1, errno, "failed to register sock %d epoll", i);
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &recv_io;
+ msg.msg_iovlen = 1;
+
+ for (data = 0; data < p.recv_socks * 2; ++data) {
+ sport = p.send_port_min + data;
+ ndata = htonl(data);
+ memcpy(send_buf, &ndata, sizeof(ndata));
+ send_from(p, sport, send_buf, sizeof(ndata));
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll wait failed");
+
+ if (p.protocol == SOCK_STREAM) {
+ conn = accept(ev.data.fd, NULL, NULL);
+ if (conn < 0)
+ error(1, errno, "error accepting");
+ i = recvmsg(conn, &msg, 0);
+ close(conn);
+ } else {
+ i = recvmsg(ev.data.fd, &msg, 0);
+ }
+ if (i < 0)
+ error(1, errno, "recvmsg error");
+ if (i != sizeof(ndata))
+ error(1, 0, "expected size %zd got %d",
+ sizeof(ndata), i);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ if (ev.data.fd == fd[i])
+ break;
+ memcpy(&ndata, recv_buf, sizeof(ndata));
+ fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
+
+ expected = (sport % mod);
+ if (i != expected)
+ error(1, 0, "expected socket %d", expected);
+ }
+}
+
+static void test_reuseport_ebpf(struct test_params p)
+{
+ int i, fd[p.recv_socks];
+
+ fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
+ build_recv_group(p, fd, p.recv_socks, attach_ebpf);
+ test_recv_order(p, fd, p.recv_socks);
+
+ p.send_port_min += p.recv_socks * 2;
+ fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
+ attach_ebpf(fd[0], p.recv_socks / 2);
+ test_recv_order(p, fd, p.recv_socks / 2);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ close(fd[i]);
+}
+
+static void test_reuseport_cbpf(struct test_params p)
+{
+ int i, fd[p.recv_socks];
+
+ fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
+ build_recv_group(p, fd, p.recv_socks, attach_cbpf);
+ test_recv_order(p, fd, p.recv_socks);
+
+ p.send_port_min += p.recv_socks * 2;
+ fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
+ attach_cbpf(fd[0], p.recv_socks / 2);
+ test_recv_order(p, fd, p.recv_socks / 2);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ close(fd[i]);
+}
+
+static void test_extra_filter(const struct test_params p)
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ int fd1, fd2, opt;
+
+ fprintf(stderr, "Testing too many filters...\n");
+ fd1 = socket(p.recv_family, p.protocol, 0);
+ if (fd1 < 0)
+ error(1, errno, "failed to create socket 1");
+ fd2 = socket(p.recv_family, p.protocol, 0);
+ if (fd2 < 0)
+ error(1, errno, "failed to create socket 2");
+
+ opt = 1;
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+ attach_ebpf(fd1, 10);
+ attach_ebpf(fd2, 10);
+
+ if (bind(fd1, addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket 1");
+
+ if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
+ error(1, errno, "bind socket 2 should fail with EADDRINUSE");
+
+ free(addr);
+}
+
+static void test_filter_no_reuseport(const struct test_params p)
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ const char bpf_license[] = "GPL";
+ struct bpf_insn ecode[] = {
+ { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
+ union bpf_attr eprog;
+ struct sock_fprog cprog;
+ int fd, bpf_fd;
+
+ fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
+
+ memset(&eprog, 0, sizeof(eprog));
+ eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ eprog.insn_cnt = ARRAY_SIZE(ecode);
+ eprog.insns = (unsigned long) &ecode;
+ eprog.license = (unsigned long) &bpf_license;
+ eprog.kern_version = 0;
+
+ memset(&cprog, 0, sizeof(cprog));
+ cprog.len = ARRAY_SIZE(ccode);
+ cprog.filter = ccode;
+
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error");
+ fd = socket(p.recv_family, p.protocol, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create socket 1");
+
+ if (bind(fd, addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket 1");
+
+ errno = 0;
+ if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)) || errno != EINVAL)
+ error(1, errno, "setsockopt should have returned EINVAL");
+
+ errno = 0;
+ if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
+ sizeof(cprog)) || errno != EINVAL)
+ error(1, errno, "setsockopt should have returned EINVAL");
+
+ free(addr);
+}
+
+static void test_filter_without_bind(void)
+{
+ int fd1, fd2, opt = 1;
+
+ fprintf(stderr, "Testing filter add without bind...\n");
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd1 < 0)
+ error(1, errno, "failed to create socket 1");
+ fd2 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd2 < 0)
+ error(1, errno, "failed to create socket 2");
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+ attach_ebpf(fd1, 10);
+ attach_cbpf(fd2, 10);
+
+ close(fd1);
+ close(fd2);
+}
+
+void enable_fastopen(void)
+{
+ int fd = open("/proc/sys/net/ipv4/tcp_fastopen", 0);
+ int rw_mask = 3; /* bit 1: client side; bit-2 server side */
+ int val, size;
+ char buf[16];
+
+ if (fd < 0)
+ error(1, errno, "Unable to open tcp_fastopen sysctl");
+ if (read(fd, buf, sizeof(buf)) <= 0)
+ error(1, errno, "Unable to read tcp_fastopen sysctl");
+ val = atoi(buf);
+ close(fd);
+
+ if ((val & rw_mask) != rw_mask) {
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (fd < 0)
+ error(1, errno,
+ "Unable to open tcp_fastopen sysctl for writing");
+ val |= rw_mask;
+ size = snprintf(buf, 16, "%d", val);
+ if (write(fd, buf, size) <= 0)
+ error(1, errno, "Unable to write tcp_fastopen sysctl");
+ close(fd);
+ }
+}
+
+static struct rlimit rlim_old;
+
+static __attribute__((constructor)) void main_ctor(void)
+{
+ getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+
+ if (rlim_old.rlim_cur != RLIM_INFINITY) {
+ struct rlimit rlim_new;
+
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
+}
+
+static __attribute__((destructor)) void main_dtor(void)
+{
+ setrlimit(RLIMIT_MEMLOCK, &rlim_old);
+}
+
+int main(void)
+{
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ /* NOTE: UDP socket lookups traverse a different code path when there
+ * are > 10 sockets in a group. Run the bpf test through both paths.
+ */
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8000,
+ .send_port_min = 9000});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8000,
+ .send_port_min = 9000});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8001,
+ .send_port_min = 9020});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8001,
+ .send_port_min = 9020});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8002});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8008});
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8003,
+ .send_port_min = 9040});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8003,
+ .send_port_min = 9040});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8004,
+ .send_port_min = 9060});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8004,
+ .send_port_min = 9060});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8005});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8009});
+
+ fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8006,
+ .send_port_min = 9080});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8006,
+ .send_port_min = 9080});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8007,
+ .send_port_min = 9100});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8007,
+ .send_port_min = 9100});
+
+ /* TCP fastopen is required for the TCP tests */
+ enable_fastopen();
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8008,
+ .send_port_min = 9120});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8009,
+ .send_port_min = 9160});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8010});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8011});
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8012,
+ .send_port_min = 9200});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8013,
+ .send_port_min = 9240});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8014});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8015});
+
+ fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8016,
+ .send_port_min = 9320});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8017,
+ .send_port_min = 9360});
+
+ test_filter_without_bind();
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c
new file mode 100644
index 000000000000..b23d6f54de7b
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c
@@ -0,0 +1,258 @@
+/*
+ * Test functionality of BPF filters with SO_REUSEPORT. This program creates
+ * an SO_REUSEPORT receiver group containing one socket per CPU core. It then
+ * creates a BPF program that will select a socket from this group based
+ * on the core id that receives the packet. The sending code artificially
+ * moves itself to run on different core ids and sends one message from
+ * each core. Since these packets are delivered over loopback, they should
+ * arrive on the same core that sent them. The receiving code then ensures
+ * that the packet was received on the socket for the corresponding core id.
+ * This entire process is done for several different core id permutations
+ * and for each IPv4/IPv6 and TCP/UDP combination.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ size_t i;
+ int opt;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < len; ++i) {
+ rcv_fd[i] = socket(family, proto, 0);
+ if (rcv_fd[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void attach_bpf(int fd)
+{
+ struct sock_filter code[] = {
+ /* A = raw_smp_processor_id() */
+ { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU },
+ /* return A */
+ { BPF_RET | BPF_A, 0, 0, 0 },
+ };
+ struct sock_fprog p = {
+ .len = 2,
+ .filter = code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+static void send_from_cpu(int cpu_id, int family, int proto)
+{
+ struct sockaddr_storage saddr, daddr;
+ struct sockaddr_in *saddr4, *daddr4;
+ struct sockaddr_in6 *saddr6, *daddr6;
+ cpu_set_t cpu_set;
+ int fd;
+
+ switch (family) {
+ case AF_INET:
+ saddr4 = (struct sockaddr_in *)&saddr;
+ saddr4->sin_family = AF_INET;
+ saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4->sin_port = 0;
+
+ daddr4 = (struct sockaddr_in *)&daddr;
+ daddr4->sin_family = AF_INET;
+ daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ saddr6 = (struct sockaddr_in6 *)&saddr;
+ saddr6->sin6_family = AF_INET6;
+ saddr6->sin6_addr = in6addr_any;
+ saddr6->sin6_port = 0;
+
+ daddr6 = (struct sockaddr_in6 *)&daddr;
+ daddr6->sin6_family = AF_INET6;
+ daddr6->sin6_addr = in6addr_loopback;
+ daddr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ memset(&cpu_set, 0, sizeof(cpu_set));
+ CPU_SET(cpu_id, &cpu_set);
+ if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0)
+ error(1, errno, "failed to pin to cpu");
+
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static
+void receive_on_cpu(int *rcv_fd, int len, int epfd, int cpu_id, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ for (i = 0; i < len; ++i)
+ if (ev.data.fd == rcv_fd[i])
+ break;
+ if (i == len)
+ error(1, 0, "failed to find socket");
+ fprintf(stderr, "send cpu %d, receive socket %d\n", cpu_id, i);
+ if (cpu_id != i)
+ error(1, 0, "cpu id/receive socket mismatch");
+}
+
+static void test(int *rcv_fd, int len, int family, int proto)
+{
+ struct epoll_event ev;
+ int epfd, cpu;
+
+ build_rcv_group(rcv_fd, len, family, proto);
+ attach_bpf(rcv_fd[0]);
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (cpu = 0; cpu < len; ++cpu) {
+ ev.events = EPOLLIN;
+ ev.data.fd = rcv_fd[cpu];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[cpu], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ /* Forward iterate */
+ for (cpu = 0; cpu < len; ++cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Reverse iterate */
+ for (cpu = len - 1; cpu >= 0; --cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Even cores */
+ for (cpu = 0; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Odd cores */
+ for (cpu = 1; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ close(epfd);
+ for (cpu = 0; cpu < len; ++cpu)
+ close(rcv_fd[cpu]);
+}
+
+int main(void)
+{
+ int *rcv_fd, cpus;
+
+ cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (cpus <= 0)
+ error(1, errno, "failed counting cpus");
+
+ rcv_fd = calloc(cpus, sizeof(int));
+ if (!rcv_fd)
+ error(1, 0, "failed to allocate array");
+
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_STREAM);
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_STREAM);
+
+ free(rcv_fd);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}