-rw-r--r-- Documentation/kbuild/00-INDEX | 2
-rw-r--r-- Documentation/kbuild/kbuild.txt | 11
-rw-r--r-- Documentation/kbuild/llvm.txt | 86
-rw-r--r-- Documentation/kbuild/makefiles.txt | 16
-rw-r--r-- Documentation/scheduler/sched-bwc.txt | 45
-rw-r--r-- Makefile | 59
-rw-r--r-- arch/Kconfig | 19
-rw-r--r-- arch/arm/boot/dts/qcom/msm8996-v3.dtsi | 1
-rw-r--r-- arch/arm/boot/dts/qcom/msm8996pro.dtsi | 1
-rw-r--r-- arch/arm/boot/dts/qcom/msm8998.dtsi | 144
-rw-r--r-- arch/arm/kvm/Kconfig | 1
-rw-r--r-- arch/arm/tools/syscall.tbl | 1
-rw-r--r-- arch/arm64/Kconfig | 15
-rw-r--r-- arch/arm64/Makefile | 35
-rw-r--r-- arch/arm64/configs/msmcortex_defconfig | 8
-rw-r--r-- arch/arm64/configs/z2_plus_defconfig | 2
-rw-r--r-- arch/arm64/configs/z2_row_defconfig | 2
-rw-r--r-- arch/arm64/crypto/Makefile | 2
-rw-r--r-- arch/arm64/crypto/aes-ce-core.S | 87
-rw-r--r-- arch/arm64/crypto/aes-ce-glue.c (renamed from arch/arm64/crypto/aes-ce-cipher.c) | 117
-rw-r--r-- arch/arm64/crypto/aes-modes.S | 16
-rw-r--r-- arch/arm64/include/asm/assembler.h | 24
-rw-r--r-- arch/arm64/include/asm/opcodes.h | 5
-rw-r--r-- arch/arm64/include/asm/probes.h | 21
-rw-r--r-- arch/arm64/include/asm/sysreg.h | 18
-rw-r--r-- arch/arm64/include/asm/unistd.h | 2
-rw-r--r-- arch/arm64/include/asm/unistd32.h | 2
-rw-r--r-- arch/arm64/include/uapi/asm/setup.h | 2
-rw-r--r-- arch/arm64/kernel/Makefile | 2
-rw-r--r-- arch/arm64/kernel/app_setting.c | 4
-rw-r--r-- arch/arm64/kernel/armv8_deprecated.c | 14
-rw-r--r-- arch/arm64/kernel/entry.S | 1
-rw-r--r-- arch/arm64/kernel/image.h | 6
-rw-r--r-- arch/arm64/kernel/insn.c | 1
-rw-r--r-- arch/arm64/kernel/module.lds | 2
-rw-r--r-- arch/arm64/kernel/probes/decode-insn.c | 34
-rw-r--r-- arch/arm64/kernel/probes/decode-insn.h | 8
-rw-r--r-- arch/arm64/kernel/probes/kprobes.c | 36
-rw-r--r-- arch/arm64/kernel/setup.c | 2
-rw-r--r-- arch/arm64/kernel/vdso/gettimeofday.S | 6
-rw-r--r-- arch/arm64/kernel/vmlinux.lds.S | 15
-rw-r--r-- arch/arm64/kvm/Kconfig | 1
-rw-r--r-- arch/arm64/lib/memcpy.S | 3
-rw-r--r-- arch/arm64/lib/memmove.S | 3
-rw-r--r-- arch/arm64/lib/memset.S | 3
-rw-r--r-- arch/mips/kvm/Kconfig | 1
-rw-r--r-- arch/powerpc/kvm/Kconfig | 1
-rw-r--r-- arch/powerpc/platforms/pseries/cmm.c | 2
-rw-r--r-- arch/s390/kvm/Kconfig | 1
-rw-r--r-- arch/s390/oprofile/init.c | 2
-rw-r--r-- arch/x86/Kconfig | 1
-rw-r--r-- arch/x86/entry/syscalls/syscall_32.tbl | 2
-rw-r--r-- arch/x86/entry/syscalls/syscall_64.tbl | 2
-rw-r--r-- arch/x86/kvm/Kconfig | 1
-rw-r--r-- arch/x86/oprofile/nmi_int.c | 2
-rw-r--r-- drivers/acpi/ec.c | 6
-rw-r--r-- drivers/acpi/sysfs.c | 8
-rw-r--r-- drivers/android/binder.c | 18
-rw-r--r-- drivers/base/Kconfig | 1
-rw-r--r-- drivers/char/adsprpc.c | 11
-rw-r--r-- drivers/char/diag/diag_dci.c | 20
-rw-r--r-- drivers/char/ipmi/ipmi_poweroff.c | 2
-rw-r--r-- drivers/char/ipmi/ipmi_si_intf.c | 4
-rw-r--r-- drivers/cpufreq/intel_pstate.c | 1
-rw-r--r-- drivers/cpuidle/lpm-levels.c | 17
-rw-r--r-- drivers/crypto/msm/qce50.c | 12
-rw-r--r-- drivers/edac/edac_mc_sysfs.c | 2
-rw-r--r-- drivers/edac/edac_module.c | 3
-rw-r--r-- drivers/firmware/efi/libstub/arm-stub.c | 5
-rw-r--r-- drivers/firmware/efi/libstub/arm64-stub.c | 2
-rw-r--r-- drivers/firmware/efi/libstub/efistub.h | 2
-rw-r--r-- drivers/gpu/msm/adreno.c | 5
-rw-r--r-- drivers/gpu/msm/adreno.h | 9
-rw-r--r-- drivers/gpu/msm/adreno_compat.c | 9
-rw-r--r-- drivers/gpu/msm/adreno_dispatch.c | 4
-rw-r--r-- drivers/gpu/msm/adreno_ioctl.c | 9
-rw-r--r-- drivers/gpu/msm/adreno_sysfs.c | 33
-rw-r--r-- drivers/gpu/msm/kgsl.c | 33
-rw-r--r-- drivers/gpu/msm/kgsl_iommu.c | 10
-rw-r--r-- drivers/gpu/msm/kgsl_mmu.c | 8
-rw-r--r-- drivers/gpu/msm/kgsl_mmu.h | 7
-rw-r--r-- drivers/gpu/msm/kgsl_pool.c | 10
-rw-r--r-- drivers/hid/hid-magicmouse.c | 3
-rw-r--r-- drivers/hwtracing/coresight/coresight-event.c | 10
-rw-r--r-- drivers/ide/ide.c | 4
-rw-r--r-- drivers/iio/Kconfig | 1
-rw-r--r-- drivers/infiniband/Kconfig | 1
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba7322.c | 4
-rw-r--r-- drivers/infiniband/ulp/srpt/ib_srpt.c | 2
-rw-r--r-- drivers/isdn/hardware/mISDN/avmfritz.c | 2
-rw-r--r-- drivers/isdn/hardware/mISDN/mISDNinfineon.c | 2
-rw-r--r-- drivers/isdn/hardware/mISDN/netjet.c | 2
-rw-r--r-- drivers/isdn/hardware/mISDN/speedfax.c | 2
-rw-r--r-- drivers/isdn/hardware/mISDN/w6692.c | 2
-rw-r--r-- drivers/md/md.c | 6
-rw-r--r-- drivers/media/platform/msm/camera_v2/isp/msm_isp32.c | 6
-rw-r--r-- drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c | 4
-rw-r--r-- drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c | 2
-rw-r--r-- drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c | 1
-rw-r--r-- drivers/media/usb/uvc/uvc_driver.c | 4
-rw-r--r-- drivers/message/fusion/mptbase.c | 4
-rw-r--r-- drivers/misc/ibmasm/ibmasm.h | 2
-rw-r--r-- drivers/misc/kgdbts.c | 3
-rw-r--r-- drivers/mtd/devices/block2mtd.c | 2
-rw-r--r-- drivers/mtd/devices/phram.c | 2
-rw-r--r-- drivers/mtd/ubi/build.c | 2
-rw-r--r-- drivers/net/wireless/wcnss/wcnss_wlan.c | 2
-rw-r--r-- drivers/of/of_reserved_mem.c | 4
-rw-r--r-- drivers/pci/pcie/aspm.c | 5
-rw-r--r-- drivers/perf/arm_pmu.c | 2
-rw-r--r-- drivers/pinctrl/qcom/pinctrl-msm.c | 9
-rw-r--r-- drivers/platform/x86/thinkpad_acpi.c | 2
-rw-r--r-- drivers/power/reset/msm-poweroff.c | 4
-rw-r--r-- drivers/power/supply/qcom/qpnp-smb2.c | 8
-rw-r--r-- drivers/power/supply/qcom/smb-lib.c | 24
-rw-r--r-- drivers/scsi/fcoe/fcoe_transport.c | 20
-rw-r--r-- drivers/scsi/mpt3sas/mpt3sas_base.c | 2
-rw-r--r-- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2
-rw-r--r-- drivers/soc/qcom/hab/hab.h | 8
-rw-r--r-- drivers/soc/qcom/hab/hab_msg.c | 59
-rw-r--r-- drivers/soc/qcom/hab/khab_test.c | 8
-rw-r--r-- drivers/soc/qcom/smp2p_sleepstate.c | 5
-rw-r--r-- drivers/staging/android/ion/ion.c | 9
-rw-r--r-- drivers/staging/android/ion/msm/msm_ion.c | 31
-rw-r--r-- drivers/staging/rdma/ipath/ipath_init_chip.c | 4
-rw-r--r-- drivers/tty/serial/kgdboc.c | 3
-rw-r--r-- drivers/usb/chipidea/otg_fsm.h | 2
-rw-r--r-- drivers/usb/gadget/composite.c | 3
-rw-r--r-- drivers/usb/gadget/configfs.c | 4
-rw-r--r-- drivers/usb/gadget/function/f_accessory.c | 20
-rw-r--r-- drivers/usb/gadget/function/rndis.c | 10
-rw-r--r-- drivers/usb/gadget/legacy/inode.c | 10
-rw-r--r-- drivers/usb/serial/console.c | 2
-rw-r--r-- drivers/vfio/Kconfig | 1
-rw-r--r-- drivers/video/console/dummycon.c | 83
-rw-r--r-- drivers/video/fbdev/msm/mdss_dsi_status.c | 5
-rw-r--r-- drivers/video/fbdev/msm/mdss_mdp_intf_video.c | 2
-rw-r--r-- drivers/video/fbdev/msm/msm_mdss_io_8974.c | 20
-rw-r--r-- fs/Makefile | 2
-rw-r--r-- fs/afs/file.c | 14
-rw-r--r-- fs/afs/internal.h | 2
-rw-r--r-- fs/afs/mntpt.c | 2
-rw-r--r-- fs/autofs4/waitq.c | 4
-rw-r--r-- fs/cifs/cifs_dfs_ref.c | 7
-rw-r--r-- fs/debugfs/inode.c | 8
-rw-r--r-- fs/exofs/inode.c | 12
-rw-r--r-- fs/ext4/ext4.h | 31
-rw-r--r-- fs/ext4/ialloc.c | 7
-rw-r--r-- fs/ext4/inode.c | 28
-rw-r--r-- fs/ext4/namei.c | 19
-rw-r--r-- fs/ext4/super.c | 100
-rw-r--r-- fs/fuse/file.c | 4
-rw-r--r-- fs/fuse/inode.c | 4
-rw-r--r-- fs/gfs2/aops.c | 2
-rw-r--r-- fs/kernfs/file.c | 22
-rw-r--r-- fs/kernfs/mount.c | 3
-rw-r--r-- fs/lockd/svc.c | 2
-rw-r--r-- fs/locks.c | 23
-rw-r--r-- fs/logfs/dev_bdev.c | 4
-rw-r--r-- fs/logfs/dev_mtd.c | 4
-rw-r--r-- fs/logfs/dir.c | 4
-rw-r--r-- fs/logfs/logfs.h | 4
-rw-r--r-- fs/namei.c | 1
-rw-r--r-- fs/namespace.c | 21
-rw-r--r-- fs/nfs/namespace.c | 2
-rw-r--r-- fs/nfs/nfs4namespace.c | 2
-rw-r--r-- fs/nfs/read.c | 2
-rw-r--r-- fs/notify/fanotify/Kconfig | 1
-rw-r--r-- fs/notify/inotify/Kconfig | 1
-rw-r--r-- fs/ocfs2/dlmfs/dlmfs.c | 4
-rw-r--r-- fs/proc/base.c | 17
-rw-r--r-- fs/read_write.c | 54
-rw-r--r-- fs/super.c | 15
-rw-r--r-- include/asm-generic/vmlinux.lds.h | 94
-rw-r--r-- include/linux/bpf-cgroup.h | 15
-rw-r--r-- include/linux/bpf.h | 28
-rw-r--r-- include/linux/cgroup-defs.h | 6
-rw-r--r-- include/linux/compat.h | 15
-rw-r--r-- include/linux/compiler.h | 23
-rw-r--r-- include/linux/cpufreq.h | 3
-rw-r--r-- include/linux/debugfs.h | 3
-rw-r--r-- include/linux/export.h | 14
-rw-r--r-- include/linux/ftrace.h | 8
-rw-r--r-- include/linux/gfp.h | 2
-rw-r--r-- include/linux/init.h | 38
-rw-r--r-- include/linux/init_task.h | 1
-rw-r--r-- include/linux/irq.h | 29
-rw-r--r-- include/linux/mm_types.h | 2
-rw-r--r-- include/linux/moduleparam.h | 16
-rw-r--r-- include/linux/mount.h | 3
-rw-r--r-- include/linux/pagemap.h | 4
-rw-r--r-- include/linux/pid.h | 5
-rw-r--r-- include/linux/proc_fs.h | 6
-rw-r--r-- include/linux/rculist_nulls.h | 1
-rw-r--r-- include/linux/sched.h | 11
-rw-r--r-- include/linux/sched/sysctl.h | 6
-rw-r--r-- include/linux/skbuff.h | 5
-rw-r--r-- include/linux/syscalls.h | 5
-rw-r--r-- include/linux/time.h | 1
-rw-r--r-- include/linux/udp.h | 8
-rw-r--r-- include/net/addrconf.h | 2
-rw-r--r-- include/net/inet6_hashtables.h | 13
-rw-r--r-- include/net/inet_hashtables.h | 25
-rw-r--r-- include/net/ip_tunnels.h | 11
-rw-r--r-- include/net/netfilter/nf_conntrack.h | 2
-rw-r--r-- include/net/phonet/phonet.h | 2
-rw-r--r-- include/net/ping.h | 2
-rw-r--r-- include/net/raw.h | 2
-rw-r--r-- include/net/sock.h | 18
-rw-r--r-- include/net/sock_reuseport.h | 2
-rw-r--r-- include/net/udp.h | 5
-rw-r--r-- include/trace/events/filelock.h | 77
-rw-r--r-- include/trace/events/sched.h | 7
-rw-r--r-- include/uapi/asm-generic/unistd.h | 6
-rw-r--r-- include/uapi/linux/bpf.h | 6
-rw-r--r-- include/uapi/linux/fs.h | 1
-rw-r--r-- include/uapi/linux/sched.h | 1
-rw-r--r-- init/Kconfig | 10
-rw-r--r-- init/Makefile | 2
-rw-r--r-- kernel/bpf/cgroup.c | 33
-rw-r--r-- kernel/bpf/core.c | 24
-rw-r--r-- kernel/bpf/syscall.c | 57
-rw-r--r-- kernel/cgroup.c | 31
-rw-r--r-- kernel/cpuset.c | 18
-rw-r--r-- kernel/exit.c | 1
-rw-r--r-- kernel/fork.c | 137
-rw-r--r-- kernel/irq/Kconfig | 4
-rw-r--r-- kernel/irq/cpuhotplug.c | 8
-rw-r--r-- kernel/irq/irqdesc.c | 14
-rw-r--r-- kernel/irq/proc.c | 90
-rw-r--r-- kernel/pid.c | 74
-rw-r--r-- kernel/power/qos.c | 30
-rw-r--r-- kernel/power/suspend.c | 6
-rw-r--r-- kernel/rcu/update.c | 17
-rw-r--r-- kernel/sched/Makefile | 1
-rw-r--r-- kernel/sched/core.c | 100
-rw-r--r-- kernel/sched/cpufreq_schedutil.c | 32
-rw-r--r-- kernel/sched/cpupri.c | 2
-rw-r--r-- kernel/sched/cputime.c | 6
-rw-r--r-- kernel/sched/fair.c | 214
-rw-r--r-- kernel/sched/hmp.c | 32
-rw-r--r-- kernel/sched/rt.c | 11
-rw-r--r-- kernel/sched/sched.h | 17
-rw-r--r-- kernel/sched/stop_task.c | 3
-rw-r--r-- kernel/sched/tune.c | 4
-rw-r--r-- kernel/sched/walt.c | 5
-rw-r--r-- kernel/sched/walt.h | 2
-rw-r--r-- kernel/signal.c | 142
-rw-r--r-- kernel/sys.c | 10
-rw-r--r-- kernel/sysctl.c | 30
-rw-r--r-- kernel/time/alarmtimer.c | 2
-rw-r--r-- kernel/time/hrtimer.c | 3
-rw-r--r-- kernel/time/timer.c | 2
-rw-r--r-- kernel/trace/ftrace.c | 17
-rw-r--r-- kernel/trace/trace.c | 4
-rw-r--r-- mm/filemap.c | 6
-rw-r--r-- mm/init-mm.c | 1
-rw-r--r-- mm/ksm.c | 5
-rw-r--r-- mm/memory.c | 42
-rw-r--r-- mm/page_alloc.c | 6
-rw-r--r-- mm/readahead.c | 2
-rw-r--r-- net/compat.c | 3
-rw-r--r-- net/core/filter.c | 65
-rw-r--r-- net/core/skbuff.c | 29
-rw-r--r-- net/core/sock_reuseport.c | 58
-rw-r--r-- net/dccp/ipv4.c | 2
-rw-r--r-- net/dccp/ipv6.c | 4
-rw-r--r-- net/ieee802154/socket.c | 17
-rw-r--r-- net/ipv4/af_inet.c | 21
-rw-r--r-- net/ipv4/inet_connection_sock.c | 22
-rw-r--r-- net/ipv4/inet_diag.c | 6
-rw-r--r-- net/ipv4/inet_hashtables.c | 66
-rw-r--r-- net/ipv4/ping.c | 4
-rw-r--r-- net/ipv4/raw.c | 4
-rw-r--r-- net/ipv4/tcp_ipv4.c | 10
-rw-r--r-- net/ipv4/udp.c | 308
-rw-r--r-- net/ipv4/udp_diag.c | 20
-rw-r--r-- net/ipv4/udplite.c | 1
-rw-r--r-- net/ipv6/af_inet6.c | 25
-rw-r--r-- net/ipv6/inet6_connection_sock.c | 2
-rw-r--r-- net/ipv6/inet6_hashtables.c | 80
-rw-r--r-- net/ipv6/tcp_ipv6.c | 10
-rw-r--r-- net/ipv6/udp.c | 246
-rw-r--r-- net/ipv6/udplite.c | 1
-rw-r--r-- net/l2tp/l2tp_ip.c | 3
-rw-r--r-- net/l2tp/l2tp_ip6.c | 3
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 2
-rw-r--r-- net/netfilter/nf_nat_ftp.c | 2
-rw-r--r-- net/netfilter/nf_nat_irc.c | 2
-rw-r--r-- net/netfilter/xt_TPROXY.c | 31
-rw-r--r-- net/netfilter/xt_socket.c | 32
-rw-r--r-- net/phonet/socket.c | 6
-rw-r--r-- net/sctp/socket.c | 3
-rw-r--r-- net/sunrpc/svc.c | 4
-rw-r--r-- samples/bpf/bpf_helpers.h | 4
-rw-r--r-- scripts/Kbuild.include | 57
-rw-r--r-- scripts/Makefile.build | 57
-rw-r--r-- scripts/Makefile.host | 2
-rw-r--r-- scripts/Makefile.lib | 4
-rw-r--r-- scripts/Makefile.modpost | 14
-rwxr-xr-x scripts/clang-version.sh | 33
-rw-r--r-- scripts/extract-cert.c | 7
-rwxr-xr-x scripts/link-vmlinux.sh | 118
-rwxr-xr-x scripts/sign-file.c | 7
-rw-r--r-- security/apparmor/lsm.c | 16
-rw-r--r-- security/selinux/hooks.c | 2
-rw-r--r-- security/selinux/include/classmap.h | 3
-rw-r--r-- security/selinux/include/security.h | 1
-rw-r--r-- security/selinux/nlmsgtab.c | 24
-rw-r--r-- security/selinux/ss/policydb.c | 4
-rw-r--r-- security/selinux/ss/policydb.h | 2
-rw-r--r-- security/selinux/ss/services.c | 2
-rw-r--r-- sound/core/control.c | 78
-rw-r--r-- sound/core/control_compat.c | 10
-rw-r--r-- sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c | 9
-rw-r--r-- tools/testing/selftests/net/.gitignore | 2
-rw-r--r-- tools/testing/selftests/net/Makefile | 2
-rw-r--r-- tools/testing/selftests/net/reuseport_bpf.c | 641
-rw-r--r-- tools/testing/selftests/net/reuseport_bpf_cpu.c | 258
319 files changed, 4766 insertions, 1503 deletions
diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX
index 8c5e6aa78004..b367200c94b5 100644
--- a/Documentation/kbuild/00-INDEX
+++ b/Documentation/kbuild/00-INDEX
@@ -1,5 +1,7 @@
00-INDEX
- this file: info on the kernel build process
+llvm.txt
+ - how to compile Linux with Clang/LLVM
headers_install.txt
- how to export Linux headers for use by userspace
kbuild.txt
diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index 0ff6a466a05b..3e63c2361291 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -238,3 +238,14 @@ KBUILD_VMLINUX_MAIN
All object files for the main part of vmlinux.
KBUILD_VMLINUX_INIT and KBUILD_VMLINUX_MAIN together specify
all the object files used to link vmlinux.
+
+KBUILD_VMLINUX_LIBS
+--------------------------------------------------
+All .a "lib" files for vmlinux.
+KBUILD_VMLINUX_INIT, KBUILD_VMLINUX_MAIN, and KBUILD_VMLINUX_LIBS together
+specify all the object files used to link vmlinux.
+
+LLVM
+----
+If this variable is set to 1, Kbuild will use Clang and LLVM utilities instead
+of GCC and GNU binutils to build the kernel.
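+
+For example, a build driven entirely by this variable might look like
+(a minimal sketch; assumes clang, ld.lld and the llvm-* utilities are
+in PATH):
+
+	make LLVM=1 defconfig
+	make LLVM=1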
diff --git a/Documentation/kbuild/llvm.txt b/Documentation/kbuild/llvm.txt
new file mode 100644
index 000000000000..51c009baa35a
--- /dev/null
+++ b/Documentation/kbuild/llvm.txt
@@ -0,0 +1,86 @@
+Building Linux with Clang/LLVM
+==============================
+
+This document covers how to build the Linux kernel with Clang and LLVM
+utilities.
+
+About
+-----
+
+The Linux kernel has traditionally been compiled with GNU
+toolchains such as GCC and binutils. Ongoing work has allowed for
+[Clang](https://clang.llvm.org/) and [LLVM](https://llvm.org/) utilities
+to be used as viable substitutes. Distributions such as
+[Android](https://www.android.com/),
+[ChromeOS](https://www.chromium.org/chromium-os), and
+[OpenMandriva](https://www.openmandriva.org/) use Clang built kernels.
+[LLVM is a collection of toolchain components implemented in terms of
+C++ objects](https://www.aosabook.org/en/llvm.html). Clang is a
+front-end to LLVM that supports C and the GNU C extensions required by
+the kernel, and is pronounced "klang," not "see-lang."
+
+Clang
+-----
+
+The compiler used can be swapped out via the `CC=` command line
+argument to `make`. `CC=` should be set both when selecting a config
+and during a build.
+
+> make CC=clang defconfig
+>
+> make CC=clang
+
+Cross Compiling
+---------------
+
+A single Clang compiler binary will typically contain all supported
+backends, which can help simplify cross compiling.
+
+> ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang
+
+`CROSS_COMPILE` is not used to prefix the Clang compiler binary;
+instead, `CROSS_COMPILE` is used to set a command line flag:
+`--target <triple>`. For example:
+
+> clang --target aarch64-linux-gnu foo.c
+
+LLVM Utilities
+--------------
+
+LLVM has substitutes for GNU binutils utilities. These can be invoked
+as additional parameters to `make`.
+
+> make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \
+> OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump OBJSIZE=llvm-size \
+> READELF=llvm-readelf HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar \
+> HOSTLD=ld.lld
+
+Currently, the integrated assembler is disabled by default. You can pass
+`LLVM_IAS=1` to enable it.
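+
+For example (a sketch; assumes the LLVM build in PATH can assemble the
+kernel's assembly files for your target architecture):
+
+> make CC=clang LLVM_IAS=1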
+
+Getting Help
+------------
+
+- [Website](https://clangbuiltlinux.github.io/)
+- [Mailing
+ List](https://groups.google.com/forum/#!forum/clang-built-linux):
+ <clang-built-linux@googlegroups.com>
+- [Issue Tracker](https://github.com/ClangBuiltLinux/linux/issues)
+- IRC: #clangbuiltlinux on chat.freenode.net
+- [Telegram](https://t.me/ClangBuiltLinux): @ClangBuiltLinux
+- [Wiki](https://github.com/ClangBuiltLinux/linux/wiki)
+- [Beginner
+ Bugs](https://github.com/ClangBuiltLinux/linux/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22)
+
+Getting LLVM
+------------
+
+- <http://releases.llvm.org/download.html>
+- <https://github.com/llvm/llvm-project>
+- <https://llvm.org/docs/GettingStarted.html>
+- <https://llvm.org/docs/CMake.html>
+- <https://apt.llvm.org/>
+- <https://www.archlinux.org/packages/extra/x86_64/llvm/>
+- <https://github.com/ClangBuiltLinux/tc-build>
+- <https://github.com/ClangBuiltLinux/linux/wiki/Building-Clang-from-source>
+- <https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86/>
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 64c84e2d06c0..fba7bed6193a 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -41,6 +41,7 @@ This document describes the Linux kernel Makefiles.
--- 6.8 Custom kbuild commands
--- 6.9 Preprocessing linker scripts
--- 6.10 Generic header files
+ --- 6.11 Post-link pass
=== 7 Kbuild syntax for exported headers
--- 7.1 no-export-headers-y
@@ -1251,6 +1252,21 @@ When kbuild executes, the following steps are followed (roughly):
For example, powerpc uses this to check relocation sanity of
the linked vmlinux file.
+--- 6.11 Post-link pass
+
+ If the file arch/xxx/Makefile.postlink exists, this makefile
+ will be invoked for post-link objects (vmlinux and *.ko modules)
+ so that architectures can run post-link passes on them. It must
+ also handle the clean target.
+
+ This pass runs after kallsyms generation. If the architecture
+ needs to modify symbol locations, rather than manipulate the
+ kallsyms, it may be easier to add another postlink target for
+ .tmp_vmlinux? targets to be called from link-vmlinux.sh.
+
+ For example, powerpc uses this to check relocation sanity of
+ the linked vmlinux file.
+
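+ As an illustration, the invocation kbuild performs for this pass is
+ roughly equivalent to (a sketch, using powerpc as the example
+ architecture):
+
+	make -f arch/powerpc/Makefile.postlink vmlinux
+	make -f arch/powerpc/Makefile.postlink clean
+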
=== 7 Kbuild syntax for exported headers
The kernel includes a set of headers that is exported to userspace.
diff --git a/Documentation/scheduler/sched-bwc.txt b/Documentation/scheduler/sched-bwc.txt
index f6b1873f68ab..de583fbbfe42 100644
--- a/Documentation/scheduler/sched-bwc.txt
+++ b/Documentation/scheduler/sched-bwc.txt
@@ -90,6 +90,51 @@ There are two ways in which a group may become throttled:
In case b) above, even though the child may have runtime remaining it will not
be allowed to until the parent's runtime is refreshed.
+CFS Bandwidth Quota Caveats
+---------------------------
+Once a slice is assigned to a cpu it does not expire. However, all but 1ms of
+the slice may be returned to the global pool if all threads on that cpu become
+unrunnable. This is configured at compile time by the min_cfs_rq_runtime
+variable. This is a performance tweak that helps prevent added contention on
+the global lock.
+
+The fact that cpu-local slices do not expire results in some interesting corner
+cases that should be understood.
+
+For cgroup cpu constrained applications this is a relatively moot point
+because they will naturally consume the entirety of their quota as well
+as the entirety of each cpu-local slice in each period. As a result it
+is expected that nr_periods will roughly equal nr_throttled, and that
+cpuacct.usage will increase by roughly cfs_quota_us in each period.
+
+For highly threaded, non-cpu bound applications this non-expiration nuance
+allows applications to briefly burst past their quota limits by the amount of
+unused slice on each cpu that the task group is running on (typically at most
+1ms per cpu or as defined by min_cfs_rq_runtime). This slight burst only
+applies if quota had been assigned to a cpu and then not fully used or returned
+in previous periods. This burst amount will not be transferred between cores.
+As a result, this mechanism still strictly limits the task group to quota
+average usage, albeit over a longer time window than a single period. This
+also limits the burst ability to no more than 1ms per cpu. This
+provides a better, more predictable user experience for highly threaded
+applications with small quota limits on high core count machines. It
+also eliminates the propensity to throttle these applications while
+simultaneously using less than quota amounts of cpu. Another way to say
+this is that by allowing the unused portion of a slice to remain valid
+across periods we have decreased the possibility of wastefully expiring
+quota on cpu-local silos that don't need a full slice's amount of cpu
+time.
+
+The interaction between cpu-bound and non-cpu-bound-interactive applications
+should also be considered, especially when single core usage hits 100%. If you
+gave each of these applications half of a cpu-core and they both got scheduled
+on the same CPU it is theoretically possible that the non-cpu bound application
+will use up to 1ms additional quota in some periods, thereby preventing the
+cpu-bound application from fully using its quota by that same amount. In these
+instances it will be up to the CFS algorithm (see sched-design-CFS.rst) to
+decide which application is chosen to run, as they will both be runnable and
+have remaining quota. This runtime discrepancy will be made up in the following
+periods when the interactive application idles.
+
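+As a worked example (the numbers are illustrative only): with
+cfs_quota_us=50000 and cfs_period_us=100000 on an 8 cpu machine, each
+cpu may retain up to 1ms of unexpired slice, so the group could briefly
+consume up to 50ms + 8 * 1ms = 58ms in a single period, while still
+averaging 50ms per period over time:
+
+	# echo 50000 > cpu.cfs_quota_us   /* quota = 50ms */
+	# echo 100000 > cpu.cfs_period_us /* period = 100ms */
+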
Examples
--------
1. Limit a group to 1 CPU worth of runtime.
diff --git a/Makefile b/Makefile
index 347dc9c6d7e9..26643c97f6af 100644
--- a/Makefile
+++ b/Makefile
@@ -301,8 +301,14 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
else if [ -x /bin/bash ]; then echo /bin/bash; \
else echo sh; fi ; fi)
+ifneq ($(LLVM),)
+HOSTCC = clang
+HOSTCXX = clang++
+else
HOSTCC = gcc
HOSTCXX = g++
+endif
+
HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu89
HOSTCXXFLAGS = -O2
@@ -337,15 +343,27 @@ scripts/Kbuild.include: ;
include scripts/Kbuild.include
# Make variables (CC, etc...)
+CPP = $(CC) -E
+ifneq ($(LLVM),)
+CC = clang
+LD = ld.lld
+AR = llvm-ar
+NM = llvm-nm
+OBJCOPY = llvm-objcopy
+OBJDUMP = llvm-objdump
+READELF = llvm-readelf
+OBJSIZE = llvm-size
+STRIP = llvm-strip
+else
AS = $(CROSS_COMPILE)as
LD = $(CROSS_COMPILE)ld
CC = $(CROSS_COMPILE)gcc
-CPP = $(CC) -E
AR = $(CROSS_COMPILE)ar
NM = $(CROSS_COMPILE)nm
STRIP = $(CROSS_COMPILE)strip
OBJCOPY = $(CROSS_COMPILE)objcopy
OBJDUMP = $(CROSS_COMPILE)objdump
+endif
AWK = awk
GENKSYMS = scripts/genksyms/genksyms
INSTALLKERNEL := installkernel
@@ -400,6 +418,7 @@ KBUILD_AFLAGS := -D__ASSEMBLY__ $(call cc-option,-fno-PIE)
KBUILD_AFLAGS_MODULE := -DMODULE
KBUILD_CFLAGS_MODULE := -DMODULE
KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
+LDFLAGS :=
CLANG_FLAGS :=
# Read KERNELRELEASE from include/config/kernel.release (if it exists)
@@ -622,7 +641,9 @@ endif
ifneq ($(GCC_TOOLCHAIN),)
CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN)
endif
+ifneq ($(LLVM_IAS),1)
CLANG_FLAGS += -no-integrated-as
+endif
CLANG_FLAGS += -Werror=unknown-warning-option
CLANG_FLAGS += $(call cc-option, -Wno-misleading-indentation)
CLANG_FLAGS += $(call cc-option, -Wno-bool-operation)
@@ -646,6 +667,11 @@ KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, attribute-alias)
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,)
+KBUILD_CFLAGS += $(call cc-option,-fdata-sections,)
+endif
+
ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
else
@@ -772,8 +798,11 @@ KBUILD_CFLAGS += $(call cc-option, -gsplit-dwarf, -g)
else
KBUILD_CFLAGS += -g
endif
+ifneq ($(LLVM_IAS),1)
KBUILD_AFLAGS += -Wa,-gdwarf-2
endif
+endif
+
ifdef CONFIG_DEBUG_INFO_DWARF4
KBUILD_CFLAGS += $(call cc-option, -gdwarf-4,)
endif
@@ -883,6 +912,10 @@ LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+LDFLAGS_vmlinux += $(call ld-option, --gc-sections,)
+endif
+
ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
LDFLAGS_vmlinux += $(call ld-option, -X,)
endif
@@ -985,22 +1018,27 @@ core-y := $(patsubst %/, %/built-in.o, $(core-y))
drivers-y := $(patsubst %/, %/built-in.o, $(drivers-y))
net-y := $(patsubst %/, %/built-in.o, $(net-y))
libs-y1 := $(patsubst %/, %/lib.a, $(libs-y))
-libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y))
-libs-y := $(libs-y1) $(libs-y2)
+libs-y2 := $(filter-out %.a, $(patsubst %/, %/built-in.o, $(libs-y)))
virt-y := $(patsubst %/, %/built-in.o, $(virt-y))
# Externally visible symbols (used by link-vmlinux.sh)
export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
-export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) $(virt-y)
+export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y2) $(drivers-y) $(net-y) $(virt-y)
+export KBUILD_VMLINUX_LIBS := $(libs-y1)
export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds
export LDFLAGS_vmlinux
# used by scripts/package/Makefile
export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools)
-vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN)
+vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN) $(KBUILD_VMLINUX_LIBS)
+
+ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
# Final link of vmlinux
- cmd_link-vmlinux = $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux)
+ cmd_link-vmlinux = \
+ $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux); \
+ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+
quiet_cmd_link-vmlinux = LINK $@
# Include targets which we want to
@@ -1018,8 +1056,16 @@ endif
ifdef CONFIG_GDB_SCRIPTS
$(Q)ln -fsn `cd $(srctree) && /bin/pwd`/scripts/gdb/vmlinux-gdb.py
endif
+ifdef CONFIG_TRIM_UNUSED_KSYMS
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \
+ "$(MAKE) KBUILD_MODULES=1 -f $(srctree)/Makefile vmlinux_prereq"
+endif
+$(call if_changed,link-vmlinux)
+# standalone target for easier testing
+include/generated/autoksyms.h: FORCE
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh true
+
# The actual objects are generated when descending,
# make sure no implicit rule kicks in
$(sort $(vmlinux-deps)): $(vmlinux-dirs) ;
@@ -1295,6 +1341,7 @@ $(clean-dirs):
vmlinuxclean:
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean
+ $(Q)$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) clean)
clean: archclean vmlinuxclean
diff --git a/arch/Kconfig b/arch/Kconfig
index a29c6098fb98..b8629dc08ae0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -432,6 +432,25 @@ config HAVE_ARCH_WITHIN_STACK_FRAMES
and similar) by implementing an inline arch_within_stack_frames(),
which is used by CONFIG_HARDENED_USERCOPY.
+config THIN_ARCHIVES
+ def_bool y
+ help
+ Select this if the architecture wants to use thin archives
+ instead of ld -r to create the built-in.o files.
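+
+ For reference, a thin archive is created with ar's 'T' modifier
+ rather than by linking with ld -r (a sketch; the object names are
+ placeholders):
+
+	ar rcsT built-in.o foo.o bar.o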
+
+config LD_DEAD_CODE_DATA_ELIMINATION
+ bool
+ help
+ Select this if the architecture wants to do dead code and
+ data elimination with the linker by compiling with
+ -ffunction-sections -fdata-sections and linking with
+ --gc-sections.
+
+ This requires that the arch annotates or otherwise protects
+ its external entry points from being discarded. Linker scripts
+ must also merge .text.*, .data.*, and .bss.* correctly into
+ output sections.
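+
+ As an illustration outside kbuild (a sketch; any C file with an
+ unreferenced function will do):
+
+	cc -ffunction-sections -fdata-sections -c foo.c
+	cc -Wl,--gc-sections -o foo foo.o
+	nm foo | grep unused_fn    # no output: section dropped at link time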
+
config HAVE_CONTEXT_TRACKING
bool
help
diff --git a/arch/arm/boot/dts/qcom/msm8996-v3.dtsi b/arch/arm/boot/dts/qcom/msm8996-v3.dtsi
index 711e0c259c6a..aaf6b4250459 100644
--- a/arch/arm/boot/dts/qcom/msm8996-v3.dtsi
+++ b/arch/arm/boot/dts/qcom/msm8996-v3.dtsi
@@ -941,7 +941,6 @@
};
qcom,msm-thermal {
msm_thermal_freq: qcom,vdd-apps-rstr{
- qcom,max-freq-level = <1190400>;
qcom,levels = <1036800 1555200 1555200>;
};
qcom,vdd-gfx-rstr{
diff --git a/arch/arm/boot/dts/qcom/msm8996pro.dtsi b/arch/arm/boot/dts/qcom/msm8996pro.dtsi
index f140e78a16d7..d0949d70c3ca 100644
--- a/arch/arm/boot/dts/qcom/msm8996pro.dtsi
+++ b/arch/arm/boot/dts/qcom/msm8996pro.dtsi
@@ -1332,7 +1332,6 @@
qcom,limit-temp = <80>;
qcom,core-limit-temp = <90>;
msm_thermal_freq: qcom,vdd-apps-rstr {
- qcom,max-freq-level = <1209600>;
qcom,levels = <1056000 1516800 1516800>;
};
qcom,vdd-gfx-rstr{
diff --git a/arch/arm/boot/dts/qcom/msm8998.dtsi b/arch/arm/boot/dts/qcom/msm8998.dtsi
index ae664e48afff..b9a38ddc5ba8 100644
--- a/arch/arm/boot/dts/qcom/msm8998.dtsi
+++ b/arch/arm/boot/dts/qcom/msm8998.dtsi
@@ -49,6 +49,7 @@
qcom,lmh-dcvs = <&lmh_dcvs0>;
enable-method = "psci";
efficiency = <1024>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
next-level-cache = <&L2_0>;
qcom,ea = <&ea0>;
L2_0: l2-cache {
@@ -77,6 +78,7 @@
qcom,lmh-dcvs = <&lmh_dcvs0>;
enable-method = "psci";
efficiency = <1024>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
next-level-cache = <&L2_0>;
qcom,ea = <&ea1>;
L1_I_1: l1-icache {
@@ -100,6 +102,7 @@
qcom,lmh-dcvs = <&lmh_dcvs0>;
enable-method = "psci";
efficiency = <1024>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
next-level-cache = <&L2_0>;
qcom,ea = <&ea2>;
L1_I_2: l1-icache {
@@ -123,6 +126,7 @@
qcom,lmh-dcvs = <&lmh_dcvs0>;
enable-method = "psci";
efficiency = <1024>;
+ sched-energy-costs = <&CPU_COST_0 &CLUSTER_COST_0>;
next-level-cache = <&L2_0>;
qcom,ea = <&ea3>;
L1_I_3: l1-icache {
@@ -146,6 +150,7 @@
qcom,lmh-dcvs = <&lmh_dcvs1>;
enable-method = "psci";
efficiency = <1536>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
next-level-cache = <&L2_1>;
qcom,ea = <&ea4>;
L2_1: l2-cache {
@@ -173,6 +178,7 @@
qcom,lmh-dcvs = <&lmh_dcvs1>;
enable-method = "psci";
efficiency = <1536>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
next-level-cache = <&L2_1>;
qcom,ea = <&ea5>;
L1_I_101: l1-icache {
@@ -196,6 +202,7 @@
qcom,lmh-dcvs = <&lmh_dcvs1>;
enable-method = "psci";
efficiency = <1536>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
next-level-cache = <&L2_1>;
qcom,ea = <&ea6>;
L1_I_102: l1-icache {
@@ -219,6 +226,7 @@
qcom,lmh-dcvs = <&lmh_dcvs1>;
enable-method = "psci";
efficiency = <1536>;
+ sched-energy-costs = <&CPU_COST_1 &CLUSTER_COST_1>;
next-level-cache = <&L2_1>;
qcom,ea = <&ea7>;
L1_I_103: l1-icache {
@@ -271,6 +279,142 @@
};
};
};
+ energy-costs {
+ CPU_COST_0: core-cost0 {
+ busy-cost-data = <
+ 65 11
+ 80 20
+ 96 25
+ 113 30
+ 130 33
+ 147 35
+ 164 36
+ 181 42
+ 194 47
+ 211 54
+ 228 62
+ 243 67
+ 258 73
+ 275 79
+ 292 88
+ 308 95
+ 326 104
+ 342 111
+ 368 134
+ 384 155
+ 401 178
+ 419 201
+ >;
+ idle-cost-data = <
+ 4 4 0 0
+ >;
+ };
+ CPU_COST_1: core-cost1 {
+ busy-cost-data = <
+ 129 56
+ 148 76
+ 182 91
+ 216 105
+ 247 118
+ 278 135
+ 312 150
+ 344 162
+ 391 181
+ 419 196
+ 453 214
+ 487 229
+ 509 248
+ 546 280
+ 581 316
+ 615 354
+ 650 392
+ 676 439
+ 712 495
+ 739 565
+ 776 622
+ 803 691
+ 834 792
+ 881 889
+ 914 1059
+ 957 1244
+ 975 1375
+ 996 1549
+ 1016 1617
+ 1021 1677
+ 1024 1683
+ >;
+ idle-cost-data = <
+ 10 10 0 0
+ >;
+ };
+ CLUSTER_COST_0: cluster-cost0 {
+ busy-cost-data = <
+ 65 17
+ 80 18
+ 96 18
+ 113 20
+ 130 21
+ 147 22
+ 164 23
+ 181 24
+ 194 27
+ 211 29
+ 228 30
+ 243 32
+ 258 33
+ 275 35
+ 292 38
+ 308 39
+ 326 42
+ 342 46
+ 368 48
+ 384 53
+ 401 59
+ 419 66
+ >;
+ idle-cost-data = <
+ 31 31 31 0
+ >;
+ };
+ CLUSTER_COST_1: cluster-cost1 {
+ busy-cost-data = <
+ 129 24
+ 148 25
+ 182 26
+ 216 29
+ 247 30
+ 278 33
+ 312 35
+ 344 37
+ 391 38
+ 419 40
+ 453 43
+ 487 44
+ 509 46
+ 546 50
+ 581 54
+ 615 60
+ 650 63
+ 676 70
+ 712 74
+ 739 80
+ 776 87
+ 803 96
+ 834 104
+ 881 120
+ 914 130
+ 957 171
+ 975 178
+ 996 185
+ 1016 200
+ 1021 202
+ 1024 203
+ >;
+ idle-cost-data = <
+ 50 50 50 0
+ >;
+ };
+ };
};
soc: soc { };
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 95a000515e43..7d656525a671 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on MMU && OF
select PREEMPT_NOTIFIERS
- select ANON_INODES
select ARM_GIC
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index f4e9585226f3..fe0fb38df196 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -405,3 +405,4 @@
388 common userfaultfd sys_userfaultfd
389 common membarrier sys_membarrier
390 common mlock2 sys_mlock2
+434 common pidfd_open sys_pidfd_open
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8b055133ae2a..28f09a00143b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -458,18 +458,13 @@ config ARM64_ERRATUM_845719
config ARM64_ERRATUM_843419
bool "Cortex-A53: 843419: A load or store might access an incorrect address"
- depends on MODULES
default y
- select ARM64_MODULE_CMODEL_LARGE
+ select ARM64_MODULE_CMODEL_LARGE if MODULES
help
- This option builds kernel modules using the large memory model in
- order to avoid the use of the ADRP instruction, which can cause
- a subsequent memory access to use an incorrect address on Cortex-A53
- parts up to r0p4.
-
- Note that the kernel itself must be linked with a version of ld
- which fixes potentially affected ADRP instructions through the
- use of veneers.
+ This option links the kernel with '--fix-cortex-a53-843419' and
+ builds modules using the large memory model in order to avoid the use
+ of the ADRP instruction, which can cause a subsequent memory access
+ to use an incorrect address on Cortex-A53 parts up to r0p4.
If unsure, say Y.
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 4df2b48fae44..adc88e707c09 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -10,13 +10,34 @@
#
# Copyright (C) 1995-2001 by Russell King
-LDFLAGS_vmlinux :=--no-undefined -X
+LDFLAGS_vmlinux :=--no-undefined -X -z norelro
CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
GZFLAGS :=-9
-ifneq ($(CONFIG_RELOCATABLE),)
-LDFLAGS_vmlinux += -pie -Bsymbolic
+ifeq ($(CONFIG_RELOCATABLE), y)
+# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
+# for relative relocs, since this leads to better Image compression
+# with the relocation offsets always being zero.
+LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \
+ $(call ld-option, --no-apply-dynamic-relocs)
+endif
+
+ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
+ ifeq ($(call ld-option, --fix-cortex-a53-843419),)
+$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
+ else
+ ifeq ($(call gold-ifversion, -lt, 114000000, y), y)
+$(warning This version of GNU gold may generate incorrect code with --fix-cortex-a53-843419;\
+ see https://sourceware.org/bugzilla/show_bug.cgi?id=21491)
+ endif
+LDFLAGS_vmlinux += --fix-cortex-a53-843419
+ endif
+else
+ ifeq ($(ld-name),gold)
+# Pass --no-fix-cortex-a53-843419 to ensure the erratum fix is disabled
+LDFLAGS += --no-fix-cortex-a53-843419
+ endif
endif
KBUILD_DEFCONFIG := defconfig
@@ -75,11 +96,15 @@ KBUILD_AFLAGS += $(lseinstr) $(vdso32)
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
AS += -EB
-LD += -EB
+# Prefer the baremetal ELF build target, but not all toolchains include
+# it, so fall back to the standard linux target if needed.
+LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb)
+UTS_MACHINE := aarch64_be
else
KBUILD_CPPFLAGS += -mlittle-endian
AS += -EL
-LD += -EL
+# Same as above, prefer ELF but fall back to linux target if needed.
+LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux)
+UTS_MACHINE := aarch64
endif
CHECKFLAGS += -D__aarch64__
diff --git a/arch/arm64/configs/msmcortex_defconfig b/arch/arm64/configs/msmcortex_defconfig
index 478140987c2d..a205cdab4c6f 100644
--- a/arch/arm64/configs/msmcortex_defconfig
+++ b/arch/arm64/configs/msmcortex_defconfig
@@ -703,11 +703,11 @@ CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
CONFIG_IPC_LOGGING=y
CONFIG_QCOM_RTB=y
CONFIG_QCOM_RTB_SEPARATE_CPUS=y
-CONFIG_FUNCTION_TRACER=y
+#CONFIG_FUNCTION_TRACER=y
CONFIG_PREEMPTIRQ_EVENTS=y
-CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPT_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
+#CONFIG_IRQSOFF_TRACER=y
+#CONFIG_PREEMPT_TRACER=y
+#CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_CPU_FREQ_SWITCH_PROFILER=y
CONFIG_LKDTM=y
CONFIG_MEMTEST=y
diff --git a/arch/arm64/configs/z2_plus_defconfig b/arch/arm64/configs/z2_plus_defconfig
index a275d46e8043..26ee73bac32b 100644
--- a/arch/arm64/configs/z2_plus_defconfig
+++ b/arch/arm64/configs/z2_plus_defconfig
@@ -385,6 +385,8 @@ CONFIG_INPUT_GPIO=y
CONFIG_SERIAL_MSM_HS=y
CONFIG_SERIAL_MSM_SMD=y
CONFIG_DIAG_CHAR=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_MSM_LEGACY=y
# CONFIG_DEVPORT is not set
CONFIG_MSM_ADSPRPC=y
CONFIG_I2C_CHARDEV=y
diff --git a/arch/arm64/configs/z2_row_defconfig b/arch/arm64/configs/z2_row_defconfig
index fe19317f66b5..7bba1faa6e3c 100644
--- a/arch/arm64/configs/z2_row_defconfig
+++ b/arch/arm64/configs/z2_row_defconfig
@@ -391,6 +391,8 @@ CONFIG_DRV2605L_HAPTICS=y
CONFIG_SERIAL_MSM_HS=y
CONFIG_SERIAL_MSM_SMD=y
CONFIG_DIAG_CHAR=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_MSM_LEGACY=y
# CONFIG_DEVPORT is not set
CONFIG_MSM_ADSPRPC=y
CONFIG_I2C_CHARDEV=y
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index f0a8f2475ea3..1071dfcdaf1b 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -21,7 +21,7 @@ obj-$(CONFIG_CRYPTO_POLY_HASH_ARM64_CE) += poly-hash-ce.o
poly-hash-ce-y := poly-hash-ce-glue.o poly-hash-ce-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
-CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S
new file mode 100644
index 000000000000..8efdfdade393
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-core.S
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .arch armv8-a+crypto
+
+ENTRY(__aes_ce_encrypt)
+ sub w3, w3, #2
+ ld1 {v0.16b}, [x2]
+ ld1 {v1.4s}, [x0], #16
+ cmp w3, #10
+ bmi 0f
+ bne 3f
+ mov v3.16b, v1.16b
+ b 2f
+0: mov v2.16b, v1.16b
+ ld1 {v3.4s}, [x0], #16
+1: aese v0.16b, v2.16b
+ aesmc v0.16b, v0.16b
+2: ld1 {v1.4s}, [x0], #16
+ aese v0.16b, v3.16b
+ aesmc v0.16b, v0.16b
+3: ld1 {v2.4s}, [x0], #16
+ subs w3, w3, #3
+ aese v0.16b, v1.16b
+ aesmc v0.16b, v0.16b
+ ld1 {v3.4s}, [x0], #16
+ bpl 1b
+ aese v0.16b, v2.16b
+ eor v0.16b, v0.16b, v3.16b
+ st1 {v0.16b}, [x1]
+ ret
+ENDPROC(__aes_ce_encrypt)
+
+ENTRY(__aes_ce_decrypt)
+ sub w3, w3, #2
+ ld1 {v0.16b}, [x2]
+ ld1 {v1.4s}, [x0], #16
+ cmp w3, #10
+ bmi 0f
+ bne 3f
+ mov v3.16b, v1.16b
+ b 2f
+0: mov v2.16b, v1.16b
+ ld1 {v3.4s}, [x0], #16
+1: aesd v0.16b, v2.16b
+ aesimc v0.16b, v0.16b
+2: ld1 {v1.4s}, [x0], #16
+ aesd v0.16b, v3.16b
+ aesimc v0.16b, v0.16b
+3: ld1 {v2.4s}, [x0], #16
+ subs w3, w3, #3
+ aesd v0.16b, v1.16b
+ aesimc v0.16b, v0.16b
+ ld1 {v3.4s}, [x0], #16
+ bpl 1b
+ aesd v0.16b, v2.16b
+ eor v0.16b, v0.16b, v3.16b
+ st1 {v0.16b}, [x1]
+ ret
+ENDPROC(__aes_ce_decrypt)
+
+/*
+ * __aes_ce_sub() - use the aese instruction to perform the AES sbox
+ * substitution on each byte in 'input'
+ */
+ENTRY(__aes_ce_sub)
+ dup v1.4s, w0
+ movi v0.16b, #0
+ aese v0.16b, v1.16b
+ umov w0, v0.s[0]
+ ret
+ENDPROC(__aes_ce_sub)
+
+ENTRY(__aes_ce_invert)
+ ld1 {v0.4s}, [x1]
+ aesimc v1.16b, v0.16b
+ st1 {v1.4s}, [x0]
+ ret
+ENDPROC(__aes_ce_invert)
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-glue.c
index 50d9fe11d0c8..60af7bddfbb9 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -24,6 +24,13 @@ struct aes_block {
u8 b[AES_BLOCK_SIZE];
};
+asmlinkage void __aes_ce_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_ce_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+asmlinkage u32 __aes_ce_sub(u32 l);
+asmlinkage void __aes_ce_invert(struct aes_block *out,
+ const struct aes_block *in);
+
static int num_rounds(struct crypto_aes_ctx *ctx)
{
/*
@@ -39,113 +46,21 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- struct aes_block *out = (struct aes_block *)dst;
- struct aes_block const *in = (struct aes_block *)src;
- void *dummy0;
- int dummy1;
kernel_neon_begin_partial(4);
-
- __asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.16b}, [%[key]], #16 ;"
- " cmp %w[rounds], #10 ;"
- " bmi 0f ;"
- " bne 3f ;"
- " mov v3.16b, v1.16b ;"
- " b 2f ;"
- "0: mov v2.16b, v1.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
- "1: aese v0.16b, v2.16b ;"
- " aesmc v0.16b, v0.16b ;"
- "2: ld1 {v1.16b}, [%[key]], #16 ;"
- " aese v0.16b, v3.16b ;"
- " aesmc v0.16b, v0.16b ;"
- "3: ld1 {v2.16b}, [%[key]], #16 ;"
- " subs %w[rounds], %w[rounds], #3 ;"
- " aese v0.16b, v1.16b ;"
- " aesmc v0.16b, v0.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
- " bpl 1b ;"
- " aese v0.16b, v2.16b ;"
- " eor v0.16b, v0.16b, v3.16b ;"
- " st1 {v0.16b}, %[out] ;"
-
- : [out] "=Q"(*out),
- [key] "=r"(dummy0),
- [rounds] "=r"(dummy1)
- : [in] "Q"(*in),
- "1"(ctx->key_enc),
- "2"(num_rounds(ctx) - 2)
- : "cc");
-
+ __aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
kernel_neon_end();
}
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- struct aes_block *out = (struct aes_block *)dst;
- struct aes_block const *in = (struct aes_block *)src;
- void *dummy0;
- int dummy1;
kernel_neon_begin_partial(4);
-
- __asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.16b}, [%[key]], #16 ;"
- " cmp %w[rounds], #10 ;"
- " bmi 0f ;"
- " bne 3f ;"
- " mov v3.16b, v1.16b ;"
- " b 2f ;"
- "0: mov v2.16b, v1.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
- "1: aesd v0.16b, v2.16b ;"
- " aesimc v0.16b, v0.16b ;"
- "2: ld1 {v1.16b}, [%[key]], #16 ;"
- " aesd v0.16b, v3.16b ;"
- " aesimc v0.16b, v0.16b ;"
- "3: ld1 {v2.16b}, [%[key]], #16 ;"
- " subs %w[rounds], %w[rounds], #3 ;"
- " aesd v0.16b, v1.16b ;"
- " aesimc v0.16b, v0.16b ;"
- " ld1 {v3.16b}, [%[key]], #16 ;"
- " bpl 1b ;"
- " aesd v0.16b, v2.16b ;"
- " eor v0.16b, v0.16b, v3.16b ;"
- " st1 {v0.16b}, %[out] ;"
-
- : [out] "=Q"(*out),
- [key] "=r"(dummy0),
- [rounds] "=r"(dummy1)
- : [in] "Q"(*in),
- "1"(ctx->key_dec),
- "2"(num_rounds(ctx) - 2)
- : "cc");
-
+ __aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
kernel_neon_end();
}
-/*
- * aes_sub() - use the aese instruction to perform the AES sbox substitution
- * on each byte in 'input'
- */
-static u32 aes_sub(u32 input)
-{
- u32 ret;
-
- __asm__("dup v1.4s, %w[in] ;"
- "movi v0.16b, #0 ;"
- "aese v0.16b, v1.16b ;"
- "umov %w[out], v0.4s[0] ;"
-
- : [out] "=r"(ret)
- : [in] "r"(input)
- : "v0","v1");
-
- return ret;
-}
-
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len)
{
@@ -174,9 +89,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
u32 *rko = rki + kwords;
#ifndef CONFIG_CPU_BIG_ENDIAN
- rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+ rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
#else
- rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
+ rko[0] = rol32(__aes_ce_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
rki[0];
#endif
rko[1] = rko[0] ^ rki[1];
@@ -191,7 +106,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
} else if (key_len == AES_KEYSIZE_256) {
if (i >= 6)
break;
- rko[4] = aes_sub(rko[3]) ^ rki[4];
+ rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
rko[5] = rko[4] ^ rki[5];
rko[6] = rko[5] ^ rki[6];
rko[7] = rko[6] ^ rki[7];
@@ -210,13 +125,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
key_dec[0] = key_enc[j];
for (i = 1, j--; j > 0; i++, j--)
- __asm__("ld1 {v0.16b}, %[in] ;"
- "aesimc v1.16b, v0.16b ;"
- "st1 {v1.16b}, %[out] ;"
-
- : [out] "=Q"(key_dec[i])
- : [in] "Q"(key_enc[j])
- : "v0","v1");
+ __aes_ce_invert(key_dec + i, key_enc + j);
key_dec[i] = key_enc[0];
kernel_neon_end();
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 838dad5c209f..067b4e222f74 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -297,17 +297,19 @@ AES_ENTRY(aes_ctr_encrypt)
eor v1.16b, v1.16b, v3.16b
st1 {v0.16b-v1.16b}, [x0], #32
#else
- ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
- dup v7.4s, w8
+ add w7, w6, #1
mov v0.16b, v4.16b
- add v7.4s, v7.4s, v8.4s
+ add w8, w6, #2
mov v1.16b, v4.16b
- rev32 v8.16b, v7.16b
+ add w9, w6, #3
mov v2.16b, v4.16b
+ rev w7, w7
mov v3.16b, v4.16b
- mov v1.s[3], v8.s[0]
- mov v2.s[3], v8.s[1]
- mov v3.s[3], v8.s[2]
+ rev w8, w8
+ mov v1.s[3], w7
+ rev w9, w9
+ mov v2.s[3], w8
+ mov v3.s[3], w9
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
do_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 7dcfd83ff5e8..5b7de95d6a48 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -329,21 +329,33 @@ lr .req x30 // link register
* size: size of the region
* Corrupts: kaddr, size, tmp1, tmp2
*/
+ .macro __dcache_op_workaround_clean_cache, op, kaddr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ dc \op, \kaddr
+alternative_else
+ dc civac, \kaddr
+alternative_endif
+ .endm
+
.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
dcache_line_size \tmp1, \tmp2
add \size, \kaddr, \size
sub \tmp2, \tmp1, #1
bic \kaddr, \kaddr, \tmp2
9998:
- .if (\op == cvau || \op == cvac)
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
- dc \op, \kaddr
-alternative_else
- dc civac, \kaddr
-alternative_endif
+ .ifc \op, cvau
+ __dcache_op_workaround_clean_cache \op, \kaddr
+ .else
+ .ifc \op, cvac
+ __dcache_op_workaround_clean_cache \op, \kaddr
+ .else
+ .ifc \op, cvap
+ sys 3, c7, c12, 1, \kaddr // dc cvap
.else
dc \op, \kaddr
.endif
+ .endif
+ .endif
add \kaddr, \kaddr, \tmp1
cmp \kaddr, \size
b.lo 9998b
diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
deleted file mode 100644
index 123f45d92cd1..000000000000
--- a/arch/arm64/include/asm/opcodes.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define CONFIG_CPU_ENDIAN_BE8 CONFIG_CPU_BIG_ENDIAN
-#endif
-
-#include <../../arm/include/asm/opcodes.h>
diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h
index 5af574d632fa..6a5b28904c33 100644
--- a/arch/arm64/include/asm/probes.h
+++ b/arch/arm64/include/asm/probes.h
@@ -15,21 +15,22 @@
#ifndef _ARM_PROBES_H
#define _ARM_PROBES_H
-#include <asm/opcodes.h>
-
-struct kprobe;
-struct arch_specific_insn;
-
-typedef u32 kprobe_opcode_t;
-typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *);
+typedef u32 probe_opcode_t;
+typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *);
/* architecture specific copy of original instruction */
-struct arch_specific_insn {
- kprobe_opcode_t *insn;
+struct arch_probe_insn {
+ probe_opcode_t *insn;
pstate_check_t *pstate_cc;
- kprobes_handler_t *handler;
+ probes_handler_t *handler;
/* restore address after step xol */
unsigned long restore;
};
+#ifdef CONFIG_KPROBES
+typedef u32 kprobe_opcode_t;
+struct arch_specific_insn {
+ struct arch_probe_insn api;
+};
+#endif
#endif
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index c768daa084ca..6bc2f3d28b68 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -22,8 +22,6 @@
#include <linux/stringify.h>
-#include <asm/opcodes.h>
-
/*
* ARMv8 ARM reserves the following encoding for system registers:
* (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
@@ -37,6 +35,14 @@
#define sys_reg(op0, op1, crn, crm, op2) \
((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+#ifdef __ASSEMBLY__
+// The space separator is omitted so that __emit_inst(x) can be parsed as
+// either an assembler directive or an assembler macro argument.
+#define __emit_inst(x) .inst(x)
+#else
+#define __emit_inst(x) ".inst " __stringify((x)) "\n\t"
+#endif
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@@ -81,10 +87,10 @@
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3)
-#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
- (!!x)<<8 | 0x1f)
-#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
- (!!x)<<8 | 0x1f)
+#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \
+ (!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \
+ (!!x)<<8 | 0x1f)
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_EE (1 << 25)
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 41e58fe3c041..63469acc5992 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2)
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5)
-#define __NR_compat_syscalls 390
+#define __NR_compat_syscalls 435
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 5b925b761a2a..65e8559fe4aa 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -801,6 +801,8 @@ __SYSCALL(__NR_execveat, compat_sys_execveat)
__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
#define __NR_membarrier 389
__SYSCALL(__NR_membarrier, sys_membarrier)
+#define __NR_pidfd_open 434
+__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/uapi/asm/setup.h b/arch/arm64/include/uapi/asm/setup.h
index 9cf2e46fbbdf..7474c813d5ab 100644
--- a/arch/arm64/include/uapi/asm/setup.h
+++ b/arch/arm64/include/uapi/asm/setup.h
@@ -21,6 +21,6 @@
#include <linux/types.h>
-#define COMMAND_LINE_SIZE 2048
+#define COMMAND_LINE_SIZE 4096
#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 18938199a838..cd3597a75b3b 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -10,6 +10,8 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_insn.o = -pg
CFLAGS_REMOVE_return_address.o = -pg
+CFLAGS_setup.o = -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
# Object file lists.
arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
diff --git a/arch/arm64/kernel/app_setting.c b/arch/arm64/kernel/app_setting.c
index 0c6b00317645..9ee7a753c080 100644
--- a/arch/arm64/kernel/app_setting.c
+++ b/arch/arm64/kernel/app_setting.c
@@ -28,7 +28,7 @@ static struct kparam_string kps = {
.string = lib_str,
.maxlen = MAX_LEN,
};
-static int set_name(const char *str, struct kernel_param *kp);
+static int set_name(const char *str, const struct kernel_param *kp);
module_param_call(lib_name, set_name, param_get_string, &kps, S_IWUSR);
bool use_app_setting = true;
@@ -43,7 +43,7 @@ bool use_32bit_app_setting_pro;
module_param(use_32bit_app_setting_pro, bool, 0644);
MODULE_PARM_DESC(use_32bit_app_setting_pro, "control use of 32 bit app specific settings");
-static int set_name(const char *str, struct kernel_param *kp)
+static int set_name(const char *str, const struct kernel_param *kp)
{
int len = strlen(str);
char *name;
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index e012ecd018ee..89160fcccf8b 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -16,7 +16,6 @@
#include <asm/cpufeature.h>
#include <asm/insn.h>
-#include <asm/opcodes.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
@@ -63,6 +62,7 @@ struct insn_emulation {
static LIST_HEAD(insn_emulation);
static int nr_insn_emulated __initdata;
static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
+static DEFINE_MUTEX(insn_emulation_mutex);
static void register_emulation_hooks(struct insn_emulation_ops *ops)
{
@@ -208,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
loff_t *ppos)
{
int ret = 0;
- struct insn_emulation *insn = (struct insn_emulation *) table->data;
+ struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode);
enum insn_emulation_mode prev_mode = insn->current_mode;
- table->data = &insn->current_mode;
+ mutex_lock(&insn_emulation_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret || !write || prev_mode == insn->current_mode)
@@ -224,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
update_insn_emulation_mode(insn, INSN_UNDEF);
}
ret:
- table->data = insn;
+ mutex_unlock(&insn_emulation_mutex);
return ret;
}
@@ -254,7 +254,7 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
sysctl->maxlen = sizeof(int);
sysctl->procname = insn->ops->name;
- sysctl->data = insn;
+ sysctl->data = &insn->current_mode;
sysctl->extra1 = &insn->min;
sysctl->extra2 = &insn->max;
sysctl->proc_handler = emulation_proc_handler;
@@ -366,6 +366,10 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
return res;
}
+#define ARM_OPCODE_CONDTEST_FAIL 0
+#define ARM_OPCODE_CONDTEST_PASS 1
+#define ARM_OPCODE_CONDTEST_UNCOND 2
+
#define ARM_OPCODE_CONDITION_UNCOND 0xf
static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index f0ca0eb3b077..242f008666e5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -144,7 +144,6 @@ alternative_else_nop_endif
#else
str x20, [tsk, #TI_ADDR_LIMIT]
#endif
- ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 86d444f9c2c1..0c46b0b5ba29 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -73,7 +73,11 @@
#ifdef CONFIG_EFI
-__efistub_stext_offset = stext - _text;
+/*
+ * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol:
+ * https://github.com/ClangBuiltLinux/linux/issues/561
+ */
+__efistub_stext_offset = ABSOLUTE(stext - _text);
/*
* Prevent the symbol aliases below from being emitted into the kallsyms
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index a3f8f8bbfc92..59a4139b3294 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -30,7 +30,6 @@
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/fixmap.h>
-#include <asm/opcodes.h>
#include <asm/insn.h>
#define AARCH64_INSN_SF_BIT BIT(31)
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
index 8949f6c6f729..05881e2b414c 100644
--- a/arch/arm64/kernel/module.lds
+++ b/arch/arm64/kernel/module.lds
@@ -1,3 +1,3 @@
SECTIONS {
- .plt (NOLOAD) : { BYTE(0) }
+ .plt : { BYTE(0) }
}
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index f7931d900bca..6ece25660da0 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -77,8 +77,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* INSN_GOOD If instruction is supported and uses instruction slot,
* INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
*/
-static enum kprobe_insn __kprobes
-arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+enum probe_insn __kprobes
+arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
{
/*
* Instructions reading or modifying the PC won't work from the XOL
@@ -88,26 +88,26 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
return INSN_GOOD;
if (aarch64_insn_is_bcond(insn)) {
- asi->handler = simulate_b_cond;
+ api->handler = simulate_b_cond;
} else if (aarch64_insn_is_cbz(insn) ||
aarch64_insn_is_cbnz(insn)) {
- asi->handler = simulate_cbz_cbnz;
+ api->handler = simulate_cbz_cbnz;
} else if (aarch64_insn_is_tbz(insn) ||
aarch64_insn_is_tbnz(insn)) {
- asi->handler = simulate_tbz_tbnz;
+ api->handler = simulate_tbz_tbnz;
} else if (aarch64_insn_is_adr_adrp(insn)) {
- asi->handler = simulate_adr_adrp;
+ api->handler = simulate_adr_adrp;
} else if (aarch64_insn_is_b(insn) ||
aarch64_insn_is_bl(insn)) {
- asi->handler = simulate_b_bl;
+ api->handler = simulate_b_bl;
} else if (aarch64_insn_is_br(insn) ||
aarch64_insn_is_blr(insn) ||
aarch64_insn_is_ret(insn)) {
- asi->handler = simulate_br_blr_ret;
+ api->handler = simulate_br_blr_ret;
} else if (aarch64_insn_is_ldr_lit(insn)) {
- asi->handler = simulate_ldr_literal;
+ api->handler = simulate_ldr_literal;
} else if (aarch64_insn_is_ldrsw_lit(insn)) {
- asi->handler = simulate_ldrsw_literal;
+ api->handler = simulate_ldrsw_literal;
} else {
/*
* Instruction cannot be stepped out-of-line and we don't
@@ -119,6 +119,7 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
return INSN_GOOD_NO_SLOT;
}
+#ifdef CONFIG_KPROBES
static bool __kprobes
is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
{
@@ -137,13 +138,13 @@ is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
return false;
}
-enum kprobe_insn __kprobes
+enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
{
- enum kprobe_insn decoded;
- kprobe_opcode_t insn = le32_to_cpu(*addr);
- kprobe_opcode_t *scan_start = addr - 1;
- kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
+ enum probe_insn decoded;
+ probe_opcode_t insn = le32_to_cpu(*addr);
+ probe_opcode_t *scan_start = addr - 1;
+ probe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
struct module *mod;
#endif
@@ -164,7 +165,7 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
preempt_enable();
}
#endif
- decoded = arm_probe_decode_insn(insn, asi);
+ decoded = arm_probe_decode_insn(insn, &asi->api);
if (decoded == INSN_REJECTED ||
is_probed_address_atomic(scan_start, scan_end))
@@ -172,3 +173,4 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
return decoded;
}
+#endif
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
index d438289646a6..76d3f315407f 100644
--- a/arch/arm64/kernel/probes/decode-insn.h
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -23,13 +23,17 @@
*/
#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t))
-enum kprobe_insn {
+enum probe_insn {
INSN_REJECTED,
INSN_GOOD_NO_SLOT,
INSN_GOOD,
};
-enum kprobe_insn __kprobes
+#ifdef CONFIG_KPROBES
+enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi);
+#endif
+enum probe_insn __kprobes
+arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi);
#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
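The rename from kprobe_insn/arch_specific_insn to probe_insn/arch_probe_insn detaches the single-instruction decoder from kprobes so it can be shared (uprobes being the obvious consumer); only the atomic-sequence scan in arm_kprobe_decode_insn() stays behind CONFIG_KPROBES. A sketch of a generic caller of the shared entry point, assumed rather than present in this patch:

	struct arch_probe_insn api = { };
	enum probe_insn ret = arm_probe_decode_insn(insn, &api);

	if (ret == INSN_GOOD_NO_SLOT && api.handler)
		api.handler(insn, (long)addr, regs);	/* simulate; no XOL slot needed */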
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 4ea8433011d0..40278b937088 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -44,31 +44,31 @@ post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
/* prepare insn slot */
- p->ainsn.insn[0] = cpu_to_le32(p->opcode);
+ p->ainsn.api.insn[0] = cpu_to_le32(p->opcode);
- flush_icache_range((uintptr_t) (p->ainsn.insn),
- (uintptr_t) (p->ainsn.insn) +
+ flush_icache_range((uintptr_t) (p->ainsn.api.insn),
+ (uintptr_t) (p->ainsn.api.insn) +
MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
/*
	 * The return address needs to be restored after stepping out of line (XOL).
*/
- p->ainsn.restore = (unsigned long) p->addr +
+ p->ainsn.api.restore = (unsigned long) p->addr +
sizeof(kprobe_opcode_t);
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
{
	/* This instruction is not executed out of line; no need to adjust the PC */
- p->ainsn.restore = 0;
+ p->ainsn.api.restore = 0;
}
static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (p->ainsn.handler)
- p->ainsn.handler((u32)p->opcode, (long)p->addr, regs);
+ if (p->ainsn.api.handler)
+ p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs);
/* single step simulated, now go for post processing */
post_kprobe_handler(kcb, regs);
@@ -98,18 +98,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
case INSN_GOOD_NO_SLOT: /* insn need simulation */
- p->ainsn.insn = NULL;
+ p->ainsn.api.insn = NULL;
break;
case INSN_GOOD: /* instruction uses slot */
- p->ainsn.insn = get_insn_slot();
- if (!p->ainsn.insn)
+ p->ainsn.api.insn = get_insn_slot();
+ if (!p->ainsn.api.insn)
return -ENOMEM;
break;
};
/* prepare the instruction */
- if (p->ainsn.insn)
+ if (p->ainsn.api.insn)
arch_prepare_ss_slot(p);
else
arch_prepare_simulate(p);
@@ -142,9 +142,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- if (p->ainsn.insn) {
- free_insn_slot(p->ainsn.insn, 0);
- p->ainsn.insn = NULL;
+ if (p->ainsn.api.insn) {
+ free_insn_slot(p->ainsn.api.insn, 0);
+ p->ainsn.api.insn = NULL;
}
}
@@ -239,9 +239,9 @@ static void __kprobes setup_singlestep(struct kprobe *p,
}
- if (p->ainsn.insn) {
+ if (p->ainsn.api.insn) {
/* prepare for single stepping */
- slot = (unsigned long)p->ainsn.insn;
+ slot = (unsigned long)p->ainsn.api.insn;
set_ss_context(kcb, slot); /* mark pending ss */
@@ -293,8 +293,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
return;
/* return addr restore if non-branching insn */
- if (cur->ainsn.restore != 0)
- instruction_pointer_set(regs, cur->ainsn.restore);
+ if (cur->ainsn.api.restore != 0)
+ instruction_pointer_set(regs, cur->ainsn.api.restore);
/* restore back original saved kprobe variables and continue */
if (kcb->kprobe_status == KPROBE_REENTER) {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 01f259ec5700..1d76ffb34d40 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -307,7 +307,7 @@ void __init setup_arch(char **cmdline_p)
{
pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
- sprintf(init_utsname()->machine, ELF_PLATFORM);
+ sprintf(init_utsname()->machine, UTS_MACHINE);
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index c39872a7b03c..19f7e1d6fc24 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -220,7 +220,7 @@ realtime:
get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
get_ts_realtime res_sec=x10, res_nsec=x11, \
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
+ clock_gettime_return shift=1
ALIGN
monotonic:
@@ -243,7 +243,7 @@ monotonic:
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
- clock_gettime_return, shift=1
+ clock_gettime_return shift=1
ALIGN
monotonic_raw:
@@ -264,7 +264,7 @@ monotonic_raw:
clock_nsec=x15, nsec_to_sec=x9
add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
+ clock_gettime_return shift=1
ALIGN
realtime_coarse:
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 71c8076bbc60..d0cf2910d808 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -61,7 +61,7 @@ jiffies = jiffies_64;
#define TRAMP_TEXT \
. = ALIGN(PAGE_SIZE); \
VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \
- *(.entry.tramp.text) \
+ KEEP(*(.entry.tramp.text)) \
. = ALIGN(PAGE_SIZE); \
VMLINUX_SYMBOL(__entry_tramp_text_end) = .;
#else
@@ -115,7 +115,8 @@ SECTIONS
*(.discard)
*(.discard.*)
*(.interp .dynamic)
- *(.dynsym .dynstr .hash)
+ *(.dynsym .dynstr .hash .gnu.hash)
+ *(.eh_frame)
}
. = KIMAGE_VADDR + TEXT_OFFSET;
@@ -178,18 +179,18 @@ SECTIONS
. = ALIGN(4);
.altinstructions : {
__alt_instructions = .;
- *(.altinstructions)
+ KEEP(*(.altinstructions))
__alt_instructions_end = .;
}
.altinstr_replacement : {
- *(.altinstr_replacement)
+ KEEP(*(.altinstr_replacement))
}
- .rela : ALIGN(8) {
+ .rela.dyn : ALIGN(8) {
*(.rela .rela*)
}
- __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR);
- __rela_size = SIZEOF(.rela);
+ __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
+ __rela_size = SIZEOF(.rela.dyn);
. = ALIGN(SEGMENT_ALIGN);
__init_end = .;
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a5272c07d1cb..9408780f4479 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -25,7 +25,6 @@ config KVM
depends on !ARM64_16K_PAGES
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 67613937711f..dfedd4ab1a76 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -68,9 +68,8 @@
stp \ptr, \regB, [\regC], \val
.endm
- .weak memcpy
ENTRY(__memcpy)
-ENTRY(memcpy)
+WEAK(memcpy)
#include "copy_template.S"
ret
ENDPIPROC(memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index a5a4459013b1..e3de8f05c21a 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -57,9 +57,8 @@ C_h .req x12
D_l .req x13
D_h .req x14
- .weak memmove
ENTRY(__memmove)
-ENTRY(memmove)
+WEAK(memmove)
cmp dstin, src
b.lo __memcpy
add tmp1, src, count
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index f2670a9f218c..316263c47c00 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -54,9 +54,8 @@ dst .req x8
tmp3w .req w9
tmp3 .req x9
- .weak memset
ENTRY(__memset)
-ENTRY(memset)
+WEAK(memset)
mov dst, dstin /* Preserve return value. */
and A_lw, val, #255
orr A_lw, A_lw, A_lw, lsl #8
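memcpy, memmove and memset all switch from a bare .weak directive followed by ENTRY() to the WEAK() macro. The motivation (hedged, per the ClangBuiltLinux effort this tree tracks) is that some assemblers honour the last binding directive, so the .globl emitted by ENTRY() could silently cancel the earlier .weak, leaving strong symbols that clash with instrumented overrides such as KASAN's. The same idea at the C level, as an illustrative sketch:

	#include <stddef.h>

	/* weak default; any strong definition elsewhere wins at link time */
	__attribute__((weak)) void *xmemset(void *s, int c, size_t n)
	{
		unsigned char *p = s;

		while (n--)
			*p++ = (unsigned char)c;
		return s;
	}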
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index 2ae12825529f..078ae1a95965 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -18,7 +18,6 @@ config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
select PREEMPT_NOTIFIERS
- select ANON_INODES
select KVM_MMIO
select SRCU
---help---
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index c2024ac9d4e8..93a4b562aa5b 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -19,7 +19,6 @@ if VIRTUALIZATION
config KVM
bool
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_EVENTFD
select SRCU
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index b126ce49ae7b..03b5f6bf2584 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -713,7 +713,7 @@ static void cmm_exit(void)
* Return value:
* 0 on success / other on failure
**/
-static int cmm_set_disable(const char *val, struct kernel_param *kp)
+static int cmm_set_disable(const char *val, const struct kernel_param *kp)
{
int disable = simple_strtoul(val, NULL, 10);
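This is one of many mechanical conversions in this patch: the kernel_param getter/setter callbacks take const struct kernel_param * to match the constified parameter ops upstream. A minimal self-contained sketch of the resulting pattern (parameter and variable names illustrative):

	static int my_value;

	static int my_set(const char *val, const struct kernel_param *kp)
	{
		return param_set_int(val, kp);	/* parse and store into my_value */
	}

	static const struct kernel_param_ops my_ops = {
		.set = my_set,
		.get = param_get_int,
	};
	module_param_cb(my_param, &my_ops, &my_value, 0644);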
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5fce52cf0e57..6c39e24abe84 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -20,7 +20,6 @@ config KVM
prompt "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_EVENTFD
select KVM_ASYNC_PF
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 9cfa2ffaa9d6..9e6ae335f7da 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -51,7 +51,7 @@ enum __force_cpu_type {
};
static int force_cpu_type;
-static int set_cpu_type(const char *str, struct kernel_param *kp)
+static int set_cpu_type(const char *str, const struct kernel_param *kp)
{
if (!strcmp(str, "timer")) {
force_cpu_type = timer;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 51601246461a..38a0643783bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,7 +19,6 @@ config X86
def_bool y
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
- select ANON_INODES
select ARCH_CLOCKSOURCE_DATA
select ARCH_DISCARD_MEMBLOCK
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index e62f4401e792..a63934de4e58 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -383,3 +383,5 @@
374 i386 userfaultfd sys_userfaultfd
375 i386 membarrier sys_membarrier
376 i386 mlock2 sys_mlock2
+424 i386 pidfd_send_signal sys_pidfd_send_signal
+434 i386 pidfd_open sys_pidfd_open
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index aa51cdd2a11a..e0e52d5fcda8 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -332,6 +332,8 @@
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
+424 common pidfd_send_signal sys_pidfd_send_signal
+434 common pidfd_open sys_pidfd_open
#
# x32-specific system call numbers start at 512 to avoid cache impact
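pidfd_send_signal and pidfd_open are wired up at their fixed upstream numbers (424 and 434) rather than the next free local slot, so binaries built against mainline headers keep working. A hedged userspace sketch of the pair; raw syscall() with the literal numbers is used since libc wrappers may not exist for this kernel vintage:

	#include <signal.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int terminate_via_pidfd(pid_t pid)
	{
		int pidfd = syscall(434, pid, 0);	/* pidfd_open(pid, 0) */
		int ret;

		if (pidfd < 0)
			return -1;
		ret = syscall(424, pidfd, SIGTERM, NULL, 0);	/* pidfd_send_signal */
		close(pidfd);
		return ret;
	}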
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 53b7f53f6207..c88de752cabc 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -26,7 +26,6 @@ config KVM
depends on X86_LOCAL_APIC
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
- select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select IRQ_BYPASS_MANAGER
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f24bd7249536..a8bb10a78bc7 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -614,7 +614,7 @@ enum __force_cpu_type {
static int force_cpu_type;
-static int set_cpu_type(const char *str, struct kernel_param *kp)
+static int set_cpu_type(const char *str, const struct kernel_param *kp)
{
if (!strcmp(str, "timer")) {
force_cpu_type = timer;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 3096c087b732..d5d26464cac8 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1608,7 +1608,8 @@ error:
return -ENODEV;
}
-static int param_set_event_clearing(const char *val, struct kernel_param *kp)
+static int param_set_event_clearing(const char *val,
+ const struct kernel_param *kp)
{
int result = 0;
@@ -1626,7 +1627,8 @@ static int param_set_event_clearing(const char *val, struct kernel_param *kp)
return result;
}
-static int param_get_event_clearing(char *buffer, struct kernel_param *kp)
+static int param_get_event_clearing(char *buffer,
+ const struct kernel_param *kp)
{
switch (ec_event_clearing) {
case ACPI_EC_EVT_TIMING_STATUS:
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 13c308323e94..764047d65d04 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -227,7 +227,8 @@ module_param_cb(trace_method_name, &param_ops_trace_method, &trace_method_name,
module_param_cb(trace_debug_layer, &param_ops_trace_attrib, &acpi_gbl_trace_dbg_layer, 0644);
module_param_cb(trace_debug_level, &param_ops_trace_attrib, &acpi_gbl_trace_dbg_level, 0644);
-static int param_set_trace_state(const char *val, struct kernel_param *kp)
+static int param_set_trace_state(const char *val,
+ const struct kernel_param *kp)
{
acpi_status status;
const char *method = trace_method_name;
@@ -263,7 +264,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp)
return 0;
}
-static int param_get_trace_state(char *buffer, struct kernel_param *kp)
+static int param_get_trace_state(char *buffer, const struct kernel_param *kp)
{
if (!(acpi_gbl_trace_flags & ACPI_TRACE_ENABLED))
return sprintf(buffer, "disable");
@@ -292,7 +293,8 @@ MODULE_PARM_DESC(aml_debug_output,
"To enable/disable the ACPI Debug Object output.");
/* /sys/module/acpi/parameters/acpica_version */
-static int param_get_acpica_version(char *buffer, struct kernel_param *kp)
+static int param_get_acpica_version(char *buffer,
+ const struct kernel_param *kp)
{
int result;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 08e75d72ddff..370f1452710f 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -147,7 +147,7 @@ static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait);
static int binder_stop_on_user_error;
static int binder_set_stop_on_user_error(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
int ret;
@@ -1968,6 +1968,18 @@ static int binder_inc_ref_for_node(struct binder_proc *proc,
}
ret = binder_inc_ref_olocked(ref, strong, target_list);
*rdata = ref->data;
+ if (ret && ref == new_ref) {
+ /*
+		 * Clean up the failed reference here, as the target
+		 * could be dead and may already have released its
+		 * references. Calling binder_cleanup_ref_olocked()
+		 * on the new reference with strong=0 and a tmp_refs
+		 * will not decrement the node. The new_ref is
+		 * kfree'd below.
+ */
+ binder_cleanup_ref_olocked(new_ref);
+ ref = NULL;
+ }
+
binder_proc_unlock(proc);
if (new_ref && ref != new_ref)
/*
@@ -4050,7 +4062,7 @@ static int binder_wait_for_work(struct binder_thread *thread,
binder_inner_proc_lock(proc);
list_del_init(&thread->waiting_thread_node);
if (signal_pending(current)) {
- ret = -ERESTARTSYS;
+ ret = -EINTR;
break;
}
}
@@ -4979,7 +4991,7 @@ err:
if (thread)
thread->looper_need_return = false;
wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2);
- if (ret && ret != -ERESTARTSYS)
+ if (ret && ret != -EINTR)
pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret);
err_unlocked:
trace_binder_ioctl_done(ret);
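The two -ERESTARTSYS to -EINTR changes are a pair: -ERESTARTSYS is converted by the signal-delivery code into a transparent syscall restart, which is unsafe once the binder ioctl has already consumed part of its work, whereas -EINTR reaches userspace and lets the caller decide. The generic shape, sketched with an illustrative wait queue:

	ret = wait_event_interruptible(wq, work_available);
	if (ret == -ERESTARTSYS)
		ret = -EINTR;	/* surface the signal; never auto-restart */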
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 602cbb04bee8..a4e8a2d26f6e 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -239,7 +239,6 @@ source "drivers/base/regmap/Kconfig"
config DMA_SHARED_BUFFER
bool
default n
- select ANON_INODES
help
This option enables the framework for buffer-sharing between
multiple drivers. A buffer is associated with a file using driver
diff --git a/drivers/char/adsprpc.c b/drivers/char/adsprpc.c
index b5b239eb6dc3..69bfaa0bc6f4 100644
--- a/drivers/char/adsprpc.c
+++ b/drivers/char/adsprpc.c
@@ -2747,6 +2747,7 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
char *fileinfo = NULL;
char single_line[UL_SIZE] = "----------------";
char title[UL_SIZE] = "=========================";
+ unsigned long irq_flags = 0;
fileinfo = kzalloc(DEBUGFS_SIZE, GFP_KERNEL);
if (!fileinfo)
@@ -2809,6 +2810,7 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%s%s%s%s%s\n", single_line, single_line,
single_line, single_line, single_line);
+ spin_lock_irqsave(&me->hlock, irq_flags);
hlist_for_each_entry_safe(gmaps, n, &me->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20d|0x%-18llX|0x%-18X|0x%-20lX\n\n",
@@ -2816,18 +2818,21 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
(uint32_t)gmaps->size,
gmaps->va);
}
+ spin_unlock_irqrestore(&me->hlock, irq_flags);
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20s|%-20s|%-20s|%-20s\n",
"len", "refs", "raddr", "flags");
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%s%s%s%s%s\n", single_line, single_line,
single_line, single_line, single_line);
+ spin_lock_irqsave(&me->hlock, irq_flags);
hlist_for_each_entry_safe(gmaps, n, &me->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"0x%-18X|%-20d|%-20lu|%-20u\n",
(uint32_t)gmaps->len, gmaps->refs,
gmaps->raddr, gmaps->flags);
}
+ spin_unlock_irqrestore(&me->hlock, irq_flags);
} else {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"\n%s %13s %d\n", "cid", ":", fl->cid);
@@ -2869,6 +2874,7 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
"%s%s%s%s%s\n",
single_line, single_line, single_line,
single_line, single_line);
+ mutex_lock(&fl->map_mutex);
hlist_for_each_entry_safe(map, n, &fl->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"0x%-20lX|0x%-20llX|0x%-20zu\n\n",
@@ -2878,6 +2884,7 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
"%s %d\n\n",
"DEV_MINOR:", fl->dev_minor);
}
+ mutex_unlock(&fl->map_mutex);
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20s|%-20s|%-20s|%-20s\n",
"len", "refs",
@@ -2886,23 +2893,27 @@ static ssize_t fastrpc_debugfs_read(struct file *filp, char __user *buffer,
"%s%s%s%s%s\n",
single_line, single_line, single_line,
single_line, single_line);
+ mutex_lock(&fl->map_mutex);
hlist_for_each_entry_safe(map, n, &fl->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20zu|%-20d|0x%-20lX|%-20d\n\n",
map->len, map->refs, map->raddr,
map->uncached);
}
+ mutex_unlock(&fl->map_mutex);
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20s|%-20s\n", "secure", "attr");
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%s%s%s%s%s\n",
single_line, single_line, single_line,
single_line, single_line);
+ mutex_lock(&fl->map_mutex);
hlist_for_each_entry_safe(map, n, &fl->maps, hn) {
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"%-20d|0x%-20lX\n\n",
map->secure, map->attr);
}
+ mutex_unlock(&fl->map_mutex);
len += scnprintf(fileinfo + len, DEBUGFS_SIZE - len,
"\n%s %s %s\n", title,
" LIST OF PENDING SMQCONTEXTS ", title);
diff --git a/drivers/char/diag/diag_dci.c b/drivers/char/diag/diag_dci.c
index 973bc3b1c5b5..ed52ebbb786a 100644
--- a/drivers/char/diag/diag_dci.c
+++ b/drivers/char/diag/diag_dci.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -1615,7 +1616,6 @@ static int diag_send_dci_pkt(struct diag_cmd_reg_t *entry,
return -EIO;
}
- mutex_lock(&driver->dci_mutex);
/* prepare DCI packet */
header.start = CONTROL_CHAR;
header.version = 1;
@@ -1634,7 +1634,6 @@ static int diag_send_dci_pkt(struct diag_cmd_reg_t *entry,
diag_update_pkt_buffer(driver->apps_dci_buf, write_len,
DCI_PKT_TYPE);
diag_update_sleeping_process(entry->pid, DCI_PKT_TYPE);
- mutex_unlock(&driver->dci_mutex);
return DIAG_DCI_NO_ERROR;
}
@@ -1654,7 +1653,6 @@ static int diag_send_dci_pkt(struct diag_cmd_reg_t *entry,
entry->proc);
status = DIAG_DCI_SEND_DATA_FAIL;
}
- mutex_unlock(&driver->dci_mutex);
return status;
}
@@ -1734,7 +1732,16 @@ static int diag_send_dci_pkt_remote(unsigned char *data, int len, int tag,
write_len += dci_header_size;
*(int *)(buf + write_len) = tag;
write_len += sizeof(int);
- memcpy(buf + write_len, data, len);
+ if ((write_len + len) < DIAG_MDM_BUF_SIZE) {
+ memcpy(buf + write_len, data, len);
+ } else {
+ pr_err("diag: skip writing invalid length packet, token: %d, pkt_len: %d\n",
+ token, (write_len + len));
+ spin_lock_irqsave(&driver->dci_mempool_lock, flags);
+ diagmem_free(driver, buf, dci_ops_tbl[token].mempool);
+ spin_unlock_irqrestore(&driver->dci_mempool_lock, flags);
+ return -EAGAIN;
+ }
write_len += len;
*(buf + write_len) = CONTROL_CHAR; /* End Terminator */
write_len += sizeof(uint8_t);
@@ -2115,8 +2122,11 @@ static int diag_process_dci_pkt_rsp(unsigned char *buf, int len)
if (temp_entry) {
reg_item = container_of(temp_entry, struct diag_cmd_reg_t,
entry);
- ret = diag_send_dci_pkt(reg_item, req_buf, req_len,
+ mutex_lock(&driver->dci_mutex);
+ if (req_entry)
+ ret = diag_send_dci_pkt(reg_item, req_buf, req_len,
req_entry->tag);
+ mutex_unlock(&driver->dci_mutex);
} else {
DIAG_LOG(DIAG_DEBUG_DCI, "Command not found: %02x %02x %02x\n",
reg_entry.cmd_code, reg_entry.subsys_id,
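Two fixes in this file: dci_mutex moves out of diag_send_dci_pkt() into the caller so the req_entry check happens under the same lock, and the remote-packet path validates the final length before copying. The bounds check, distilled into a sketch (buffer size name taken from the hunk, error handling simplified):

	if (write_len + len >= DIAG_MDM_BUF_SIZE)
		return -EAGAIN;	/* would overflow buf; the real hunk frees it first */
	memcpy(buf + write_len, data, len);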
diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c
index 9f2e3be2c5b8..676c910e990f 100644
--- a/drivers/char/ipmi/ipmi_poweroff.c
+++ b/drivers/char/ipmi/ipmi_poweroff.c
@@ -66,7 +66,7 @@ static void (*specific_poweroff_func)(ipmi_user_t user);
/* Holds the old poweroff function so we can restore it on removal. */
static void (*old_poweroff_func)(void);
-static int set_param_ifnum(const char *val, struct kernel_param *kp)
+static int set_param_ifnum(const char *val, const struct kernel_param *kp)
{
int rv = param_set_int(val, kp);
if (rv)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 2f8ff63bbbe4..1ad69e8d6d6d 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1357,7 +1357,7 @@ static unsigned int num_slave_addrs;
#define IPMI_MEM_ADDR_SPACE 1
static char *addr_space_to_str[] = { "i/o", "mem" };
-static int hotmod_handler(const char *val, struct kernel_param *kp);
+static int hotmod_handler(const char *val, const struct kernel_param *kp);
module_param_call(hotmod, hotmod_handler, NULL, NULL, 0200);
MODULE_PARM_DESC(hotmod, "Add and remove interfaces. See"
@@ -1814,7 +1814,7 @@ static struct smi_info *smi_info_alloc(void)
return info;
}
-static int hotmod_handler(const char *val, struct kernel_param *kp)
+static int hotmod_handler(const char *val, const struct kernel_param *kp)
{
char *str = kstrdup(val, GFP_KERNEL);
int rv;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 15fcf2cac971..53226f33ea98 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1187,6 +1187,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+ policy->iowait_boost_enable = true;
/* cpuinfo and default policy values */
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c
index 1eaef20e5ed5..dca59eadc6c2 100644
--- a/drivers/cpuidle/lpm-levels.c
+++ b/drivers/cpuidle/lpm-levels.c
@@ -187,7 +187,7 @@ static uint32_t least_cluster_latency(struct lpm_cluster *cluster,
uint32_t latency = 0;
int i;
- if (!cluster->list.next) {
+ if (list_empty(&cluster->list)) {
for (i = 0; i < cluster->nlevels; i++) {
level = &cluster->levels[i];
pwr_params = &level->pwr;
@@ -691,7 +691,7 @@ static void update_history(struct cpuidle_device *dev, int idx);
static int cpu_power_select(struct cpuidle_device *dev,
struct lpm_cpu *cpu)
{
- int best_level = -1;
+ int best_level = 0;
uint32_t latency_us = pm_qos_request_for_cpu(PM_QOS_CPU_DMA_LATENCY,
dev->cpu);
s64 sleep_us = ktime_to_us(tick_nohz_get_sleep_length());
@@ -705,8 +705,6 @@ static int cpu_power_select(struct cpuidle_device *dev,
uint32_t *min_residency = get_per_cpu_min_residency(dev->cpu);
uint32_t *max_residency = get_per_cpu_max_residency(dev->cpu);
- if (!cpu)
- return -EINVAL;
if ((sleep_disabled && !cpu_isolated(dev->cpu)) || sleep_us < 0)
return 0;
@@ -1536,17 +1534,11 @@ static int lpm_cpuidle_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
struct lpm_cluster *cluster = per_cpu(cpu_cluster, dev->cpu);
- int idx;
if (!cluster)
return 0;
- idx = cpu_power_select(dev, cluster->cpu);
-
- if (idx < 0)
- return -EPERM;
-
- return idx;
+ return cpu_power_select(dev, cluster->cpu);
}
static void update_history(struct cpuidle_device *dev, int idx)
@@ -1591,9 +1583,6 @@ static int lpm_cpuidle_enter(struct cpuidle_device *dev,
int64_t start_time = ktime_to_ns(ktime_get()), end_time;
struct power_params *pwr_params;
- if (idx < 0)
- return -EINVAL;
-
pwr_params = &cluster->cpu->levels[idx].pwr;
sched_set_cpu_cstate(smp_processor_id(), idx + 1,
pwr_params->energy_overhead, pwr_params->latency_us);
diff --git a/drivers/crypto/msm/qce50.c b/drivers/crypto/msm/qce50.c
index 7740a8c59126..3f8c512e0cd9 100644
--- a/drivers/crypto/msm/qce50.c
+++ b/drivers/crypto/msm/qce50.c
@@ -1,6 +1,6 @@
/* Qualcomm Crypto Engine driver.
*
- * Copyright (c) 2012-2018, 2020 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2012-2018, 2020-2021 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -866,6 +866,11 @@ static int _ce_setup_cipher(struct qce_device *pce_dev, struct qce_req *creq,
break;
case CIPHER_ALG_3DES:
if (creq->mode != QCE_MODE_ECB) {
+ if (ivsize > MAX_IV_LENGTH) {
+ pr_err("%s: error: Invalid length parameter\n",
+ __func__);
+ return -EINVAL;
+ }
_byte_stream_to_net_words(enciv32, creq->iv, ivsize);
pce = cmdlistinfo->encr_cntr_iv;
pce->data = enciv32[0];
@@ -914,6 +919,11 @@ static int _ce_setup_cipher(struct qce_device *pce_dev, struct qce_req *creq,
}
}
if (creq->mode != QCE_MODE_ECB) {
+ if (ivsize > MAX_IV_LENGTH) {
+ pr_err("%s: error: Invalid length parameter\n",
+ __func__);
+ return -EINVAL;
+ }
if (creq->mode == QCE_MODE_XTS)
_byte_stream_swap_to_net_words(enciv32,
creq->iv, ivsize);
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index d459cf4b8579..4aaf54132980 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -50,7 +50,7 @@ unsigned int edac_mc_get_poll_msec(void)
return edac_mc_poll_msec;
}
-static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
+static int edac_set_poll_msec(const char *val, const struct kernel_param *kp)
{
unsigned int i;
int ret;
diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 9cb082a19d8a..d93afc585be4 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -19,7 +19,8 @@
#ifdef CONFIG_EDAC_DEBUG
-static int edac_set_debug_level(const char *buf, struct kernel_param *kp)
+static int edac_set_debug_level(const char *buf,
+ const struct kernel_param *kp)
{
unsigned long val;
int ret;
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 832df3c58e2f..b858494a0a55 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -18,6 +18,11 @@
#include "efistub.h"
+bool efi__get___nokaslr(void)
+{
+ return nokaslr();
+}
+
static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg)
{
static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID;
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index e33593ed8f84..1f84f9ac5430 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -38,7 +38,7 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
u64 phys_seed = 0;
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
- if (!nokaslr()) {
+ if (!efi__get___nokaslr()) {
status = efi_get_random_bytes(sys_table_arg,
sizeof(phys_seed),
(u8 *)&phys_seed);
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index a5eaa3ac0a5d..f1907c792a52 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -9,6 +9,8 @@ extern int __pure nokaslr(void);
void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
+bool efi__get___nokaslr(void);
+
efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image,
void **__fh);
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 7c9a801472e5..cd44789da808 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -1,4 +1,5 @@
-/* Copyright (c) 2002,2007-2019, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -1016,6 +1017,8 @@ static int adreno_probe(struct platform_device *pdev)
adreno_debugfs_init(adreno_dev);
adreno_profile_init(adreno_dev);
+ adreno_dev->perfcounter = false;
+
adreno_sysfs_init(adreno_dev);
kgsl_pwrscale_init(&pdev->dev, CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR);
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index f433cdef1827..2af892671ee5 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -1,4 +1,5 @@
-/* Copyright (c) 2008-2018 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2018,2020 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -439,6 +440,12 @@ struct adreno_device {
struct list_head active_list;
spinlock_t active_list_lock;
+
+ /*
+	 * @perfcounter: Flag that controls clearing perfcounters across
+	 * context switches and gates perfcounter ioctl reads
+ */
+ bool perfcounter;
};
/**
diff --git a/drivers/gpu/msm/adreno_compat.c b/drivers/gpu/msm/adreno_compat.c
index 5a8d587d4536..faaec90ba74a 100644
--- a/drivers/gpu/msm/adreno_compat.c
+++ b/drivers/gpu/msm/adreno_compat.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -229,6 +230,14 @@ static long adreno_ioctl_perfcounter_read_compat(
struct kgsl_perfcounter_read_compat *read32 = data;
struct kgsl_perfcounter_read read;
+ /*
+ * When performance counter zapping is enabled, the counters are cleared
+ * across context switches. Reading the counters when they are zapped is
+ * not permitted.
+ */
+ if (!adreno_dev->perfcounter)
+ return -EPERM;
+
read.reads = (struct kgsl_perfcounter_read_group __user *)
(uintptr_t)read32->reads;
read.count = read32->count;
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
index 5209f1b2a2e6..3c804ca09371 100644
--- a/drivers/gpu/msm/adreno_dispatch.c
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2013-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -1149,7 +1150,8 @@ static inline bool _verify_ib(struct kgsl_device_private *dev_priv,
}
/* Make sure that the address is mapped */
- if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr)) {
+ if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr,
+ ib->size)) {
pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n",
context->id, ib->gpuaddr);
return false;
diff --git a/drivers/gpu/msm/adreno_ioctl.c b/drivers/gpu/msm/adreno_ioctl.c
index 0d5e3e094c36..579ba2b61863 100644
--- a/drivers/gpu/msm/adreno_ioctl.c
+++ b/drivers/gpu/msm/adreno_ioctl.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -76,6 +77,14 @@ static long adreno_ioctl_perfcounter_read(struct kgsl_device_private *dev_priv,
struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
struct kgsl_perfcounter_read *read = data;
+ /*
+ * When performance counter zapping is enabled, the counters are cleared
+ * across context switches. Reading the counters when they are zapped is
+ * not permitted.
+ */
+ if (!adreno_dev->perfcounter)
+ return -EPERM;
+
return (long) adreno_perfcounter_read_group(adreno_dev, read->reads,
read->count);
}
diff --git a/drivers/gpu/msm/adreno_sysfs.c b/drivers/gpu/msm/adreno_sysfs.c
index f893b29e816e..a6c179f7c932 100644
--- a/drivers/gpu/msm/adreno_sysfs.c
+++ b/drivers/gpu/msm/adreno_sysfs.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -233,6 +234,31 @@ static unsigned int _lm_show(struct adreno_device *adreno_dev)
return test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag);
}
+static unsigned int _perfcounter_show(struct adreno_device *adreno_dev)
+{
+ return adreno_dev->perfcounter;
+}
+
+static int _perfcounter_store(struct adreno_device *adreno_dev,
+ unsigned int val)
+{
+ struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+ if (adreno_dev->perfcounter == val)
+ return 0;
+
+ mutex_lock(&device->mutex);
+
+ /* Power down the GPU before changing the state */
+ kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND);
+ adreno_dev->perfcounter = val;
+ kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+
+ mutex_unlock(&device->mutex);
+
+ return 0;
+}
+
static ssize_t _sysfs_store_u32(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -327,6 +353,7 @@ static ADRENO_SYSFS_BOOL(lm);
static ADRENO_SYSFS_BOOL(preemption);
static ADRENO_SYSFS_BOOL(hwcg);
static ADRENO_SYSFS_BOOL(throttling);
+static ADRENO_SYSFS_BOOL(perfcounter);
@@ -343,6 +370,7 @@ static const struct device_attribute *_attr_list[] = {
&adreno_attr_preemption.attr,
&adreno_attr_hwcg.attr,
&adreno_attr_throttling.attr,
+ &adreno_attr_perfcounter.attr,
NULL,
};
@@ -499,8 +527,11 @@ int adreno_sysfs_init(struct adreno_device *adreno_dev)
ret = kgsl_create_device_sysfs_files(device->dev, _attr_list);
/* Add the PPD directory and files */
- if (ret == 0)
+ if (ret == 0) {
+ /* Notify userspace */
+ kobject_uevent(&device->dev->kobj, KOBJ_ADD);
ppd_sysfs_init(adreno_dev);
+ }
return 0;
}
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index bda0afed50b6..4b9f779e525e 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2008-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -1273,7 +1274,7 @@ kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr)
if (!private)
return NULL;
- if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr))
+ if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr, 0))
return NULL;
spin_lock(&private->mem_lock);
@@ -2183,6 +2184,15 @@ static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, unsigned long useraddr)
ret = sg_alloc_table_from_pages(memdesc->sgt, pages, npages,
0, memdesc->size, GFP_KERNEL);
+
+ if (ret)
+ goto out;
+
+ ret = kgsl_cache_range_op(memdesc, 0, memdesc->size,
+ KGSL_CACHE_OP_FLUSH);
+
+ if (ret)
+ sg_free_table(memdesc->sgt);
out:
if (ret) {
for (i = 0; i < npages; i++)
@@ -2295,10 +2305,27 @@ static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device,
return -EFAULT;
}
- /* Look for the fd that matches this the vma file */
+ /* Look for the fd that matches this vma file */
fd = iterate_fd(current->files, 0, match_file, vma->vm_file);
- if (fd != 0)
+ if (fd) {
dmabuf = dma_buf_get(fd - 1);
+ if (IS_ERR(dmabuf)) {
+ up_read(&current->mm->mmap_sem);
+ return PTR_ERR(dmabuf);
+ }
+ /*
+			 * It is possible that the fd obtained from iterate_fd
+			 * was closed before being passed to dma_buf_get().
+			 * Hence the dmabuf returned by dma_buf_get() could
+			 * differ from vma->vm_file->private_data. Return
+			 * failure if this happens.
+ */
+ if (dmabuf != vma->vm_file->private_data) {
+ dma_buf_put(dmabuf);
+ up_read(&current->mm->mmap_sem);
+ return -EBADF;
+ }
+ }
}
if (IS_ERR_OR_NULL(dmabuf)) {
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index 98537730cbc9..31e8a7ea5f65 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2011-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -2525,20 +2526,21 @@ static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable,
}
static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable,
- uint64_t gpuaddr)
+ uint64_t gpuaddr, uint64_t size)
{
struct kgsl_iommu_pt *pt = pagetable->priv;
if (gpuaddr == 0)
return false;
- if (gpuaddr >= pt->va_start && gpuaddr < pt->va_end)
+ if (gpuaddr >= pt->va_start && (gpuaddr + size) < pt->va_end)
return true;
- if (gpuaddr >= pt->compat_va_start && gpuaddr < pt->compat_va_end)
+ if (gpuaddr >= pt->compat_va_start &&
+ (gpuaddr + size) < pt->compat_va_end)
return true;
- if (gpuaddr >= pt->svm_start && gpuaddr < pt->svm_end)
+ if (gpuaddr >= pt->svm_start && (gpuaddr + size) < pt->svm_end)
return true;
return false;
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index d0a45e713d0d..228f3396ae90 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2002,2007-2017,2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -606,10 +607,11 @@ enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device)
EXPORT_SYMBOL(kgsl_mmu_get_mmutype);
bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable,
- uint64_t gpuaddr)
+ uint64_t gpuaddr, uint64_t size)
{
if (PT_OP_VALID(pagetable, addr_in_range))
- return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr);
+ return pagetable->pt_ops->addr_in_range(pagetable,
+ gpuaddr, size);
return false;
}
@@ -645,7 +647,7 @@ EXPORT_SYMBOL(kgsl_mmu_get_qtimer_global_entry);
*/
static bool nommu_gpuaddr_in_range(struct kgsl_pagetable *pagetable,
- uint64_t gpuaddr)
+ uint64_t gpuaddr, uint64_t size)
{
return (gpuaddr != 0) ? true : false;
}
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
index 505fe591a53e..fcf3b188f9f7 100644
--- a/drivers/gpu/msm/kgsl_mmu.h
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -1,4 +1,5 @@
/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -99,7 +100,8 @@ struct kgsl_mmu_pt_ops {
int (*set_svm_region)(struct kgsl_pagetable *, uint64_t, uint64_t);
int (*svm_range)(struct kgsl_pagetable *, uint64_t *, uint64_t *,
uint64_t);
- bool (*addr_in_range)(struct kgsl_pagetable *pagetable, uint64_t);
+ bool (*addr_in_range)(struct kgsl_pagetable *pagetable,
+ uint64_t, uint64_t);
int (*mmu_map_offset)(struct kgsl_pagetable *pt,
uint64_t virtaddr, uint64_t virtoffset,
struct kgsl_memdesc *memdesc, uint64_t physoffset,
@@ -187,7 +189,8 @@ unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr);
unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu,
u64 ttbr0, uint64_t addr);
enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device);
-bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr);
+bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr,
+ uint64_t size);
int kgsl_mmu_get_region(struct kgsl_pagetable *pagetable,
uint64_t gpuaddr, uint64_t size);
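addr_in_range() and kgsl_mmu_gpuaddr_in_range() gain a size argument so the whole buffer, not just its first byte, must fall inside the pagetable's VA window; passing size = 0 (as kgsl_sharedmem_find() now does) degenerates to the old single-address check. The predicate, distilled with illustrative window names:

	/* true only if [gpuaddr, gpuaddr + size) lies inside [start, end) */
	static bool in_window(u64 gpuaddr, u64 size, u64 start, u64 end)
	{
		/* assumes gpuaddr + size does not wrap */
		return gpuaddr >= start && (gpuaddr + size) < end;
	}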
diff --git a/drivers/gpu/msm/kgsl_pool.c b/drivers/gpu/msm/kgsl_pool.c
index cd5f8e3efe98..77bb50ded36f 100644
--- a/drivers/gpu/msm/kgsl_pool.c
+++ b/drivers/gpu/msm/kgsl_pool.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -83,6 +84,15 @@ _kgsl_pool_zero_page(struct page *p, unsigned int pool_order)
static void
_kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p)
{
+ /*
+ * Sanity check to make sure we don't re-pool a page that
+ * somebody else has a reference to.
+ */
+ if (WARN_ON_ONCE(unlikely(page_count(p) > 1))) {
+ __free_pages(p, pool->pool_order);
+ return;
+ }
+
_kgsl_pool_zero_page(p, pool->pool_order);
spin_lock(&pool->list_lock);
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index d15e824e39df..de969713ce59 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -34,7 +34,8 @@ module_param(emulate_scroll_wheel, bool, 0644);
MODULE_PARM_DESC(emulate_scroll_wheel, "Emulate a scroll wheel");
static unsigned int scroll_speed = 32;
-static int param_set_scroll_speed(const char *val, struct kernel_param *kp) {
+static int param_set_scroll_speed(const char *val,
+ const struct kernel_param *kp) {
unsigned long speed;
if (!val || kstrtoul(val, 0, &speed) || speed > 63)
return -EINVAL;
diff --git a/drivers/hwtracing/coresight/coresight-event.c b/drivers/hwtracing/coresight/coresight-event.c
index 0bced010d4c5..8e105af3e4e3 100644
--- a/drivers/hwtracing/coresight/coresight-event.c
+++ b/drivers/hwtracing/coresight/coresight-event.c
@@ -20,12 +20,13 @@
#include <trace/events/exception.h>
static int event_abort_enable;
-static int event_abort_set(const char *val, struct kernel_param *kp);
+static int event_abort_set(const char *val, const struct kernel_param *kp);
module_param_call(event_abort_enable, event_abort_set, param_get_int,
&event_abort_enable, 0644);
static int event_abort_early_panic = 1;
-static int event_abort_on_panic_set(const char *val, struct kernel_param *kp);
+static int event_abort_on_panic_set(const char *val,
+ const struct kernel_param *kp);
module_param_call(event_abort_early_panic, event_abort_on_panic_set,
param_get_int, &event_abort_early_panic, 0644);
@@ -96,7 +97,7 @@ static void event_abort_unregister(void)
unregister_trace_unhandled_abort(event_abort_unhandled_abort, NULL);
}
-static int event_abort_set(const char *val, struct kernel_param *kp)
+static int event_abort_set(const char *val, const struct kernel_param *kp)
{
int ret;
@@ -114,7 +115,8 @@ static int event_abort_set(const char *val, struct kernel_param *kp)
return ret;
}
-static int event_abort_on_panic_set(const char *val, struct kernel_param *kp)
+static int event_abort_on_panic_set(const char *val,
+ const struct kernel_param *kp)
{
int ret;
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index f086ef387475..80f5b8c3b004 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -244,7 +244,7 @@ struct chs_geom {
static unsigned int ide_disks;
static struct chs_geom ide_disks_chs[MAX_HWIFS * MAX_DRIVES];
-static int ide_set_disk_chs(const char *str, struct kernel_param *kp)
+static int ide_set_disk_chs(const char *str, const struct kernel_param *kp)
{
int a, b, c = 0, h = 0, s = 0, i, j = 1;
@@ -328,7 +328,7 @@ static void ide_dev_apply_params(ide_drive_t *drive, u8 unit)
static unsigned int ide_ignore_cable;
-static int ide_set_ignore_cable(const char *s, struct kernel_param *kp)
+static int ide_set_ignore_cable(const char *s, const struct kernel_param *kp)
{
int i, j = 1;
diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig
index 66792e707d74..46dd8288e294 100644
--- a/drivers/iio/Kconfig
+++ b/drivers/iio/Kconfig
@@ -4,7 +4,6 @@
menuconfig IIO
tristate "Industrial I/O support"
- select ANON_INODES
help
The industrial I/O subsystem provides a unified framework for
drivers for many different types of embedded sensors using a
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index c151bb625179..0b2ba086d198 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -23,7 +23,6 @@ config INFINIBAND_USER_MAD
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
- select ANON_INODES
---help---
Userspace InfiniBand access support. This enables the
kernel side of userspace verbs and the userspace
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 77cc77ba998f..49691557f798 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -150,7 +150,7 @@ static struct kparam_string kp_txselect = {
.string = txselect_list,
.maxlen = MAX_ATTEN_LEN
};
-static int setup_txselect(const char *, struct kernel_param *);
+static int setup_txselect(const char *, const struct kernel_param *);
module_param_call(txselect, setup_txselect, param_get_string,
&kp_txselect, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(txselect,
@@ -6194,7 +6194,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
}
/* handle the txselect parameter changing */
-static int setup_txselect(const char *str, struct kernel_param *kp)
+static int setup_txselect(const char *str, const struct kernel_param *kp)
{
struct qib_devdata *dd;
unsigned long val;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 4173fe977721..f8ae5d9a7729 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -80,7 +80,7 @@ module_param(srpt_srq_size, int, 0444);
MODULE_PARM_DESC(srpt_srq_size,
"Shared receive queue (SRQ) size.");
-static int srpt_get_u64_x(char *buffer, struct kernel_param *kp)
+static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "0x%016llx", *(u64 *)kp->arg);
}
diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c
index 292991c90c02..f36f4cb3e7df 100644
--- a/drivers/isdn/hardware/mISDN/avmfritz.c
+++ b/drivers/isdn/hardware/mISDN/avmfritz.c
@@ -156,7 +156,7 @@ _set_debug(struct fritzcard *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct fritzcard *card;
diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
index d0b6377b9834..64060429aa50 100644
--- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c
+++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
@@ -244,7 +244,7 @@ _set_debug(struct inf_hw *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct inf_hw *card;
diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
index a74741d28ca8..ec3749184451 100644
--- a/drivers/isdn/hardware/mISDN/netjet.c
+++ b/drivers/isdn/hardware/mISDN/netjet.c
@@ -111,7 +111,7 @@ _set_debug(struct tiger_hw *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct tiger_hw *card;
diff --git a/drivers/isdn/hardware/mISDN/speedfax.c b/drivers/isdn/hardware/mISDN/speedfax.c
index 9815bb4eec9c..1f1446ed8d5f 100644
--- a/drivers/isdn/hardware/mISDN/speedfax.c
+++ b/drivers/isdn/hardware/mISDN/speedfax.c
@@ -94,7 +94,7 @@ _set_debug(struct sfax_hw *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct sfax_hw *card;
diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c
index 741675525b53..796454722610 100644
--- a/drivers/isdn/hardware/mISDN/w6692.c
+++ b/drivers/isdn/hardware/mISDN/w6692.c
@@ -101,7 +101,7 @@ _set_debug(struct w6692_hw *card)
}
static int
-set_debug(const char *val, struct kernel_param *kp)
+set_debug(const char *val, const struct kernel_param *kp)
{
int ret;
struct w6692_hw *card;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b05afab6eaf2..8aebcf2a30d2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5100,7 +5100,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
return NULL;
}
-static int add_named_array(const char *val, struct kernel_param *kp)
+static int add_named_array(const char *val, const struct kernel_param *kp)
{
/* val must be "md_*" where * is not all digits.
* We allocate an array with a large free minor number, and
@@ -9330,11 +9330,11 @@ static __exit void md_exit(void)
subsys_initcall(md_init);
module_exit(md_exit)
-static int get_ro(char *buffer, struct kernel_param *kp)
+static int get_ro(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "%d", start_readonly);
}
-static int set_ro(const char *val, struct kernel_param *kp)
+static int set_ro(const char *val, const struct kernel_param *kp)
{
return kstrtouint(val, 10, (unsigned int *)&start_readonly);
}
diff --git a/drivers/media/platform/msm/camera_v2/isp/msm_isp32.c b/drivers/media/platform/msm/camera_v2/isp/msm_isp32.c
index caf69af25601..167ea3bc7a7e 100644
--- a/drivers/media/platform/msm/camera_v2/isp/msm_isp32.c
+++ b/drivers/media/platform/msm/camera_v2/isp/msm_isp32.c
@@ -162,12 +162,12 @@ static int32_t msm_vfe32_init_qos_parms(struct vfe_device *vfe_dev,
__func__);
kfree(ds_settings);
kfree(ds_regs);
- } else {
+ } else {
for (i = 0; i < ds_entries; i++)
msm_camera_io_w(ds_settings[i],
vfebase + ds_regs[i]);
- kfree(ds_regs);
- kfree(ds_settings);
+ kfree(ds_regs);
+ kfree(ds_settings);
}
} else {
kfree(ds_regs);
diff --git a/drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c b/drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c
index da42439c60d6..e03635b3a833 100644
--- a/drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c
+++ b/drivers/media/platform/msm/camera_v2/isp/msm_isp_axi_util.c
@@ -2271,8 +2271,8 @@ static int msm_isp_process_done_buf(struct vfe_device *vfe_dev,
MSM_ISP_BUFFER_STATE_PUT_BUF;
buf->buf_debug.put_state_last ^= 1;
rc = vfe_dev->buf_mgr->ops->buf_done(vfe_dev->buf_mgr,
- buf->bufq_handle, buf->buf_idx, time_stamp,
- frame_id, stream_info->runtime_output_format);
+ buf->bufq_handle, buf->buf_idx, time_stamp,
+ frame_id, stream_info->runtime_output_format);
if (rc == -EFAULT) {
msm_isp_halt_send_error(vfe_dev,
ISP_EVENT_BUF_FATAL_ERROR);
diff --git a/drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c b/drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c
index 44a4ad7822cb..6e2eaddbac79 100644
--- a/drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c
+++ b/drivers/media/platform/msm/camera_v2/sensor/eeprom/msm_eeprom.c
@@ -963,7 +963,7 @@ static int msm_eeprom_get_dt_data(struct msm_eeprom_ctrl_t *e_ctrl)
of_node = e_ctrl->pdev->dev.of_node;
if (!of_node) {
- pr_err("%s: %d of_node is NULL\n", __func__ , __LINE__);
+ pr_err("%s: %d of_node is NULL\n", __func__, __LINE__);
return -ENOMEM;
}
rc = msm_camera_get_dt_vreg_data(of_node, &power_info->cam_vreg,
diff --git a/drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c b/drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c
index e0b703321dae..419371a9f318 100644
--- a/drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c
+++ b/drivers/media/platform/msm/camera_v2/sensor/io/msm_camera_dt_util.c
@@ -144,6 +144,7 @@ int msm_camera_fill_vreg_params(struct camera_vreg_t *cam_vreg,
break;
}
}
+ break;
case CAM_V_CUSTOM2:
for (j = 0; j < num_vreg; j++) {
diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c
index def22b7fef9c..2c4933363135 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -2202,7 +2202,7 @@ static int uvc_reset_resume(struct usb_interface *intf)
* Module parameters
*/
-static int uvc_clock_param_get(char *buffer, struct kernel_param *kp)
+static int uvc_clock_param_get(char *buffer, const struct kernel_param *kp)
{
if (uvc_clock_param == CLOCK_MONOTONIC)
return sprintf(buffer, "CLOCK_MONOTONIC");
@@ -2210,7 +2210,7 @@ static int uvc_clock_param_get(char *buffer, struct kernel_param *kp)
return sprintf(buffer, "CLOCK_REALTIME");
}
-static int uvc_clock_param_set(const char *val, struct kernel_param *kp)
+static int uvc_clock_param_set(const char *val, const struct kernel_param *kp)
{
if (strncasecmp(val, "clock_", strlen("clock_")) == 0)
val += strlen("clock_");
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 207370d68c17..06cb6933b0ea 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -99,7 +99,7 @@ module_param(mpt_channel_mapping, int, 0);
MODULE_PARM_DESC(mpt_channel_mapping, " Mapping id's to channels (default=0)");
static int mpt_debug_level;
-static int mpt_set_debug_level(const char *val, struct kernel_param *kp);
+static int mpt_set_debug_level(const char *val, const struct kernel_param *kp);
module_param_call(mpt_debug_level, mpt_set_debug_level, param_get_int,
&mpt_debug_level, 0600);
MODULE_PARM_DESC(mpt_debug_level,
@@ -242,7 +242,7 @@ pci_enable_io_access(struct pci_dev *pdev)
pci_write_config_word(pdev, PCI_COMMAND, command_reg);
}
-static int mpt_set_debug_level(const char *val, struct kernel_param *kp)
+static int mpt_set_debug_level(const char *val, const struct kernel_param *kp)
{
int ret = param_set_int(val, kp);
MPT_ADAPTER *ioc;
diff --git a/drivers/misc/ibmasm/ibmasm.h b/drivers/misc/ibmasm/ibmasm.h
index 9b083448814d..5bd127727d8e 100644
--- a/drivers/misc/ibmasm/ibmasm.h
+++ b/drivers/misc/ibmasm/ibmasm.h
@@ -211,7 +211,7 @@ void ibmasmfs_unregister(void);
void ibmasmfs_add_sp(struct service_processor *sp);
/* uart */
-#ifdef CONFIG_SERIAL_8250
+#if IS_ENABLED(CONFIG_SERIAL_8250)
void ibmasm_register_uart(struct service_processor *sp);
void ibmasm_unregister_uart(struct service_processor *sp);
#else
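/*
 * Illustrative sketch (not part of the patch above): #ifdef CONFIG_FOO
 * is true only for built-in (=y) options, so the declarations above were
 * skipped whenever 8250 support was built as a module. IS_ENABLED()
 * covers both cases. CONFIG_FOO is a hypothetical option.
 */
#include <linux/kconfig.h>

#ifdef CONFIG_FOO		/* true only when CONFIG_FOO=y */
#endif

#if IS_ENABLED(CONFIG_FOO)	/* true when CONFIG_FOO=y or CONFIG_FOO=m */
#endif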
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index ab2184003c29..71becb5c4dec 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -1127,7 +1127,8 @@ static void kgdbts_put_char(u8 chr)
ts.run_test(0, chr);
}
-static int param_set_kgdbts_var(const char *kmessage, struct kernel_param *kp)
+static int param_set_kgdbts_var(const char *kmessage,
+ const struct kernel_param *kp)
{
size_t len = strlen(kmessage);
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index e2c0057737e6..b468819bc436 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -431,7 +431,7 @@ static int block2mtd_setup2(const char *val)
}
-static int block2mtd_setup(const char *val, struct kernel_param *kp)
+static int block2mtd_setup(const char *val, const struct kernel_param *kp)
{
#ifdef MODULE
return block2mtd_setup2(val);
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c
index 9734e6903fe6..d312c1b5a78e 100644
--- a/drivers/mtd/devices/phram.c
+++ b/drivers/mtd/devices/phram.c
@@ -269,7 +269,7 @@ error:
return ret;
}
-static int phram_param_call(const char *val, struct kernel_param *kp)
+static int phram_param_call(const char *val, const struct kernel_param *kp)
{
#ifdef MODULE
return phram_setup(val);
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 2af841cb7097..f45394281354 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1456,7 +1456,7 @@ static int __init bytes_str_to_int(const char *str)
* This function returns zero in case of success and a negative error code in
* case of error.
*/
-static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
+static int __init ubi_mtd_param_parse(const char *val, const struct kernel_param *kp)
{
int i, len;
struct mtd_dev_param *p;
diff --git a/drivers/net/wireless/wcnss/wcnss_wlan.c b/drivers/net/wireless/wcnss/wcnss_wlan.c
index fbf2b49c7832..e15d4b5a58eb 100644
--- a/drivers/net/wireless/wcnss/wcnss_wlan.c
+++ b/drivers/net/wireless/wcnss/wcnss_wlan.c
@@ -1655,7 +1655,7 @@ EXPORT_SYMBOL(wcnss_get_wlan_mac_address);
static int enable_wcnss_suspend_notify;
static int enable_wcnss_suspend_notify_set(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
int ret;
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 04bb9b1b14da..12720e807117 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -1,7 +1,7 @@
/*
* Device tree based initialization code for reserved memory.
*
- * Copyright (c) 2013, 2015 The Linux Foundation. All Rights Reserved.
+ * Copyright (c) 2013, 2015, 2017 The Linux Foundation. All Rights Reserved.
* Copyright (c) 2013,2014 Samsung Electronics Co., Ltd.
* http://www.samsung.com
* Author: Marek Szyprowski <m.szyprowski@samsung.com>
@@ -22,7 +22,7 @@
#include <linux/of_reserved_mem.h>
#include <linux/sort.h>
-#define MAX_RESERVED_REGIONS 16
+#define MAX_RESERVED_REGIONS 32
static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
static int reserved_mem_count;
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index a098f8324afd..4f7f3ddcea06 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -772,7 +772,8 @@ void pci_disable_link_state(struct pci_dev *pdev, int state)
}
EXPORT_SYMBOL(pci_disable_link_state);
-static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
+static int pcie_aspm_set_policy(const char *val,
+ const struct kernel_param *kp)
{
int i;
struct pcie_link_state *link;
@@ -799,7 +800,7 @@ static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
return 0;
}
-static int pcie_aspm_get_policy(char *buffer, struct kernel_param *kp)
+static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
{
int i, cnt = 0;
for (i = 0; i < ARRAY_SIZE(policy_str); i++)
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index d6d671a925e1..0111dad43c9a 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -1047,7 +1047,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
const struct pmu_probe_info *probe_table)
{
const struct of_device_id *of_id;
- const int (*init_fn)(struct arm_pmu *);
+ int (*init_fn)(struct arm_pmu *);
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
int ret = -ENODEV;
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 22496ad167a0..ee8c09717597 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -631,6 +631,7 @@ static void msm_gpio_irq_enable(struct irq_data *d)
static void msm_gpio_irq_unmask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ uint32_t irqtype = irqd_get_trigger_type(d);
struct msm_pinctrl *pctrl = to_msm_pinctrl(gc);
const struct msm_pingroup *g;
unsigned long flags;
@@ -640,6 +641,12 @@ static void msm_gpio_irq_unmask(struct irq_data *d)
spin_lock_irqsave(&pctrl->lock, flags);
+ if (irqtype & (IRQF_TRIGGER_HIGH | IRQF_TRIGGER_LOW)) {
+ val = readl_relaxed(pctrl->regs + g->intr_status_reg);
+ val &= ~BIT(g->intr_status_bit);
+ writel_relaxed(val, pctrl->regs + g->intr_status_reg);
+ }
+
val = readl(pctrl->regs + g->intr_status_reg);
val &= ~BIT(g->intr_status_bit);
writel(val, pctrl->regs + g->intr_status_reg);
@@ -905,7 +912,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
ret = gpiochip_irqchip_add(chip,
&msm_gpio_irq_chip,
0,
- handle_edge_irq,
+ handle_fasteoi_irq,
IRQ_TYPE_NONE);
if (ret) {
dev_err(pctrl->dev, "Failed to add irqchip to gpiochip\n");
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 466a0d0162c3..c345136022bf 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -9247,7 +9247,7 @@ static struct ibm_init_struct ibms_init[] __initdata = {
},
};
-static int __init set_ibm_param(const char *val, struct kernel_param *kp)
+static int __init set_ibm_param(const char *val, const struct kernel_param *kp)
{
unsigned int i;
struct ibm_struct *ibm;
diff --git a/drivers/power/reset/msm-poweroff.c b/drivers/power/reset/msm-poweroff.c
index c14bc93de717..f267ec9d80c0 100644
--- a/drivers/power/reset/msm-poweroff.c
+++ b/drivers/power/reset/msm-poweroff.c
@@ -94,7 +94,7 @@ static void *kaslr_imem_addr;
#endif
static bool scm_dload_supported;
-static int dload_set(const char *val, struct kernel_param *kp);
+static int dload_set(const char *val, const struct kernel_param *kp);
/* interface for exporting attributes */
struct reset_attribute {
struct attribute attr;
@@ -183,7 +183,7 @@ static void enable_emergency_dload_mode(void)
pr_err("Failed to set secure EDLOAD mode: %d\n", ret);
}
-static int dload_set(const char *val, struct kernel_param *kp)
+static int dload_set(const char *val, const struct kernel_param *kp)
{
int ret;
int old_val = download_mode;
diff --git a/drivers/power/supply/qcom/qpnp-smb2.c b/drivers/power/supply/qcom/qpnp-smb2.c
index 5fae7b99d88f..b0704af49353 100644
--- a/drivers/power/supply/qcom/qpnp-smb2.c
+++ b/drivers/power/supply/qcom/qpnp-smb2.c
@@ -917,6 +917,7 @@ static int smb2_init_dc_psy(struct smb2 *chip)
*************************/
static enum power_supply_property smb2_batt_props[] = {
+ POWER_SUPPLY_PROP_CHARGING_ENABLED,
POWER_SUPPLY_PROP_INPUT_SUSPEND,
POWER_SUPPLY_PROP_STATUS,
POWER_SUPPLY_PROP_HEALTH,
@@ -967,6 +968,9 @@ static int smb2_batt_get_prop(struct power_supply *psy,
case POWER_SUPPLY_PROP_PRESENT:
rc = smblib_get_prop_batt_present(chg, val);
break;
+ case POWER_SUPPLY_PROP_CHARGING_ENABLED:
+ val->intval = !get_effective_result(chg->chg_disable_votable);
+ break;
case POWER_SUPPLY_PROP_INPUT_SUSPEND:
rc = smblib_get_prop_input_suspend(chg, val);
break;
@@ -1079,6 +1083,9 @@ static int smb2_batt_set_prop(struct power_supply *psy,
struct smb_charger *chg = power_supply_get_drvdata(psy);
switch (prop) {
+ case POWER_SUPPLY_PROP_CHARGING_ENABLED:
+ vote(chg->chg_disable_votable, USER_VOTER, !!!val->intval, 0);
+ break;
case POWER_SUPPLY_PROP_INPUT_SUSPEND:
rc = smblib_set_prop_input_suspend(chg, val);
break;
@@ -1163,6 +1170,7 @@ static int smb2_batt_prop_is_writeable(struct power_supply *psy,
enum power_supply_property psp)
{
switch (psp) {
+ case POWER_SUPPLY_PROP_CHARGING_ENABLED:
case POWER_SUPPLY_PROP_INPUT_SUSPEND:
case POWER_SUPPLY_PROP_SYSTEM_TEMP_LEVEL:
case POWER_SUPPLY_PROP_CAPACITY:
diff --git a/drivers/power/supply/qcom/smb-lib.c b/drivers/power/supply/qcom/smb-lib.c
index 81623c65ea8e..1782f23fafa7 100644
--- a/drivers/power/supply/qcom/smb-lib.c
+++ b/drivers/power/supply/qcom/smb-lib.c
@@ -4457,6 +4457,7 @@ static void rdstd_cc2_detach_work(struct work_struct *work)
{
int rc;
u8 stat4, stat5;
+ bool lock = false;
struct smb_charger *chg = container_of(work, struct smb_charger,
rdstd_cc2_detach_work);
@@ -4519,9 +4520,28 @@ static void rdstd_cc2_detach_work(struct work_struct *work)
rc = smblib_masked_write(chg, TYPE_C_INTRPT_ENB_SOFTWARE_CTRL_REG,
EXIT_SNK_BASED_ON_CC_BIT, 0);
smblib_reg_block_restore(chg, cc2_detach_settings);
- mutex_lock(&chg->lock);
+
+ /*
+ * Mutex acquisition deadlock can happen while cancelling this work
+ * during pd_hard_reset from the function smblib_cc2_sink_removal_exit
+ * which is called in the same lock context that we try to acquire in
+ * this work routine.
+ * Check if this work is running during pd_hard_reset and use trylock
+ * instead of mutex_lock to prevent any deadlock if mutext is already
+ * held.
+ */
+ if (chg->pd_hard_reset) {
+ if (mutex_trylock(&chg->lock))
+ lock = true;
+ } else {
+ mutex_lock(&chg->lock);
+ lock = true;
+ }
+
smblib_usb_typec_change(chg);
- mutex_unlock(&chg->lock);
+
+ if (lock)
+ mutex_unlock(&chg->lock);
return;
rerun:
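/*
 * Illustrative sketch (not part of the patch above) of the trylock
 * pattern used in rdstd_cc2_detach_work(): when the canceller may
 * already hold the lock that the work routine needs, an unconditional
 * mutex_lock() self-deadlocks. All identifiers are hypothetical.
 */
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct example_dev {
	struct mutex lock;
	struct work_struct work;
	bool in_reset;
};

static void example_work_fn(struct work_struct *work)
{
	struct example_dev *dev = container_of(work, struct example_dev, work);
	bool locked = false;

	if (dev->in_reset) {
		/* canceller may hold dev->lock; do not block on it */
		locked = mutex_trylock(&dev->lock);
	} else {
		mutex_lock(&dev->lock);
		locked = true;
	}

	/* ... update state, written to be safe with or without the lock ... */

	if (locked)
		mutex_unlock(&dev->lock);
}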
diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c
index d7597c08fa11..a70fa0eb3871 100644
--- a/drivers/scsi/fcoe/fcoe_transport.c
+++ b/drivers/scsi/fcoe/fcoe_transport.c
@@ -32,13 +32,13 @@ MODULE_AUTHOR("Open-FCoE.org");
MODULE_DESCRIPTION("FIP discovery protocol and FCoE transport for FCoE HBAs");
MODULE_LICENSE("GPL v2");
-static int fcoe_transport_create(const char *, struct kernel_param *);
-static int fcoe_transport_destroy(const char *, struct kernel_param *);
+static int fcoe_transport_create(const char *, const struct kernel_param *);
+static int fcoe_transport_destroy(const char *, const struct kernel_param *);
static int fcoe_transport_show(char *buffer, const struct kernel_param *kp);
static struct fcoe_transport *fcoe_transport_lookup(struct net_device *device);
static struct fcoe_transport *fcoe_netdev_map_lookup(struct net_device *device);
-static int fcoe_transport_enable(const char *, struct kernel_param *);
-static int fcoe_transport_disable(const char *, struct kernel_param *);
+static int fcoe_transport_enable(const char *, const struct kernel_param *);
+static int fcoe_transport_disable(const char *, const struct kernel_param *);
static int libfcoe_device_notification(struct notifier_block *notifier,
ulong event, void *ptr);
@@ -842,7 +842,8 @@ EXPORT_SYMBOL(fcoe_ctlr_destroy_store);
*
* Returns: 0 for success
*/
-static int fcoe_transport_create(const char *buffer, struct kernel_param *kp)
+static int fcoe_transport_create(const char *buffer,
+ const struct kernel_param *kp)
{
int rc = -ENODEV;
struct net_device *netdev = NULL;
@@ -907,7 +908,8 @@ out_nodev:
*
* Returns: 0 for success
*/
-static int fcoe_transport_destroy(const char *buffer, struct kernel_param *kp)
+static int fcoe_transport_destroy(const char *buffer,
+ const struct kernel_param *kp)
{
int rc = -ENODEV;
struct net_device *netdev = NULL;
@@ -951,7 +953,8 @@ out_nodev:
*
* Returns: 0 for success
*/
-static int fcoe_transport_disable(const char *buffer, struct kernel_param *kp)
+static int fcoe_transport_disable(const char *buffer,
+ const struct kernel_param *kp)
{
int rc = -ENODEV;
struct net_device *netdev = NULL;
@@ -985,7 +988,8 @@ out_nodev:
*
* Returns: 0 for success
*/
-static int fcoe_transport_enable(const char *buffer, struct kernel_param *kp)
+static int fcoe_transport_enable(const char *buffer,
+ const struct kernel_param *kp)
{
int rc = -ENODEV;
struct net_device *netdev = NULL;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 8d52afd1f71d..c8430f428e14 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -100,7 +100,7 @@ _base_get_ioc_facts(struct MPT3SAS_ADAPTER *ioc, int sleep_flag);
*
*/
static int
-_scsih_set_fwfault_debug(const char *val, struct kernel_param *kp)
+_scsih_set_fwfault_debug(const char *val, const struct kernel_param *kp)
{
int ret = param_set_int(val, kp);
struct MPT3SAS_ADAPTER *ioc;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 0e39bb1489ac..f02fc3e504c0 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -283,7 +283,7 @@ struct _scsi_io_transfer {
* Note: The logging levels are defined in mpt3sas_debug.h.
*/
static int
-_scsih_set_debug_level(const char *val, struct kernel_param *kp)
+_scsih_set_debug_level(const char *val, const struct kernel_param *kp)
{
int ret = param_set_int(val, kp);
struct MPT3SAS_ADAPTER *ioc;
diff --git a/drivers/soc/qcom/hab/hab.h b/drivers/soc/qcom/hab/hab.h
index cb7ed52050b2..bdc8f54b08b4 100644
--- a/drivers/soc/qcom/hab/hab.h
+++ b/drivers/soc/qcom/hab/hab.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016-2019, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2019, 2021, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -98,6 +98,12 @@ enum hab_payload_type {
/* make sure concascaded name is less than this value */
#define MAX_VMID_NAME_SIZE 30
+/*
+ * The maximum value of payload_count in struct export_desc
+ * Max u32_t size_bytes from hab_ioctl.h(0xFFFFFFFF) / page size(0x1000)
+ */
+#define MAX_EXP_PAYLOAD_COUNT 0xFFFFF
+
#define HABCFG_FILE_SIZE_MAX 256
#define HABCFG_MMID_AREA_MAX (MM_ID_MAX/100)
diff --git a/drivers/soc/qcom/hab/hab_msg.c b/drivers/soc/qcom/hab/hab_msg.c
index 9f92074665df..4494cfbf0e0b 100644
--- a/drivers/soc/qcom/hab/hab_msg.c
+++ b/drivers/soc/qcom/hab/hab_msg.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016-2019, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2019, 2021, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -11,6 +11,7 @@
*
*/
#include "hab.h"
+#include "hab_grantable.h"
static int hab_rx_queue_empty(struct virtual_channel *vchan)
{
@@ -207,6 +208,8 @@ int hab_msg_recv(struct physical_channel *pchan,
struct export_desc *exp_desc = NULL, exp_ack = {0};
struct timeval tv = {0};
unsigned long long rx_mpm_tv = 0;
+ size_t exp_desc_size_expected = 0;
+ struct compressed_pfns *pfn_table = NULL;
/* get the local virtual channel if it isn't an open message */
if (payload_type != HAB_PAYLOAD_TYPE_INIT &&
@@ -295,8 +298,11 @@ int hab_msg_recv(struct physical_channel *pchan,
break;
case HAB_PAYLOAD_TYPE_EXPORT:
- if (sizebytes > HAB_HEADER_SIZE_MASK) {
- pr_err("%s exp size too large %zd header %zd\n",
+ exp_desc_size_expected = sizeof(struct export_desc)
+ + sizeof(struct compressed_pfns);
+ if (sizebytes > (size_t)HAB_HEADER_SIZE_MASK ||
+ sizebytes < exp_desc_size_expected) {
+ pr_err("%s exp size too large/small %zu header %zu\n",
pchan->name, sizebytes, sizeof(*exp_desc));
break;
}
@@ -328,6 +334,51 @@ int hab_msg_recv(struct physical_channel *pchan,
- hab_export_enqueue(vchan, exp_desc); /* for local use */
- hab_send_export_ack(vchan, pchan, &exp_ack); /* ack exporter */
+ /*
+ * We should do all the checks here.
+ * But in order to improve performance, we put the
+ * checks related to exp->payload_count and
+ * pfn_table->region[i].size into function pages_list_create.
+ * So any potential usage of such data from the remote side
+ * after the checks here and before the checks in
+ * pages_list_create needs to add some more checks if necessary.
+ */
+ pfn_table = (struct compressed_pfns *)exp_desc->payload;
+ if (pfn_table->nregions <= 0 ||
+ (pfn_table->nregions >
+ SIZE_MAX / sizeof(struct region)) ||
+ (SIZE_MAX - exp_desc_size_expected <
+ pfn_table->nregions * sizeof(struct region))) {
+ pr_err("%s nregions is too large or negative, nregions:%d!\n",
+ pchan->name, pfn_table->nregions);
+ kfree(exp_desc);
+ break;
+ }
+
+ if (pfn_table->nregions > exp_desc->payload_count) {
+ pr_err("%s nregions %d greater than payload_count %d\n",
+ pchan->name, pfn_table->nregions,
+ exp_desc->payload_count);
+ kfree(exp_desc);
+ break;
+ }
+
+ if (exp_desc->payload_count > MAX_EXP_PAYLOAD_COUNT) {
+ pr_err("payload_count out of range: %d size overflow\n",
+ exp_desc->payload_count);
+ kfree(exp_desc);
+ break;
+ }
+
+ exp_desc_size_expected +=
+ pfn_table->nregions * sizeof(struct region);
+ if (sizebytes != exp_desc_size_expected) {
+ pr_err("%s exp size not equal %zu expect %zu\n",
+ pchan->name, sizebytes, exp_desc_size_expected);
+ kfree(exp_desc);
+ break;
+ }
+
+ hab_export_enqueue(vchan, exp_desc);
+ hab_send_export_ack(vchan, pchan, exp_desc);
break;
case HAB_PAYLOAD_TYPE_EXPORT_ACK:
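/*
 * Illustrative sketch (not part of the patch above) of the arithmetic
 * behind the nregions checks: both the multiplication and the final
 * addition are validated against SIZE_MAX before they are performed.
 * "struct example_region" is a hypothetical stand-in for the driver's
 * pfn-table region type.
 */
#include <linux/kernel.h>

struct example_region {
	unsigned int space;
	unsigned int size;
};

static bool example_regions_fit(int nregions, size_t base_size)
{
	if (nregions <= 0)	/* reject non-positive counts up front */
		return false;
	/* nregions * sizeof(struct example_region) must not wrap size_t */
	if ((size_t)nregions > SIZE_MAX / sizeof(struct example_region))
		return false;
	/* base_size + table size must not wrap either */
	if (SIZE_MAX - base_size <
	    (size_t)nregions * sizeof(struct example_region))
		return false;
	return true;
}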
diff --git a/drivers/soc/qcom/hab/khab_test.c b/drivers/soc/qcom/hab/khab_test.c
index 6add7af0489f..6a89afb3d726 100644
--- a/drivers/soc/qcom/hab/khab_test.c
+++ b/drivers/soc/qcom/hab/khab_test.c
@@ -241,13 +241,13 @@ int hab_perf_test(long testId)
return ret;
}
-static int kick_hab_perf_test(const char *val, struct kernel_param *kp);
-static int get_hab_perf_result(char *buffer, struct kernel_param *kp);
+static int kick_hab_perf_test(const char *val, const struct kernel_param *kp);
+static int get_hab_perf_result(char *buffer, const struct kernel_param *kp);
module_param_call(perf_test, kick_hab_perf_test, get_hab_perf_result,
NULL, S_IRUSR | S_IWUSR);
-static int kick_hab_perf_test(const char *val, struct kernel_param *kp)
+static int kick_hab_perf_test(const char *val, const struct kernel_param *kp)
{
long testId;
int err = kstrtol(val, 10, &testId);
@@ -258,7 +258,7 @@ static int kick_hab_perf_test(const char *val, struct kernel_param *kp)
return hab_perf_test(testId);
}
-static int get_hab_perf_result(char *buffer, struct kernel_param *kp)
+static int get_hab_perf_result(char *buffer, const struct kernel_param *kp)
{
return strlcpy(buffer, g_perf_test_result,
strlen(g_perf_test_result)+1);
diff --git a/drivers/soc/qcom/smp2p_sleepstate.c b/drivers/soc/qcom/smp2p_sleepstate.c
index 2ef25e48ce50..1f0809b61220 100644
--- a/drivers/soc/qcom/smp2p_sleepstate.c
+++ b/drivers/soc/qcom/smp2p_sleepstate.c
@@ -20,7 +20,8 @@
#define SET_DELAY (2 * HZ)
#define PROC_AWAKE_ID 12 /* 12th bit */
-static int slst_gpio_base_id;
+int slst_gpio_base_id;
+
/**
* sleepstate_pm_notifier() - PM notifier callback function.
@@ -36,13 +37,11 @@ static int sleepstate_pm_notifier(struct notifier_block *nb,
{
switch (event) {
case PM_SUSPEND_PREPARE:
- gpio_set_value(slst_gpio_base_id + PROC_AWAKE_ID, 0);
msleep(25); /* To be tuned based on SMP2P latencies */
msm_ipc_router_set_ws_allowed(true);
break;
case PM_POST_SUSPEND:
- gpio_set_value(slst_gpio_base_id + PROC_AWAKE_ID, 1);
msleep(25); /* To be tuned based on SMP2P latencies */
msm_ipc_router_set_ws_allowed(false);
break;
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 9f46e0f56b1a..1cda5df5843b 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -1665,10 +1665,10 @@ static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
data.allocation.flags, true);
if (IS_ERR(handle))
return PTR_ERR(handle);
- pass_to_user(handle);
data.allocation.handle = handle->id;
cleanup_handle = handle;
+ pass_to_user(handle);
break;
}
case ION_IOC_FREE:
@@ -1713,11 +1713,12 @@ static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
} else {
+ data.handle.handle = handle->id;
handle = pass_to_user(handle);
- if (IS_ERR(handle))
+ if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- else
- data.handle.handle = handle->id;
+ data.handle.handle = 0;
+ }
}
mutex_unlock(&client->lock);
break;
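/*
 * Illustrative sketch (not part of the patch above): pass_to_user()
 * publishes the handle to userspace, after which a racing thread can
 * free it, so every field the ioctl still needs (handle->id here) must
 * be copied out before publication. Identifiers are hypothetical;
 * example_publish() stands in for the publication step.
 */
struct example_handle { int id; };

int example_publish(struct example_handle *h);	/* makes h reachable */

static int example_alloc_ioctl(struct example_handle *h, int *out_id)
{
	*out_id = h->id;		/* read fields first... */
	return example_publish(h);	/* ...publish last */
}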
diff --git a/drivers/staging/android/ion/msm/msm_ion.c b/drivers/staging/android/ion/msm/msm_ion.c
index 1a3aca407c9e..0a61301385f2 100644
--- a/drivers/staging/android/ion/msm/msm_ion.c
+++ b/drivers/staging/android/ion/msm/msm_ion.c
@@ -628,6 +628,9 @@ static int check_vaddr_bounds(unsigned long start, unsigned long end)
struct vm_area_struct *vma;
int ret = 1;
+ if (!start)
+ goto out;
+
if (end < start)
goto out;
@@ -781,20 +784,28 @@ long msm_ion_custom_ioctl(struct ion_client *client,
down_read(&mm->mmap_sem);
- start = (unsigned long)data.flush_data.vaddr +
- data.flush_data.offset;
- end = start + data.flush_data.length;
-
- if (start && check_vaddr_bounds(start, end)) {
- pr_err("%s: virtual address %pK is out of bounds\n",
+ if ((unsigned long)data.flush_data.vaddr >
+ (ULONG_MAX - data.flush_data.offset)) {
+ pr_err("%s: Integer overflow detected for %pK\n",
__func__, data.flush_data.vaddr);
ret = -EINVAL;
} else {
- ret = ion_do_cache_op(
- client, handle, data.flush_data.vaddr,
- data.flush_data.offset,
- data.flush_data.length, cmd);
+ start = (unsigned long)data.flush_data.vaddr +
+ data.flush_data.offset;
+ end = start + data.flush_data.length;
+
+ if (check_vaddr_bounds(start, end)) {
+ pr_err("%s: virtual address %pK is out of bounds\n",
+ __func__, data.flush_data.vaddr);
+ ret = -EINVAL;
+ } else {
+ ret = ion_do_cache_op(
+ client, handle, data.flush_data.vaddr,
+ data.flush_data.offset,
+ data.flush_data.length, cmd);
+ }
}
+
up_read(&mm->mmap_sem);
ion_free_nolock(client, handle);
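/*
 * Illustrative sketch (not part of the patch above) of the overflow
 * guard added to msm_ion_custom_ioctl(): before computing
 * start = vaddr + offset from userspace-supplied values, verify that
 * the sum cannot wrap around. Identifiers are hypothetical.
 */
#include <linux/errno.h>
#include <linux/kernel.h>

static int example_range_start(unsigned long vaddr, unsigned long offset,
			       unsigned long *start)
{
	if (vaddr > ULONG_MAX - offset)	/* vaddr + offset would wrap */
		return -EINVAL;
	*start = vaddr + offset;
	return 0;
}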
diff --git a/drivers/staging/rdma/ipath/ipath_init_chip.c b/drivers/staging/rdma/ipath/ipath_init_chip.c
index a5eea199f733..f8338f0e9d3f 100644
--- a/drivers/staging/rdma/ipath/ipath_init_chip.c
+++ b/drivers/staging/rdma/ipath/ipath_init_chip.c
@@ -63,7 +63,7 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
*/
static ushort ipath_kpiobufs;
-static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
+static int ipath_set_kpiobufs(const char *str, const struct kernel_param *kp);
module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
&ipath_kpiobufs, S_IWUSR | S_IRUGO);
@@ -1014,7 +1014,7 @@ done:
return ret;
}
-static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
+static int ipath_set_kpiobufs(const char *str, const struct kernel_param *kp)
{
struct ipath_devdata *dd;
unsigned long flags;
diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index 0314e78e31ff..ec24e82da169 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c
@@ -232,7 +232,8 @@ static void kgdboc_put_char(u8 chr)
kgdb_tty_line, chr);
}
-static int param_set_kgdboc_var(const char *kmessage, struct kernel_param *kp)
+static int param_set_kgdboc_var(const char *kmessage,
+ const struct kernel_param *kp)
{
size_t len = strlen(kmessage);
diff --git a/drivers/usb/chipidea/otg_fsm.h b/drivers/usb/chipidea/otg_fsm.h
index 2689375ae5da..262d6ef8df7c 100644
--- a/drivers/usb/chipidea/otg_fsm.h
+++ b/drivers/usb/chipidea/otg_fsm.h
@@ -62,7 +62,7 @@
/* SSEND time before SRP */
#define TB_SSEND_SRP (1500) /* minimum 1.5 sec, section:5.1.2 */
-#ifdef CONFIG_USB_OTG_FSM
+#if IS_ENABLED(CONFIG_USB_OTG_FSM)
int ci_hdrc_otg_fsm_init(struct ci_hdrc *ci);
int ci_otg_fsm_work(struct ci_hdrc *ci);
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 0a6810ea4107..61c77107e97d 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1942,6 +1942,9 @@ unknown:
if (w_index != 0x5 || (w_value >> 8))
break;
interface = w_value & 0xFF;
+ if (interface >= MAX_CONFIG_INTERFACES ||
+ !os_desc_cfg->interface[interface])
+ break;
buf[6] = w_index;
if (w_length == 0x0A) {
count = count_ext_prop(os_desc_cfg,
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 9ba61939eed6..2c352d21c0cc 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -16,7 +16,7 @@
#include <linux/usb/ch9.h>
#ifdef CONFIG_USB_CONFIGFS_F_ACC
-extern int acc_ctrlrequest(struct usb_composite_dev *cdev,
+extern int acc_ctrlrequest_composite(struct usb_composite_dev *cdev,
const struct usb_ctrlrequest *ctrl);
void acc_disconnect(void);
#endif
@@ -1620,7 +1620,7 @@ static int android_setup(struct usb_gadget *gadget,
#ifdef CONFIG_USB_CONFIGFS_F_ACC
if (value < 0)
- value = acc_ctrlrequest(cdev, c);
+ value = acc_ctrlrequest_composite(cdev, c);
#endif
if (value < 0)
diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c
index b5c1ad06f8be..0bfd486a4414 100644
--- a/drivers/usb/gadget/function/f_accessory.c
+++ b/drivers/usb/gadget/function/f_accessory.c
@@ -1066,6 +1066,26 @@ err:
}
EXPORT_SYMBOL_GPL(acc_ctrlrequest);
+int acc_ctrlrequest_composite(struct usb_composite_dev *cdev,
+ const struct usb_ctrlrequest *ctrl)
+{
+ u16 w_length = le16_to_cpu(ctrl->wLength);
+
+ if (w_length > USB_COMP_EP0_BUFSIZ) {
+ if (ctrl->bRequestType & USB_DIR_IN) {
+ /* Cast away the const, we are going to overwrite on purpose. */
+ __le16 *temp = (__le16 *)&ctrl->wLength;
+
+ *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ);
+ w_length = USB_COMP_EP0_BUFSIZ;
+ } else {
+ return -EINVAL;
+ }
+ }
+ return acc_ctrlrequest(cdev, ctrl);
+}
+EXPORT_SYMBOL_GPL(acc_ctrlrequest_composite);
+
static int
__acc_function_bind(struct usb_configuration *c,
struct usb_function *f, bool configfs)
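/*
 * Illustrative sketch (not part of the patch above): the wrapper added
 * above clamps the setup packet's wLength to the composite layer's ep0
 * buffer before reusing the legacy handler, so an oversized IN request
 * can no longer overrun the ep0 buffer. EXAMPLE_EP0_BUFSIZ is a
 * hypothetical stand-in for USB_COMP_EP0_BUFSIZ.
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/usb/ch9.h>

#define EXAMPLE_EP0_BUFSIZ 4096

static int example_clamp_wlength(struct usb_ctrlrequest *ctrl)
{
	u16 w_length = le16_to_cpu(ctrl->wLength);

	if (w_length <= EXAMPLE_EP0_BUFSIZ)
		return 0;
	if (!(ctrl->bRequestType & USB_DIR_IN))
		return -EINVAL;	/* cannot shrink an OUT transfer */
	ctrl->wLength = cpu_to_le16(EXAMPLE_EP0_BUFSIZ);
	return 0;
}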
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index 1d13d79d5070..3046066680d9 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -650,14 +650,18 @@ static int rndis_set_response(struct rndis_params *params,
rndis_set_cmplt_type *resp;
rndis_resp_t *r;
+ BufLength = le32_to_cpu(buf->InformationBufferLength);
+ BufOffset = le32_to_cpu(buf->InformationBufferOffset);
+ if ((BufLength > RNDIS_MAX_TOTAL_SIZE) ||
+ (BufOffset > RNDIS_MAX_TOTAL_SIZE) ||
+ (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE))
+ return -EINVAL;
+
r = rndis_add_response(params, sizeof(rndis_set_cmplt_type));
if (!r)
return -ENOMEM;
resp = (rndis_set_cmplt_type *)r->buf;
- BufLength = le32_to_cpu(buf->InformationBufferLength);
- BufOffset = le32_to_cpu(buf->InformationBufferOffset);
-
#ifdef VERBOSE_DEBUG
pr_debug("%s: Length: %d\n", __func__, BufLength);
pr_debug("%s: Offset: %d\n", __func__, BufOffset);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index f2b4fdd1f49d..a069150d97f3 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1851,8 +1851,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
spin_lock_irq (&dev->lock);
value = -EINVAL;
if (dev->buf) {
+ spin_unlock_irq(&dev->lock);
kfree(kbuf);
- goto fail;
+ return value;
}
dev->buf = kbuf;
@@ -1900,8 +1901,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
value = usb_gadget_probe_driver(&gadgetfs_driver);
if (value != 0) {
- kfree (dev->buf);
- dev->buf = NULL;
+ spin_lock_irq(&dev->lock);
+ goto fail;
} else {
/* at this point "good" hardware has for the first time
* let the USB the host see us. alternatively, if users
@@ -1917,6 +1918,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
return value;
fail:
+ dev->config = NULL;
+ dev->hs_config = NULL;
+ dev->dev = NULL;
spin_unlock_irq (&dev->lock);
pr_debug ("%s: %s fail %Zd, %p\n", shortname, __func__, value, dev);
kfree (dev->buf);
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 2938153fe7b1..9b1963a91ad5 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -269,7 +269,7 @@ static struct console usbcons = {
void usb_serial_console_disconnect(struct usb_serial *serial)
{
- if (serial && serial->port && serial->port[0]
+ if (serial && serial->port[0]
&& serial->port[0] == usbcons_info.port) {
usb_serial_console_exit();
usb_serial_put(serial);
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 850d86ca685b..f08809323762 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -24,7 +24,6 @@ menuconfig VFIO
select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
- select ANON_INODES
help
VFIO provides a framework for secure userspace device drivers.
See Documentation/vfio.txt for more details.
diff --git a/drivers/video/console/dummycon.c b/drivers/video/console/dummycon.c
index b30e7d87804b..04e75d5efb19 100644
--- a/drivers/video/console/dummycon.c
+++ b/drivers/video/console/dummycon.c
@@ -41,12 +41,69 @@ static void dummycon_init(struct vc_data *vc, int init)
vc_resize(vc, DUMMY_COLUMNS, DUMMY_ROWS);
}
-static int dummycon_dummy(void)
+static void dummycon_deinit(struct vc_data *vc)
+{
+}
+
+static void dummycon_clear(struct vc_data *vc, int a, int b, int c, int d)
+{
+}
+
+static void dummycon_putc(struct vc_data *vc, int a, int b, int c)
+{
+}
+
+static void dummycon_putcs(struct vc_data *vc, const unsigned short *s, int a, int b, int c)
+{
+}
+
+static void dummycon_cursor(struct vc_data *vc, int a)
+{
+}
+
+static int dummycon_scroll(struct vc_data *vc, int a, int b, int c, int d)
{
return 0;
}
-#define DUMMY (void *)dummycon_dummy
+static int dummycon_switch(struct vc_data *vc)
+{
+ return 0;
+}
+
+static int dummycon_blank(struct vc_data *vc, int a, int b)
+{
+ return 0;
+}
+
+static int dummycon_font_set(struct vc_data *vc, struct console_font *f, unsigned u)
+{
+ return 0;
+}
+
+static int dummycon_font_default(struct vc_data *vc, struct console_font *f, char *c)
+{
+ return 0;
+}
+
+static int dummycon_font_copy(struct vc_data *vc, int a)
+{
+ return 0;
+}
+
+static void con_bmove(struct vc_data *vc, int a, int b, int c, int d, int e, int f)
+{
+}
+
+static int con_set_palette(struct vc_data *vc, unsigned char *p)
+{
+ return 0;
+}
+
+static int con_scrolldelta(struct vc_data *vc, int x)
+{
+ return 0;
+}
/*
* The console `switch' structure for the dummy console
@@ -58,19 +115,19 @@ const struct consw dummy_con = {
.owner = THIS_MODULE,
.con_startup = dummycon_startup,
.con_init = dummycon_init,
- .con_deinit = DUMMY,
- .con_clear = DUMMY,
- .con_putc = DUMMY,
- .con_putcs = DUMMY,
- .con_cursor = DUMMY,
- .con_scroll = DUMMY,
- .con_bmove = DUMMY,
+ .con_bmove = con_bmove,
- .con_switch = DUMMY,
- .con_blank = DUMMY,
- .con_font_set = DUMMY,
- .con_font_default = DUMMY,
- .con_font_copy = DUMMY,
- .con_set_palette = DUMMY,
- .con_scrolldelta = DUMMY,
+ .con_set_palette = con_set_palette,
+ .con_scrolldelta = con_scrolldelta,
+ .con_deinit = dummycon_deinit,
+ .con_clear = dummycon_clear,
+ .con_putc = dummycon_putc,
+ .con_putcs = dummycon_putcs,
+ .con_cursor = dummycon_cursor,
+ .con_scroll = dummycon_scroll,
+ .con_switch = dummycon_switch,
+ .con_blank = dummycon_blank,
+ .con_font_set = dummycon_font_set,
+ .con_font_default = dummycon_font_default,
+ .con_font_copy = dummycon_font_copy,
};
EXPORT_SYMBOL_GPL(dummy_con);
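/*
 * Illustrative sketch (not part of the patch above): the DUMMY macro
 * cast one catch-all function to several incompatible pointer types.
 * Calling through a mismatched function-pointer type is undefined
 * behaviour and is rejected by indirect-call checkers such as clang
 * CFI; per-op stubs with the exact prototypes, as above, avoid that.
 * example_blank is hypothetical.
 */
#include <linux/console_struct.h>

/* correct: the stub's prototype matches the con_blank slot exactly */
static int example_blank(struct vc_data *vc, int blank, int mode_switch)
{
	return 0;
}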
diff --git a/drivers/video/fbdev/msm/mdss_dsi_status.c b/drivers/video/fbdev/msm/mdss_dsi_status.c
index 0490bcdfbfad..ef07291d4d9a 100644
--- a/drivers/video/fbdev/msm/mdss_dsi_status.c
+++ b/drivers/video/fbdev/msm/mdss_dsi_status.c
@@ -217,7 +217,8 @@ static int fb_event_callback(struct notifier_block *self,
return 0;
}
-static int param_dsi_status_disable(const char *val, struct kernel_param *kp)
+static int param_dsi_status_disable(const char *val,
+ const struct kernel_param *kp)
{
int ret = 0;
int int_val;
@@ -232,7 +233,7 @@ static int param_dsi_status_disable(const char *val, struct kernel_param *kp)
return ret;
}
-static int param_set_interval(const char *val, struct kernel_param *kp)
+static int param_set_interval(const char *val, const struct kernel_param *kp)
{
int ret = 0;
int int_val;
diff --git a/drivers/video/fbdev/msm/mdss_mdp_intf_video.c b/drivers/video/fbdev/msm/mdss_mdp_intf_video.c
index 3761fa4af0eb..caa910db508c 100644
--- a/drivers/video/fbdev/msm/mdss_mdp_intf_video.c
+++ b/drivers/video/fbdev/msm/mdss_mdp_intf_video.c
@@ -1236,7 +1236,7 @@ static int mdss_mdp_video_wait4comp(struct mdss_mdp_ctl *ctl, void *arg)
if (rc == 0) {
pr_warn("vsync wait timeout %d, fallback to poll mode\n",
ctl->num);
- ctx->polling_en++;
+ ctx->polling_en = true;
rc = mdss_mdp_video_pollwait(ctl);
} else {
rc = 0;
diff --git a/drivers/video/fbdev/msm/msm_mdss_io_8974.c b/drivers/video/fbdev/msm/msm_mdss_io_8974.c
index 922c4440ba82..000beebe0375 100644
--- a/drivers/video/fbdev/msm/msm_mdss_io_8974.c
+++ b/drivers/video/fbdev/msm/msm_mdss_io_8974.c
@@ -1321,16 +1321,16 @@ static void mdss_dsi_phy_regulator_ctrl(struct mdss_dsi_ctrl_pdata *ctrl,
mdss_dsi_20nm_phy_regulator_enable(ctrl);
break;
default:
- /*
- * For dual dsi case, do not reconfigure dsi phy
- * regulator if the other dsi controller is still
- * active.
- */
- if (!mdss_dsi_is_hw_config_dual(sdata) ||
- (other_ctrl && (!other_ctrl->is_phyreg_enabled
- || other_ctrl->mmss_clamp)))
- mdss_dsi_28nm_phy_regulator_enable(ctrl);
- break;
+ /*
+ * For dual dsi case, do not reconfigure dsi phy
+ * regulator if the other dsi controller is still
+ * active.
+ */
+ if (!mdss_dsi_is_hw_config_dual(sdata) ||
+ (other_ctrl && (!other_ctrl->is_phyreg_enabled
+ || other_ctrl->mmss_clamp)))
+ mdss_dsi_28nm_phy_regulator_enable(ctrl);
+ break;
}
}
ctrl->is_phyreg_enabled = 1;
diff --git a/fs/Makefile b/fs/Makefile
index debf0742b3c9..969caba6b282 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_PROC_FS) += proc_namespace.o
obj-y += notify/
obj-$(CONFIG_EPOLL) += eventpoll.o
-obj-$(CONFIG_ANON_INODES) += anon_inodes.o
+obj-y += anon_inodes.o
obj-$(CONFIG_SIGNALFD) += signalfd.o
obj-$(CONFIG_TIMERFD) += timerfd.o
obj-$(CONFIG_EVENTFD) += eventfd.o
diff --git a/fs/afs/file.c b/fs/afs/file.c
index cf8a07e282a6..5290f6e83605 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -123,11 +123,10 @@ static void afs_file_readpage_read_complete(struct page *page,
/*
* read page from file, directory or symlink, given a key to use
*/
-int afs_page_filler(void *data, struct page *page)
+static int __afs_page_filler(struct key *key, struct page *page)
{
struct inode *inode = page->mapping->host;
struct afs_vnode *vnode = AFS_FS_I(inode);
- struct key *key = data;
size_t len;
off_t offset;
int ret;
@@ -209,6 +208,13 @@ error:
return ret;
}
+int afs_page_filler(struct file *data, struct page *page)
+{
+ struct key *key = (struct key *)data;
+
+ return __afs_page_filler(key, page);
+}
+
/*
* read page from file, directory or symlink, given a file to nominate the key
* to be used
@@ -221,14 +227,14 @@ static int afs_readpage(struct file *file, struct page *page)
if (file) {
key = file->private_data;
ASSERT(key != NULL);
- ret = afs_page_filler(key, page);
+ ret = __afs_page_filler(key, page);
} else {
struct inode *inode = page->mapping->host;
key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
} else {
- ret = afs_page_filler(key, page);
+ ret = __afs_page_filler(key, page);
key_put(key);
}
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1330b2a695ff..64452ba25988 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -499,7 +499,7 @@ extern const struct file_operations afs_file_operations;
extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
-extern int afs_page_filler(void *, struct page *);
+extern int afs_page_filler(struct file *, struct page *);
/*
* flock.c
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index ccd0b212e82a..05ba277e6269 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -202,7 +202,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
/* try and do the mount */
_debug("--- attempting mount %s -o %s ---", devname, options);
- mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
+ mnt = vfs_submount(mntpt, &afs_fs_type, devname, options);
_debug("--- mount result %p ---", mnt);
free_page((unsigned long) devname);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 98198c57370b..acd43792ef82 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -439,8 +439,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
memcpy(&wq->name, &qstr, sizeof(struct qstr));
wq->dev = autofs4_get_dev(sbi);
wq->ino = autofs4_get_ino(sbi);
- wq->uid = current_uid();
- wq->gid = current_gid();
+ wq->uid = current_cred()->uid;
+ wq->gid = current_cred()->gid;
wq->pid = pid;
wq->tgid = tgid;
wq->status = -EINTR; /* Status return if interrupted */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 1ea643faf04b..d8afb402f966 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -241,7 +241,8 @@ compose_mount_options_err:
* @fullpath: full path in UNC format
* @ref: server's referral
*/
-static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
+static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt,
+ struct cifs_sb_info *cifs_sb,
const char *fullpath, const struct dfs_info3_param *ref)
{
struct vfsmount *mnt;
@@ -255,7 +256,7 @@ static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb,
if (IS_ERR(mountdata))
return (struct vfsmount *)mountdata;
- mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata);
+ mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata);
kfree(mountdata);
kfree(devname);
return mnt;
@@ -330,7 +331,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
mnt = ERR_PTR(-EINVAL);
break;
}
- mnt = cifs_dfs_do_refmount(cifs_sb,
+ mnt = cifs_dfs_do_refmount(mntpt, cifs_sb,
full_path, referrals + i);
cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n",
__func__, referrals[i].node_name, mnt);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 3530e1c3ff56..d7111b8ce36a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -186,9 +186,9 @@ static const struct super_operations debugfs_super_operations = {
static struct vfsmount *debugfs_automount(struct path *path)
{
- struct vfsmount *(*f)(void *);
- f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
- return f(d_inode(path->dentry)->i_private);
+ debugfs_automount_t f;
+ f = (debugfs_automount_t)path->dentry->d_fsdata;
+ return f(path->dentry, d_inode(path->dentry)->i_private);
}
static const struct dentry_operations debugfs_dops = {
@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir);
*/
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
- struct vfsmount *(*f)(void *),
+ debugfs_automount_t f,
void *data)
{
struct dentry *dentry = start_creating(name, parent);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 60f03b78914e..9323b9a8bc72 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -377,9 +377,8 @@ err:
* and will start a new collection. Eventually caller must submit the last
* segment if present.
*/
-static int readpage_strip(void *data, struct page *page)
+static int __readpage_strip(struct page_collect *pcol, struct page *page)
{
- struct page_collect *pcol = data;
struct inode *inode = pcol->inode;
struct exofs_i_info *oi = exofs_i(inode);
loff_t i_size = i_size_read(inode);
@@ -470,6 +469,13 @@ fail:
return ret;
}
+static int readpage_strip(struct file *data, struct page *page)
+{
+ struct page_collect *pcol = (struct page_collect *)data;
+
+ return __readpage_strip(pcol, page);
+}
+
static int exofs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
@@ -499,7 +505,7 @@ static int _readpage(struct page *page, bool read_4_write)
_pcol_init(&pcol, 1, page->mapping->host);
pcol.read_4_write = read_4_write;
- ret = readpage_strip(&pcol, page);
+ ret = __readpage_strip(&pcol, page);
if (ret) {
EXOFS_ERR("_readpage => %d\n", ret);
return ret;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 969beec3ff7e..87ab6150343b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -379,14 +379,15 @@ struct flex_groups {
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
- EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+ EXT4_PROJINHERIT_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
@@ -1028,6 +1029,7 @@ struct ext4_inode_info {
/* Encryption params */
struct ext4_crypt_info *i_crypt_info;
#endif
+ kprojid_t i_projid;
};
/*
@@ -1070,9 +1072,15 @@ struct ext4_inode_info {
#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
-#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
-#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT4_MOUNT_QUOTA 0x40000 /* Some quota option set */
+#define EXT4_MOUNT_USRQUOTA 0x80000 /* "old" user quota,
+ * enable enforcement for hidden
+ * quota files */
+#define EXT4_MOUNT_GRPQUOTA 0x100000 /* "old" group quota, enable
+ * enforcement for hidden quota
+ * files */
+#define EXT4_MOUNT_PRJQUOTA 0x200000 /* Enable project quota
+ * enforcement */
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
@@ -1283,7 +1291,7 @@ struct ext4_super_block {
#endif
/* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
/*
* fourth extended-fs super-block data in memory
@@ -1801,7 +1809,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
- EXT4_FEATURE_RO_COMPAT_QUOTA)
+ EXT4_FEATURE_RO_COMPAT_QUOTA |\
+ EXT4_FEATURE_RO_COMPAT_PROJECT)
#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
@@ -1843,6 +1852,11 @@ static inline bool ext4_has_incompat_features(struct super_block *sb)
#define EXT4_DEF_RESUID 0
#define EXT4_DEF_RESGID 0
+/*
+ * Default project ID
+ */
+#define EXT4_DEF_PROJID 0
+
#define EXT4_DEF_INODE_READAHEAD_BLKS 32
/*
@@ -2565,6 +2579,7 @@ extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 685a26e9540f..ea26a0dcb87b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -781,6 +781,13 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
inode->i_gid = dir->i_gid;
} else
inode_init_owner(inode, dir, mode);
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+ ei->i_projid = EXT4_I(dir)->i_projid;
+ else
+ ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
err = dquot_initialize(inode);
if (err)
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4fbede7d7e2f..b65680c5404b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4300,6 +4300,14 @@ static inline void ext4_iget_extra_inode(struct inode *inode,
EXT4_I(inode)->i_inline_off = 0;
}
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
+ return -EOPNOTSUPP;
+ *projid = EXT4_I(inode)->i_projid;
+ return 0;
+}
+
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ext4_iget_flags flags, const char *function,
unsigned int line)
@@ -4314,6 +4322,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
int block;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
if ((!(flags & EXT4_IGET_SPECIAL) &&
(ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) ||
@@ -4389,12 +4398,20 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+ else
+ i_projid = EXT4_DEF_PROJID;
+
if (!(test_opt(inode->i_sb, NO_UID32))) {
i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
+ ei->i_projid = make_kprojid(&init_user_ns, i_projid);
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
@@ -4706,6 +4723,7 @@ static int ext4_do_update_inode(handle_t *handle,
int need_datasync = 0, set_large_file = 0;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
spin_lock(&ei->i_raw_lock);
@@ -4718,6 +4736,7 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
+ i_projid = from_kprojid(&init_user_ns, ei->i_projid);
if (!(test_opt(inode->i_sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
@@ -4795,6 +4814,15 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le16(ei->i_extra_isize);
}
}
+
+ BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ i_projid != EXT4_DEF_PROJID);
+
+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ raw_inode->i_projid = cpu_to_le32(i_projid);
+
ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
if (inode->i_sb->s_flags & MS_LAZYTIME)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 313a61626d2d..9c6fe28fe27e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3218,6 +3218,12 @@ static int ext4_link(struct dentry *old_dentry,
if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode))
return -EXDEV;
+
+ if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -3527,6 +3533,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
!ext4_has_encryption_key(new_dir)))
return -ENOKEY;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
@@ -3746,6 +3757,14 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new.inode)))
return -EXDEV;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)) ||
+ (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(old_dir)->i_projid,
+ EXT4_I(new_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6d3b72c959c8..9f2ca5e2531a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1081,8 +1081,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1115,6 +1115,7 @@ static const struct dquot_operations ext4_quota_operations = {
.write_info = ext4_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
+ .get_projid = ext4_get_projid,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@ -1170,7 +1171,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
@@ -1230,6 +1231,7 @@ static const match_table_t tokens = {
{Opt_noquota, "noquota"},
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
+ {Opt_prjquota, "prjquota"},
{Opt_barrier, "barrier=%u"},
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
@@ -1449,8 +1451,11 @@ static const struct mount_opts {
MOPT_SET | MOPT_Q},
{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
MOPT_SET | MOPT_Q},
+ {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
+ MOPT_SET | MOPT_Q},
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
- EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
+ EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
+ MOPT_CLEAR | MOPT_Q},
{Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
@@ -1735,13 +1740,17 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
#ifdef CONFIG_QUOTA
- if (ext4_has_feature_quota(sb) &&
- (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
- ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota "
- "mount options ignored.");
- clear_opt(sb, USRQUOTA);
- clear_opt(sb, GRPQUOTA);
- } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ /*
+ * We do the test below only for project quotas. 'usrquota' and
+ * 'grpquota' mount options are allowed even without quota feature
+ * to support legacy quotas in quota files.
+ */
+ if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
+ ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
+ "Cannot enable project quota enforcement.");
+ return 0;
+ }
+ if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
clear_opt(sb, USRQUOTA);
@@ -2635,6 +2644,12 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
"without CONFIG_QUOTA");
return 0;
}
+ if (ext4_has_feature_project(sb) && !readonly) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return 0;
+ }
#endif /* CONFIG_QUOTA */
return 1;
}
@@ -3871,7 +3886,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &ext4_qctl_operations;
- sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
@@ -5020,6 +5035,48 @@ restore_opts:
return err;
}
+#ifdef CONFIG_QUOTA
+static int ext4_statfs_project(struct super_block *sb,
+ kprojid_t projid, struct kstatfs *buf)
+{
+ struct kqid qid;
+ struct dquot *dquot;
+ u64 limit;
+ u64 curblock;
+
+ qid = make_kqid_projid(projid);
+ dquot = dqget(sb, qid);
+ if (IS_ERR(dquot))
+ return PTR_ERR(dquot);
+ spin_lock(&dq_data_lock);
+
+ limit = (dquot->dq_dqb.dqb_bsoftlimit ?
+ dquot->dq_dqb.dqb_bsoftlimit :
+ dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
+ if (limit && buf->f_blocks > limit) {
+ curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+ buf->f_blocks = limit;
+ buf->f_bfree = buf->f_bavail =
+ (buf->f_blocks > curblock) ?
+ (buf->f_blocks - curblock) : 0;
+ }
+
+ limit = dquot->dq_dqb.dqb_isoftlimit ?
+ dquot->dq_dqb.dqb_isoftlimit :
+ dquot->dq_dqb.dqb_ihardlimit;
+ if (limit && buf->f_files > limit) {
+ buf->f_files = limit;
+ buf->f_ffree =
+ (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+ (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+ }
+
+ spin_unlock(&dq_data_lock);
+ dqput(dquot);
+ return 0;
+}
+#endif
+
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
@@ -5052,6 +5109,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+#ifdef CONFIG_QUOTA
+ if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+ sb_has_quota_limits_enabled(sb, PRJQUOTA))
+ ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
return 0;
}
@@ -5239,7 +5301,8 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
struct inode *qf_inode;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
BUG_ON(!ext4_has_feature_quota(sb));
@@ -5270,14 +5333,21 @@ static int ext4_enable_quotas(struct super_block *sb)
int type, err = 0;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
+ };
+ bool quota_mopt[EXT4_MAXQUOTAS] = {
+ test_opt(sb, USRQUOTA),
+ test_opt(sb, GRPQUOTA),
+ test_opt(sb, PRJQUOTA),
};
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
- DQUOT_USAGE_ENABLED);
+ DQUOT_USAGE_ENABLED |
+ (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
for (type--; type >= 0; type--)
dquot_quota_off(sb, type);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9fc6c2597dcc..1907299d4296 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -875,9 +875,9 @@ struct fuse_fill_data {
unsigned nr_pages;
};
-static int fuse_readpages_fill(void *_data, struct page *page)
+static int fuse_readpages_fill(struct file *_data, struct page *page)
{
- struct fuse_fill_data *data = _data;
+ struct fuse_fill_data *data = (struct fuse_fill_data *)_data;
struct fuse_req *req = data->req;
struct inode *inode = data->inode;
struct fuse_conn *fc = get_fuse_conn(inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ca9c492a1885..adecbb7a13fe 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -29,7 +29,7 @@ static struct kmem_cache *fuse_inode_cachep;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
-static int set_global_limit(const char *val, struct kernel_param *kp);
+static int set_global_limit(const char *val, const struct kernel_param *kp);
unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
@@ -811,7 +811,7 @@ static void sanitize_global_limit(unsigned *limit)
*limit = (1 << 16) - 1;
}
-static int set_global_limit(const char *val, struct kernel_param *kp)
+static int set_global_limit(const char *val, const struct kernel_param *kp)
{
int rv;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 582ef53f2104..6e7b6cb3f6cd 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -476,7 +476,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
*
*/
-static int __gfs2_readpage(void *file, struct page *page)
+static int __gfs2_readpage(struct file *file, struct page *page)
{
struct gfs2_inode *ip = GFS2_I(page->mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 21e719199a84..7fb45ee36897 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -740,10 +740,15 @@ err_out:
static void kernfs_release_file(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
- if (!(kn->flags & KERNFS_HAS_RELEASE))
- return;
+ /*
+ * @of is guaranteed to have no other file operations in flight and
+ * we just want to synchronize release and drain paths.
+	 * @kernfs_open_file_mutex is enough. @of->mutex can't be used
+	 * here because the drain path may be called from places which
+	 * can cause a circular dependency.
+ */
+ lockdep_assert_held(&kernfs_open_file_mutex);
- mutex_lock(&of->mutex);
if (!of->released) {
/*
* A file is never detached without being released and we
@@ -753,7 +758,6 @@ static void kernfs_release_file(struct kernfs_node *kn,
kn->attr.ops->release(of);
of->released = true;
}
- mutex_unlock(&of->mutex);
}
static int kernfs_fop_release(struct inode *inode, struct file *filp)
@@ -761,7 +765,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
struct kernfs_open_file *of = kernfs_of(filp);
- kernfs_release_file(kn, of);
+ if (kn->flags & KERNFS_HAS_RELEASE) {
+ mutex_lock(&kernfs_open_file_mutex);
+ kernfs_release_file(kn, of);
+ mutex_unlock(&kernfs_open_file_mutex);
+ }
+
kernfs_put_open_node(kn, of);
seq_release(inode, filp);
kfree(of->prealloc_buf);
@@ -794,7 +803,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
if (kn->flags & KERNFS_HAS_MMAP)
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
- kernfs_release_file(kn, of);
+ if (kn->flags & KERNFS_HAS_RELEASE)
+ kernfs_release_file(kn, of);
}
mutex_unlock(&kernfs_open_file_mutex);
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index ef34e31a9d57..a71e996d6c55 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -50,7 +50,8 @@ static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
if (scops && scops->show_path)
return scops->show_path(sf, node, root);
- return seq_dentry(sf, dentry, " \t\n\\");
+ seq_dentry(sf, dentry, " \t\n\\");
+ return 0;
}
const struct super_operations kernfs_sops = {
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f038d4ac9aec..7890779f44b5 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -526,7 +526,7 @@ static struct ctl_table nlm_sysctl_root[] = {
*/
#define param_set_min_max(name, type, which_strtol, min, max) \
-static int param_set_##name(const char *val, struct kernel_param *kp) \
+static int param_set_##name(const char *val, const struct kernel_param *kp) \
{ \
char *endp; \
__typeof__(type) num = which_strtol(val, &endp, 0); \
diff --git a/fs/locks.c b/fs/locks.c
index 2c8e1e429cf7..4faeb3f6d3dc 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -230,6 +230,7 @@ locks_get_lock_context(struct inode *inode, int type)
ctx = smp_load_acquire(&inode->i_flctx);
}
out:
+ trace_locks_get_lock_context(inode, type, ctx);
return ctx;
}
@@ -934,7 +935,8 @@ out:
return error;
}
-static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+ struct file_lock *conflock)
{
struct file_lock *fl, *tmp;
struct file_lock *new_fl = NULL;
@@ -1142,6 +1144,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
+ trace_posix_lock_inode(inode, request, error);
+
return error;
}
@@ -1162,7 +1166,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
int posix_lock_file(struct file *filp, struct file_lock *fl,
struct file_lock *conflock)
{
- return __posix_lock_file(file_inode(filp), fl, conflock);
+ return posix_lock_inode(file_inode(filp), fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
@@ -1178,7 +1182,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
int error;
might_sleep ();
for (;;) {
- error = __posix_lock_file(inode, fl, NULL);
+ error = posix_lock_inode(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1260,7 +1264,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (filp) {
fl.fl_owner = filp;
fl.fl_flags &= ~FL_SLEEP;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (!error)
break;
}
@@ -1268,7 +1272,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
if (sleep)
fl.fl_flags |= FL_SLEEP;
fl.fl_owner = current->files;
- error = __posix_lock_file(inode, &fl, NULL);
+ error = posix_lock_inode(inode, &fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
@@ -2165,6 +2169,8 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (file_lock == NULL)
return -ENOLCK;
+ inode = file_inode(filp);
+
/*
* This might block, so we do it before checking the inode.
*/
@@ -2172,8 +2178,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
if (copy_from_user(&flock, l, sizeof(flock)))
goto out;
- inode = file_inode(filp);
-
/* Don't allow mandatory locks on files that may be memory mapped
* and shared.
*/
@@ -2242,6 +2246,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
}
}
out:
+ trace_fcntl_setlk(inode, file_lock, error);
locks_free_lock(file_lock);
return error;
}
@@ -2398,6 +2403,7 @@ out:
*/
void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
+ int error;
struct file_lock lock;
struct file_lock_context *ctx;
@@ -2420,10 +2426,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
lock.fl_ops = NULL;
lock.fl_lmops = NULL;
- vfs_lock_file(filp, F_SETLK, &lock, NULL);
+ error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
if (lock.fl_ops && lock.fl_ops->fl_release_private)
lock.fl_ops->fl_release_private(&lock);
+ trace_locks_remove_posix(file_inode(filp), &lock, error);
}
EXPORT_SYMBOL(locks_remove_posix);
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index a709d80c8ebc..9a202f7ed755 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -33,9 +33,9 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
return submit_bio_wait(rw, &bio);
}
-static int bdev_readpage(void *_sb, struct page *page)
+static int bdev_readpage(struct file *_sb, struct page *page)
{
- struct super_block *sb = _sb;
+ struct super_block *sb = (struct super_block *)_sb;
struct block_device *bdev = logfs_super(sb)->s_bdev;
int err;
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 9c501449450d..4ae7d17f96e3 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -122,9 +122,9 @@ static void logfs_mtd_sync(struct super_block *sb)
mtd_sync(mtd);
}
-static int logfs_mtd_readpage(void *_sb, struct page *page)
+static int logfs_mtd_readpage(struct file *_sb, struct page *page)
{
- struct super_block *sb = _sb;
+ struct super_block *sb = (struct super_block *)_sb;
int err;
err = logfs_mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f9b45d46d4c4..48085ab8bcd5 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -174,7 +174,7 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
if (!logfs_exist_block(dir, index))
continue;
page = read_cache_page(dir->i_mapping, index,
- (filler_t *)logfs_readpage, NULL);
+ logfs_readpage, NULL);
if (IS_ERR(page))
return page;
dd = kmap_atomic(page);
@@ -306,7 +306,7 @@ static int logfs_readdir(struct file *file, struct dir_context *ctx)
continue;
}
page = read_cache_page(dir->i_mapping, pos,
- (filler_t *)logfs_readpage, NULL);
+ logfs_readpage, NULL);
if (IS_ERR(page))
return PTR_ERR(page);
dd = kmap(page);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 5f0937609465..96322622870c 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -151,7 +151,7 @@ struct logfs_device_ops {
struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs);
struct page *(*find_last_sb)(struct super_block *sb, u64 *ofs);
int (*write_sb)(struct super_block *sb, struct page *page);
- int (*readpage)(void *_sb, struct page *page);
+ int (*readpage)(struct file *_sb, struct page *page);
void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
int (*erase)(struct super_block *sb, loff_t ofs, size_t len,
int ensure_write);
@@ -485,7 +485,7 @@ static inline int logfs_get_sb_bdev(struct logfs_super *s,
#endif
/* dev_mtd.c */
-#ifdef CONFIG_MTD
+#if IS_ENABLED(CONFIG_MTD)
int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
#else
static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
diff --git a/fs/namei.c b/fs/namei.c
index 8f350e39be96..d6d2d0dec826 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,6 +35,7 @@
#include <linux/fs_struct.h>
#include <linux/posix_acl.h>
#include <linux/hash.h>
+#include <linux/init_task.h>
#include <asm/uaccess.h>
#include "internal.h"
diff --git a/fs/namespace.c b/fs/namespace.c
index 2443cdb9ce86..6f9a6bee9d87 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1015,6 +1015,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
+struct vfsmount *
+vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
+ const char *name, void *data)
+{
+ /* Until it is worked out how to pass the user namespace
+	 * through from the parent mount to the submount, don't
+	 * support unprivileged mounts with submounts.
+ */
+ if (mountpoint->d_sb->s_user_ns != &init_user_ns)
+ return ERR_PTR(-EPERM);
+
+ return vfs_kern_mount(type, MS_SUBMOUNT, name, data);
+}
+EXPORT_SYMBOL_GPL(vfs_submount);
+
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
int flag)
{
@@ -2545,10 +2560,6 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
return -ENODEV;
if (user_ns != &init_user_ns) {
- if (!(type->fs_flags & FS_USERNS_MOUNT)) {
- put_filesystem(type);
- return -EPERM;
- }
/* Only in special cases allow devices from mounts
* created outside the initial user namespace.
*/
@@ -2868,7 +2879,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
- MS_STRICTATIME);
+ MS_STRICTATIME | MS_SUBMOUNT);
if (flags & MS_REMOUNT)
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d29ad4e02d33..f1804be48a3a 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -230,7 +230,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
const char *devname,
struct nfs_clone_mount *mountdata)
{
- return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
+ return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
}
/**
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index f592672373cb..b2a7f72eb116 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -279,7 +279,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
mountdata->hostname,
mountdata->mnt_path);
- mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
+ mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
if (!IS_ERR(mnt))
break;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0bb580174cb3..a25985705cd5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -343,7 +343,7 @@ struct nfs_readdesc {
};
static int
-readpage_async_filler(void *data, struct page *page)
+readpage_async_filler(struct file *data, struct page *page)
{
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
struct nfs_page *new;
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index e5f911bd80d2..390827fa82ef 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -1,7 +1,6 @@
config FANOTIFY
bool "Filesystem wide access notification"
select FSNOTIFY
- select ANON_INODES
default n
---help---
Say Y here to enable fanotify support. fanotify is a file access
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index b981fc0c8379..0161c74e76e2 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,6 +1,5 @@
config INOTIFY_USER
bool "Inotify support for userspace"
- select ANON_INODES
select FSNOTIFY
default y
---help---
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b5cf27dcb18a..cb3703393264 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -88,13 +88,13 @@ struct workqueue_struct *user_dlm_worker;
*/
#define DLMFS_CAPABILITIES "bast stackglue"
static int param_set_dlmfs_capabilities(const char *val,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
printk(KERN_ERR "%s: readonly parameter\n", kp->name);
return -EINVAL;
}
static int param_get_dlmfs_capabilities(char *buffer,
- struct kernel_param *kp)
+ const struct kernel_param *kp)
{
return strlcpy(buffer, DLMFS_CAPABILITIES,
strlen(DLMFS_CAPABILITIES) + 1);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8da1fc940a86..c7402cb76f11 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -233,12 +233,12 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
goto out_mmput;
}
- down_read(&mm->mmap_sem);
+ spin_lock(&mm->arg_lock);
arg_start = mm->arg_start;
arg_end = mm->arg_end;
env_start = mm->env_start;
env_end = mm->env_end;
- up_read(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
BUG_ON(arg_start > arg_end);
BUG_ON(env_start > env_end);
@@ -990,10 +990,10 @@ static ssize_t environ_read(struct file *file, char __user *buf,
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
- down_read(&mm->mmap_sem);
+ spin_lock(&mm->arg_lock);
env_start = mm->env_start;
env_end = mm->env_end;
- up_read(&mm->mmap_sem);
+ spin_unlock(&mm->arg_lock);
while (count > 0) {
size_t this_len, max_len;
@@ -3202,6 +3202,15 @@ static const struct file_operations proc_tgid_base_operations = {
.llseek = default_llseek,
};
+struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+ if (!d_is_dir(file->f_path.dentry) ||
+ (file->f_op != &proc_tgid_base_operations))
+ return ERR_PTR(-EBADF);
+
+ return proc_pid(file_inode(file));
+}
+
static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
return proc_pident_lookup(dir, dentry,
diff --git a/fs/read_write.c b/fs/read_write.c
index 7b175b9134ec..27023e8f531e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -21,9 +21,6 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
-typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
-typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
-
const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
@@ -656,7 +653,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
EXPORT_SYMBOL(iov_shorten);
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, iter_fn_t fn)
+ loff_t *ppos, int type)
{
struct kiocb kiocb;
ssize_t ret;
@@ -664,7 +661,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- ret = fn(&kiocb, iter);
+ if (type == READ)
+ ret = filp->f_op->read_iter(&kiocb, iter);
+ else
+ ret = filp->f_op->write_iter(&kiocb, iter);
BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
@@ -672,7 +672,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, io_fn_t fn)
+ loff_t *ppos, int type)
{
ssize_t ret = 0;
@@ -680,7 +680,13 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
struct iovec iovec = iov_iter_iovec(iter);
ssize_t nr;
- nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
+ if (type == READ) {
+ nr = filp->f_op->read(filp, iovec.iov_base,
+ iovec.iov_len, ppos);
+ } else {
+ nr = filp->f_op->write(filp, iovec.iov_base,
+ iovec.iov_len, ppos);
+ }
if (nr < 0) {
if (!ret)
@@ -783,8 +789,6 @@ static ssize_t do_readv_writev(int type, struct file *file,
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
- io_fn_t fn;
- iter_fn_t iter_fn;
ret = import_iovec(type, uvector, nr_segs,
ARRAY_SIZE(iovstack), &iov, &iter);
@@ -798,19 +802,14 @@ static ssize_t do_readv_writev(int type, struct file *file,
if (ret < 0)
goto out;
- if (type == READ) {
- fn = file->f_op->read;
- iter_fn = file->f_op->read_iter;
- } else {
- fn = (io_fn_t)file->f_op->write;
- iter_fn = file->f_op->write_iter;
+ if (type != READ)
file_start_write(file);
- }
- if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
+ if ((type == READ && file->f_op->read_iter) ||
+ (type == WRITE && file->f_op->write_iter))
+ ret = do_iter_readv_writev(file, &iter, pos, type);
else
- ret = do_loop_readv_writev(file, &iter, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, type);
if (type != READ)
file_end_write(file);
@@ -957,8 +956,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
- io_fn_t fn;
- iter_fn_t iter_fn;
ret = compat_import_iovec(type, uvector, nr_segs,
UIO_FASTIOV, &iov, &iter);
@@ -972,19 +969,14 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
if (ret < 0)
goto out;
- if (type == READ) {
- fn = file->f_op->read;
- iter_fn = file->f_op->read_iter;
- } else {
- fn = (io_fn_t)file->f_op->write;
- iter_fn = file->f_op->write_iter;
+ if (type != READ)
file_start_write(file);
- }
- if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
+ if ((type == READ && file->f_op->read_iter) ||
+ (type == WRITE && file->f_op->write_iter))
+ ret = do_iter_readv_writev(file, &iter, pos, type);
else
- ret = do_loop_readv_writev(file, &iter, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, type);
if (type != READ)
file_end_write(file);
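
The io_fn_t/iter_fn_t removal above trades a function-pointer indirection for an explicit dispatch on type. Part of the motivation (shared with the filler_t and ftrace_stub changes elsewhere in this diff) is that the old code cast ->write, which takes a const char __user *, to io_fn_t, which takes a char __user *; an indirect call through a pointer whose prototype does not match the callee is exactly what Clang's Control Flow Integrity traps on. A hedged illustration of the old mismatch (demo_write is hypothetical):

	typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);

	ssize_t demo_write(struct file *f, const char __user *buf,
			   size_t len, loff_t *pos);

	/* The cast silences the compiler, but the callee's prototype still
	 * differs in the second parameter, so a CFI-enabled build would trap
	 * when calling through fn: */
	io_fn_t fn = (io_fn_t)demo_write;
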
diff --git a/fs/super.c b/fs/super.c
index 34f1dc72064d..55cd8843b49a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -476,6 +476,10 @@ struct super_block *sget_userns(struct file_system_type *type,
struct super_block *old;
int err;
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
+ !(type->fs_flags & FS_USERNS_MOUNT) &&
+ !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
retry:
spin_lock(&sb_lock);
if (test) {
@@ -502,7 +506,7 @@ retry:
}
if (!s) {
spin_unlock(&sb_lock);
- s = alloc_super(type, flags, user_ns);
+ s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
@@ -547,8 +551,15 @@ struct super_block *sget(struct file_system_type *type,
{
struct user_namespace *user_ns = current_user_ns();
+ /* We don't yet pass the user namespace of the parent
+	 * mount through to here, so always use &init_user_ns
+ * until that changes.
+ */
+ if (flags & MS_SUBMOUNT)
+ user_ns = &init_user_ns;
+
/* Ensure the requestor has permissions over the target filesystem */
- if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
return sget_userns(type, test, set, flags, user_ns, data);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 52ab4aa7f214..2014a1bc8f3d 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -89,7 +89,7 @@
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
#define MCOUNT_REC() . = ALIGN(8); \
VMLINUX_SYMBOL(__start_mcount_loc) = .; \
- *(__mcount_loc) \
+ KEEP(*(__mcount_loc)) \
VMLINUX_SYMBOL(__stop_mcount_loc) = .;
#else
#define MCOUNT_REC()
@@ -123,10 +123,10 @@
#ifdef CONFIG_EVENT_TRACING
#define FTRACE_EVENTS() . = ALIGN(8); \
VMLINUX_SYMBOL(__start_ftrace_events) = .; \
- *(_ftrace_events) \
+ KEEP(*(_ftrace_events)) \
VMLINUX_SYMBOL(__stop_ftrace_events) = .; \
VMLINUX_SYMBOL(__start_ftrace_enum_maps) = .; \
- *(_ftrace_enum_map) \
+ KEEP(*(_ftrace_enum_map)) \
VMLINUX_SYMBOL(__stop_ftrace_enum_maps) = .;
#else
#define FTRACE_EVENTS()
@@ -169,8 +169,8 @@
#define _OF_TABLE_1(name) \
. = ALIGN(8); \
VMLINUX_SYMBOL(__##name##_of_table) = .; \
- *(__##name##_of_table) \
- *(__##name##_of_table_end)
+ KEEP(*(__##name##_of_table)) \
+ KEEP(*(__##name##_of_table_end))
#define CLKSRC_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc)
#define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip)
@@ -196,9 +196,14 @@
*(.dtb.init.rodata) \
VMLINUX_SYMBOL(__dtb_end) = .;
-/* .data section */
+/*
+ * .data section
+ * -fdata-sections generates .data.identifier sections, which need to be
+ * pulled in with .data, but we don't want to pull in .data..stuff, which
+ * has its own requirements. Same for bss.
+ */
#define DATA_DATA \
- *(.data) \
+ *(.data .data.[0-9a-zA-Z_]*) \
*(.ref.data) \
*(.data..shared_aligned) /* percpu related */ \
MEM_KEEP(init.data) \
@@ -281,28 +286,28 @@
/* PCI quirks */ \
.pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \
- *(.pci_fixup_early) \
+ KEEP(*(.pci_fixup_early)) \
VMLINUX_SYMBOL(__end_pci_fixups_early) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_header) = .; \
- *(.pci_fixup_header) \
+ KEEP(*(.pci_fixup_header)) \
VMLINUX_SYMBOL(__end_pci_fixups_header) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_final) = .; \
- *(.pci_fixup_final) \
+ KEEP(*(.pci_fixup_final)) \
VMLINUX_SYMBOL(__end_pci_fixups_final) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_enable) = .; \
- *(.pci_fixup_enable) \
+ KEEP(*(.pci_fixup_enable)) \
VMLINUX_SYMBOL(__end_pci_fixups_enable) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_resume) = .; \
- *(.pci_fixup_resume) \
+ KEEP(*(.pci_fixup_resume)) \
VMLINUX_SYMBOL(__end_pci_fixups_resume) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_resume_early) = .; \
- *(.pci_fixup_resume_early) \
+ KEEP(*(.pci_fixup_resume_early)) \
VMLINUX_SYMBOL(__end_pci_fixups_resume_early) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .; \
- *(.pci_fixup_suspend) \
+ KEEP(*(.pci_fixup_suspend)) \
VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .; \
VMLINUX_SYMBOL(__start_pci_fixups_suspend_late) = .; \
- *(.pci_fixup_suspend_late) \
+ KEEP(*(.pci_fixup_suspend_late)) \
VMLINUX_SYMBOL(__end_pci_fixups_suspend_late) = .; \
} \
\
@@ -318,76 +323,76 @@
/* Kernel symbol table: Normal symbols */ \
__ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab) = .; \
- *(SORT(___ksymtab+*)) \
+ KEEP(*(SORT(___ksymtab+*))) \
VMLINUX_SYMBOL(__stop___ksymtab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \
- *(SORT(___ksymtab_gpl+*)) \
+ KEEP(*(SORT(___ksymtab_gpl+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \
- *(SORT(___ksymtab_unused+*)) \
+ KEEP(*(SORT(___ksymtab_unused+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \
- *(SORT(___ksymtab_unused_gpl+*)) \
+ KEEP(*(SORT(___ksymtab_unused_gpl+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \
- *(SORT(___ksymtab_gpl_future+*)) \
+ KEEP(*(SORT(___ksymtab_gpl_future+*))) \
VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \
} \
\
/* Kernel symbol table: Normal symbols */ \
__kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab) = .; \
- *(SORT(___kcrctab+*)) \
+ KEEP(*(SORT(___kcrctab+*))) \
VMLINUX_SYMBOL(__stop___kcrctab) = .; \
} \
\
/* Kernel symbol table: GPL-only symbols */ \
__kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl) = .; \
- *(SORT(___kcrctab_gpl+*)) \
+ KEEP(*(SORT(___kcrctab_gpl+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .; \
} \
\
/* Kernel symbol table: Normal unused symbols */ \
__kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused) = .; \
- *(SORT(___kcrctab_unused+*)) \
+ KEEP(*(SORT(___kcrctab_unused+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_unused) = .; \
} \
\
/* Kernel symbol table: GPL-only unused symbols */ \
__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .; \
- *(SORT(___kcrctab_unused_gpl+*)) \
+ KEEP(*(SORT(___kcrctab_unused_gpl+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .; \
} \
\
/* Kernel symbol table: GPL-future-only symbols */ \
__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .; \
- *(SORT(___kcrctab_gpl_future+*)) \
+ KEEP(*(SORT(___kcrctab_gpl_future+*))) \
VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \
} \
\
/* Kernel symbol table: strings */ \
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
- *(__ksymtab_strings) \
+ KEEP(*(__ksymtab_strings)) \
} \
\
/* __*init sections */ \
@@ -400,7 +405,7 @@
/* Built-in module parameters. */ \
__param : AT(ADDR(__param) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___param) = .; \
- *(__param) \
+ KEEP(*(__param)) \
VMLINUX_SYMBOL(__stop___param) = .; \
} \
\
@@ -422,7 +427,7 @@
#define SECURITY_INIT \
.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__security_initcall_start) = .; \
- *(.security_initcall.init) \
+ KEEP(*(.security_initcall.init)) \
VMLINUX_SYMBOL(__security_initcall_end) = .; \
}
@@ -432,7 +437,7 @@
ALIGN_FUNCTION(); \
*(.text.hot .text.hot.*) \
*(.text .text.fixup) \
- *(.text.unlikely .text.unlikely.*) \
+ *(.text.unlikely .text.unlikely.* .text.*) \
*(.text.unknown .text.unknown.*) \
*(.ref.text) \
*(.text.asan.* .text.tsan.*) \
@@ -481,7 +486,7 @@
VMLINUX_SYMBOL(__softirqentry_text_end) = .;
/* Section used for early init (in .S files) */
-#define HEAD_TEXT *(.head.text)
+#define HEAD_TEXT KEEP(*(.head.text))
#define HEAD_TEXT_SECTION \
.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { \
@@ -495,7 +500,7 @@
. = ALIGN(align); \
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___ex_table) = .; \
- *(__ex_table) \
+ KEEP(*(__ex_table)) \
VMLINUX_SYMBOL(__stop___ex_table) = .; \
}
@@ -521,11 +526,12 @@
/* init and exit section handling */
#define INIT_DATA \
+ KEEP(*(SORT(___kentry+*))) \
*(.init.data) \
MEM_DISCARD(init.data) \
KERNEL_CTORS() \
MCOUNT_REC() \
- *(.init.rodata) \
+ *(.init.rodata .init.rodata.*) \
FTRACE_EVENTS() \
TRACE_SYSCALLS() \
KPROBE_BLACKLIST() \
@@ -543,7 +549,7 @@
EARLYCON_TABLE()
#define INIT_TEXT \
- *(.init.text) \
+ *(.init.text .init.text.*) \
*(.text.startup) \
MEM_DISCARD(init.text)
@@ -560,7 +566,7 @@
MEM_DISCARD(exit.text)
#define EXIT_CALL \
- *(.exitcall.exit)
+ KEEP(*(.exitcall.exit))
/*
* bss (Block Started by Symbol) - uninitialized data
@@ -587,7 +593,7 @@
BSS_FIRST_SECTIONS \
*(.bss..page_aligned) \
*(.dynbss) \
- *(.bss) \
+ *(.bss .bss.[0-9a-zA-Z_]*) \
*(COMMON) \
}
@@ -636,7 +642,7 @@
. = ALIGN(8); \
__bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start___bug_table) = .; \
- *(__bug_table) \
+ KEEP(*(__bug_table)) \
VMLINUX_SYMBOL(__stop___bug_table) = .; \
}
#else
@@ -665,17 +671,17 @@
#define INIT_SETUP(initsetup_align) \
. = ALIGN(initsetup_align); \
VMLINUX_SYMBOL(__setup_start) = .; \
- *(.init.setup) \
+ KEEP(*(.init.setup)) \
VMLINUX_SYMBOL(__setup_end) = .;
#define INIT_CALLS_LEVEL(level) \
VMLINUX_SYMBOL(__initcall##level##_start) = .; \
- *(.initcall##level##.init) \
- *(.initcall##level##s.init) \
+ KEEP(*(.initcall##level##.init)) \
+ KEEP(*(.initcall##level##s.init)) \
#define INIT_CALLS \
VMLINUX_SYMBOL(__initcall_start) = .; \
- *(.initcallearly.init) \
+ KEEP(*(.initcallearly.init)) \
INIT_CALLS_LEVEL(0) \
INIT_CALLS_LEVEL(1) \
INIT_CALLS_LEVEL(2) \
@@ -689,21 +695,21 @@
#define CON_INITCALL \
VMLINUX_SYMBOL(__con_initcall_start) = .; \
- *(.con_initcall.init) \
+ KEEP(*(.con_initcall.init)) \
VMLINUX_SYMBOL(__con_initcall_end) = .;
#define SECURITY_INITCALL \
VMLINUX_SYMBOL(__security_initcall_start) = .; \
- *(.security_initcall.init) \
+ KEEP(*(.security_initcall.init)) \
VMLINUX_SYMBOL(__security_initcall_end) = .;
#ifdef CONFIG_BLK_DEV_INITRD
#define INIT_RAM_FS \
. = ALIGN(4); \
VMLINUX_SYMBOL(__initramfs_start) = .; \
- *(.init.ramfs) \
+ KEEP(*(.init.ramfs)) \
. = ALIGN(8); \
- *(.init.ramfs.info)
+ KEEP(*(.init.ramfs.info))
#else
#define INIT_RAM_FS
#endif
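
The KEEP() annotations and the widened input-section patterns above are groundwork for linker dead code/data elimination: with -ffunction-sections/-fdata-sections plus --gc-sections, anything not wrapped in KEEP() and not reachable from a kept symbol is dropped. The .data.[0-9a-zA-Z_]* pattern is written so it matches compiler-generated per-symbol sections but not the kernel's own .data..something sections. A small sketch (foo_count is hypothetical):

	/* Built with -fdata-sections: */
	int foo_count;			/* emitted into .data.foo_count — matched by
					 * the .data.[0-9a-zA-Z_]* pattern above */
	DEFINE_PER_CPU(int, bar);	/* lands in .data..percpu — a double dot,
					 * so deliberately not matched */
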
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index d169d274e31b..26dd401e12ae 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -1,7 +1,6 @@
#ifndef _BPF_CGROUP_H
#define _BPF_CGROUP_H
-#include <linux/bpf.h>
#include <linux/jump_label.h>
#include <uapi/linux/bpf.h>
@@ -55,6 +54,9 @@ int __cgroup_bpf_run_filter(struct sock *sk,
struct sk_buff *skb,
enum bpf_attach_type type);
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+ enum bpf_attach_type type);
+
/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \
({ \
@@ -78,6 +80,16 @@ int __cgroup_bpf_run_filter(struct sock *sk,
__ret; \
})
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled && sk) { \
+ __ret = __cgroup_bpf_run_filter_sk(sk, \
+ BPF_CGROUP_INET_SOCK_CREATE); \
+ } \
+ __ret; \
+})
+
#else
struct cgroup_bpf {};
@@ -86,6 +98,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#endif /* CONFIG_CGROUP_BPF */
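
BPF_CGROUP_RUN_PROG_INET_SOCK() follows the same pattern as the ingress/egress wrappers: the cgroup_bpf_enabled check keeps the cost near zero when no program is attached. A sketch of the intended call site; placing it in the socket-creation path (e.g. inet_create()) is an assumption based on the BPF_CGROUP_INET_SOCK_CREATE attach type, since the caller is not part of this hunk:

	/* Sketch: run the attached cgroup program right after sk is set up. */
	err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
	if (err) {
		sk_common_release(sk);
		goto out;
	}
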
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8eb2e503fc3e..77476208f1b0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,7 +36,10 @@ struct bpf_map_ops {
};
struct bpf_map {
- atomic_t refcnt;
+ /* 1st cacheline with read-mostly members of which some
+	 * are also accessed in the fast path (e.g. ops, max_entries).
+ */
+ const struct bpf_map_ops *ops ____cacheline_aligned;
enum bpf_map_type map_type;
u32 key_size;
u32 value_size;
@@ -44,10 +47,15 @@ struct bpf_map {
u32 map_flags;
u32 pages;
bool unpriv_array;
- struct user_struct *user;
- const struct bpf_map_ops *ops;
- struct work_struct work;
+	/* 7-byte hole */
+
+	/* 2nd cacheline with misc members to avoid false sharing,
+ * particularly with refcounting.
+ */
+ struct user_struct *user ____cacheline_aligned;
+ atomic_t refcnt;
atomic_t usercnt;
+ struct work_struct work;
#ifdef CONFIG_SECURITY
void *security;
#endif
@@ -299,6 +307,8 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i);
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
+int __bpf_prog_charge(struct user_struct *user, u32 pages);
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
@@ -376,6 +386,16 @@ static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
return ERR_PTR(-EOPNOTSUPP);
}
+
+static inline int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+ return 0;
+}
+
+static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+}
+
static inline int bpf_obj_get_user(const char __user *pathname)
{
return -EOPNOTSUPP;
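
The struct bpf_map reshuffle is a classic false-sharing fix: fields that lookups read on every operation (ops, sizes) stay on the first cacheline, while refcnt/usercnt, which bounce between CPUs on every get/put, are pushed onto a second one. The idiom in isolation (demo names are hypothetical):

	struct demo_obj {
		/* read-mostly: hot in the lookup fast path */
		const struct demo_ops *ops ____cacheline_aligned;
		u32 key_size;

		/* write-heavy: kept off the first line so refcount traffic
		 * does not invalidate the read-mostly fields above */
		atomic_t refcnt ____cacheline_aligned;
		atomic_t usercnt;
	};
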
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index ee2772f769c2..d3036be98027 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -601,6 +601,7 @@ struct sock_cgroup_data {
struct {
u8 is_data : 1;
u8 no_refcnt : 1;
+ u8 unused : 6;
u8 padding;
u16 prioidx;
u32 classid;
@@ -610,6 +611,7 @@ struct sock_cgroup_data {
u32 classid;
u16 prioidx;
u8 padding;
+ u8 unused : 6;
u8 no_refcnt : 1;
u8 is_data : 1;
} __packed;
@@ -642,7 +644,7 @@ static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd)
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
u16 prioidx)
{
- struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) };
+ struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
return;
@@ -659,7 +661,7 @@ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
u32 classid)
{
- struct sock_cgroup_data skcd_buf = { .val = READ_ONCE(skcd->val) };
+ struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
if (sock_cgroup_classid(&skcd_buf) == classid)
return;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 24dd42910d7c..c4bdce045492 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -5,6 +5,8 @@
* syscall compatibility layer.
*/
+#include <linux/types.h>
+
#ifdef CONFIG_COMPAT
#include <linux/stat.h>
@@ -711,9 +713,22 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
int, const char __user *);
+
+/*
+ * For most but not all architectures, "am I in a compat syscall?" and
+ * "am I a compat task?" are the same question. For architectures on which
+ * they aren't the same question, arch code can override in_compat_syscall.
+ */
+
+#ifndef in_compat_syscall
+static inline bool in_compat_syscall(void) { return is_compat_task(); }
+#endif
+
#else
#define is_compat_task() (0)
+static inline bool in_compat_syscall(void) { return false; }
#endif /* CONFIG_COMPAT */
+
#endif /* _LINUX_COMPAT_H */
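
in_compat_syscall() exists because "is this task 32-bit?" and "did this syscall arrive through the 32-bit entry path?" can differ (x32 on x86-64 is the usual example). Typical use in an ioctl handler, sketched with hypothetical helpers:

	static long demo_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
	{
		/* pick the ABI of the current *syscall*, not of the task */
		if (in_compat_syscall())
			return demo_ioctl_compat(file, cmd, arg); /* 32-bit layout */
		return demo_ioctl_native(file, cmd, arg);
	}
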
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index bc8077e5e688..759fb028acc8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -201,6 +201,29 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
# define unreachable() do { } while (1)
#endif
+/*
+ * KENTRY - kernel entry point
+ * This can be used to annotate symbols (functions or data) that are used
+ * without their linker symbol being referenced explicitly. For example,
+ * interrupt vector handlers, or functions in the kernel image that are found
+ * programmatically.
+ *
+ * Not required for symbols exported with EXPORT_SYMBOL or for initcalls; those
+ * are handled in their own way (with KEEP() in linker scripts).
+ *
+ * KENTRY can be avoided if the symbols in question are marked as KEEP() in the
+ * linker script. For example, an architecture could KEEP() its entire
+ * boot/exception vector code rather than annotate each function and data.
+ */
+#ifndef KENTRY
+# define KENTRY(sym) \
+ extern typeof(sym) sym; \
+ static const unsigned long __kentry_##sym \
+ __used \
+ __attribute__((section("___kentry" "+" #sym ), used)) \
+ = (unsigned long)&sym;
+#endif
+
#ifndef RELOC_HIDE
# define RELOC_HIDE(ptr, off) \
({ unsigned long __ptr; \
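
A sketch of KENTRY in use, per the comment above: the handler below is referenced only from an assembly vector table, so the linker sees no C-level reference and --gc-sections would otherwise discard it (demo_vector_handler is hypothetical):

	void demo_vector_handler(void);	/* called only from asm */
	KENTRY(demo_vector_handler);	/* plants a kept reference in the
					 * ___kentry+ section, wired into
					 * INIT_DATA above */
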
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9302d016b89f..8e9d08dfbd18 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -123,6 +123,9 @@ struct cpufreq_policy {
unsigned int up_transition_delay_us;
unsigned int down_transition_delay_us;
+ /* Boost switch for tasks with p->in_iowait set */
+ bool iowait_boost_enable;
+
/* Cached frequency lookup from cpufreq_driver_resolve_freq. */
unsigned int cached_target_freq;
int cached_resolved_idx;
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 19c066dce1da..51362ff607fc 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -60,9 +60,10 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
const char *dest);
+typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
- struct vfsmount *(*f)(void *),
+ debugfs_automount_t f,
void *data);
void debugfs_remove(struct dentry *dentry);
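
The typedef change hands the automount callback the dentry being crossed, which is what lets a caller use the new vfs_submount() from fs/namespace.c above. A hypothetical filesystem's callback (demo_fs_type is an assumption):

	static struct vfsmount *demo_automount(struct dentry *mountpoint, void *data)
	{
		/* vfs_submount() needs the mountpoint so the submount can
		 * (eventually) inherit the parent mount's user namespace */
		return vfs_submount(mountpoint, &demo_fs_type, "demo", NULL);
	}

	/* registration stays the same apart from the callback type: */
	debugfs_create_automount("demo", parent, demo_automount, NULL);
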
diff --git a/include/linux/export.h b/include/linux/export.h
index 96e45ea463e7..a84c76fdebd8 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -1,5 +1,6 @@
#ifndef _LINUX_EXPORT_H
#define _LINUX_EXPORT_H
+
/*
* Export symbols from the kernel to modules. Forked from module.h
* to reduce the amount of pointless cruft we feed to gcc when only
@@ -42,11 +43,11 @@ extern struct module __this_module;
#ifdef CONFIG_MODVERSIONS
/* Mark the CRC weak since genksyms apparently decides not to
* generate checksums for some symbols */
-#define __CRC_SYMBOL(sym, sec) \
- extern __visible void *__crc_##sym __attribute__((weak)); \
- static const unsigned long __kcrctab_##sym \
- __used \
- __attribute__((section("___kcrctab" sec "+" #sym), unused)) \
+#define __CRC_SYMBOL(sym, sec) \
+ extern __visible void *__crc_##sym __attribute__((weak)); \
+ static const unsigned long __kcrctab_##sym \
+ __used \
+ __attribute__((section("___kcrctab" sec "+" #sym), used)) \
= (unsigned long) &__crc_##sym;
#else
#define __CRC_SYMBOL(sym, sec)
@@ -59,8 +60,7 @@ extern struct module __this_module;
static const char __kstrtab_##sym[] \
__attribute__((section("__ksymtab_strings"), aligned(1))) \
= VMLINUX_SYMBOL_STR(sym); \
- extern const struct kernel_symbol __ksymtab_##sym; \
- __visible const struct kernel_symbol __ksymtab_##sym \
+ static const struct kernel_symbol __ksymtab_##sym \
__used \
__attribute__((section("___ksymtab" sec "+" #sym), unused)) \
= { (unsigned long)&sym, __kstrtab_##sym }
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a85d7b71e329..c0b08bbe85b2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -244,8 +244,16 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
return *this_cpu_ptr(ops->disabled);
}
+#ifdef CONFIG_CFI_CLANG
+/* Use a C stub with the correct type for CFI */
+static inline void ftrace_stub(unsigned long a0, unsigned long a1,
+ struct ftrace_ops *op, struct pt_regs *regs)
+{
+}
+#else
extern void ftrace_stub(unsigned long a0, unsigned long a1,
struct ftrace_ops *op, struct pt_regs *regs);
+#endif
#else /* !CONFIG_FUNCTION_TRACER */
/*
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a5f251d76018..858031d58899 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -535,7 +535,7 @@ extern void free_kmem_pages(unsigned long addr, unsigned int order);
void page_alloc_init(void);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
-void drain_local_pages(struct zone *zone);
+void drain_local_pages(void *zone);
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
void page_alloc_init_late(void);
diff --git a/include/linux/init.h b/include/linux/init.h
index 3561ea30ed8c..9ae494724b1b 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -163,24 +163,8 @@ extern bool initcall_debug;
#ifndef __ASSEMBLY__
-#ifdef CONFIG_LTO
-/* Work around a LTO gcc problem: when there is no reference to a variable
- * in a module it will be moved to the end of the program. This causes
- * reordering of initcalls which the kernel does not like.
- * Add a dummy reference function to avoid this. The function is
- * deleted by the linker.
- */
-#define LTO_REFERENCE_INITCALL(x) \
- ; /* yes this is needed */ \
- static __used __exit void *reference_##x(void) \
- { \
- return &x; \
- }
-#else
-#define LTO_REFERENCE_INITCALL(x)
-#endif
-
-/* initcalls are now grouped by functionality into separate
+/*
+ * initcalls are now grouped by functionality into separate
* subsections. Ordering inside the subsections is determined
* by link order.
* For backwards compatibility, initcall() puts the call in
@@ -188,12 +172,16 @@ extern bool initcall_debug;
*
* The `id' arg to __define_initcall() is needed so that multiple initcalls
* can point at the same handler without causing duplicate-symbol build errors.
+ *
+ * Initcalls are run by placing pointers in initcall sections that the
+ * kernel iterates at runtime. The linker can do dead code / data elimination
+ * and remove them entirely, so the initcall sections have to be marked
+ * as KEEP() in the linker script.
*/
#define __define_initcall(fn, id) \
static initcall_t __initcall_##fn##id __used \
- __attribute__((__section__(".initcall" #id ".init"))) = fn; \
- LTO_REFERENCE_INITCALL(__initcall_##fn##id)
+ __attribute__((__section__(".initcall" #id ".init"))) = fn;
/*
* Early initcalls run before initializing SMP.
@@ -229,15 +217,15 @@ extern bool initcall_debug;
#define __initcall(fn) device_initcall(fn)
-#define __exitcall(fn) \
+#define __exitcall(fn) \
static exitcall_t __exitcall_##fn __exit_call = fn
-#define console_initcall(fn) \
- static initcall_t __initcall_##fn \
+#define console_initcall(fn) \
+ static initcall_t __initcall_##fn \
__used __section(.con_initcall.init) = fn
-#define security_initcall(fn) \
- static initcall_t __initcall_##fn \
+#define security_initcall(fn) \
+ static initcall_t __initcall_##fn \
__used __section(.security_initcall.init) = fn
struct obs_kernel_param {
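
Concretely, per the updated comment: the only "reference" to an initcall is the pointer that __define_initcall() drops into its .initcall*.init section, so those sections must be KEEP()-ed (as done in vmlinux.lds.h above) or --gc-sections would silently drop the driver. A minimal sketch:

	static int __init demo_init(void)
	{
		pr_info("demo loaded\n");
		return 0;
	}
	/* places &demo_init in .initcall6.init; nothing else refers to it */
	device_initcall(demo_init);
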
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 021b1e9ff6cd..8aed56931361 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -208,6 +208,7 @@ extern struct task_group root_task_group;
.policy = SCHED_NORMAL, \
.cpus_allowed = CPU_MASK_ALL, \
.nr_cpus_allowed= NR_CPUS, \
+ .cpus_requested = CPU_MASK_ALL, \
.mm = NULL, \
.active_mm = &init_mm, \
.restart_block = { \
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8da001eb82aa..0e57f41bde84 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -136,6 +136,9 @@ struct irq_domain;
* @node: node index useful for balancing
* @handler_data: per-IRQ data for the irq_chip methods
* @affinity: IRQ affinity on SMP
+ * @effective_affinity: The effective IRQ affinity on SMP as some irq
+ * chips do not allow multi-CPU destinations.
+ * A subset of @affinity.
* @msi_desc: MSI descriptor
*/
struct irq_common_data {
@@ -146,6 +149,9 @@ struct irq_common_data {
void *handler_data;
struct msi_desc *msi_desc;
cpumask_var_t affinity;
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ cpumask_var_t effective_affinity;
+#endif
};
/**
@@ -690,6 +696,29 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
return d->common->affinity;
}
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+ return d->common->effective_affinity;
+}
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+ const struct cpumask *m)
+{
+ cpumask_copy(d->common->effective_affinity, m);
+}
+#else
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+ const struct cpumask *m)
+{
+}
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+ return d->common->affinity;
+}
+#endif
+
unsigned int arch_dynirq_lower_bound(unsigned int from);
int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
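
The effective-affinity accessors are meant for irqchips that can only target a single CPU: ->irq_set_affinity() picks one CPU out of the requested mask and records the narrower result. A sketch with a hypothetical chip (demo_hw_route_irq is an assumption):

	static int demo_irq_set_affinity(struct irq_data *d,
					 const struct cpumask *mask, bool force)
	{
		unsigned int cpu = cpumask_first(mask);	/* hw takes one CPU only */

		demo_hw_route_irq(d->hwirq, cpu);
		/* report the subset of @affinity that is actually in effect */
		irq_data_update_effective_affinity(d, cpumask_of(cpu));
		return IRQ_SET_MASK_OK_DONE;
	}
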
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 29c17fae9bbf..1019e8d3c88f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -442,6 +442,8 @@ struct mm_struct {
unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */
unsigned long stack_vm; /* VM_GROWSUP/DOWN */
unsigned long def_flags;
+
+ spinlock_t arg_lock; /* protects the fields below */
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
unsigned long arg_start, arg_end, env_start, env_end;
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 52666d90ca94..060db5c327be 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -225,19 +225,11 @@ struct kparam_array
VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } }
/* Obsolete - use module_param_cb() */
-#define module_param_call(name, set, get, arg, perm) \
- static const struct kernel_param_ops __param_ops_##name = \
- { .flags = 0, (void *)set, (void *)get }; \
+#define module_param_call(name, _set, _get, arg, perm) \
+ static const struct kernel_param_ops __param_ops_##name = \
+ { .flags = 0, .set = _set, .get = _get }; \
__module_param_call(MODULE_PARAM_PREFIX, \
- name, &__param_ops_##name, arg, \
- (perm) + sizeof(__check_old_set_param(set))*0, -1, 0)
-
-/* We don't get oldget: it's often a new-style param_get_uint, etc. */
-static inline int
-__check_old_set_param(int (*oldset)(const char *, struct kernel_param *))
-{
- return 0;
-}
+ name, &__param_ops_##name, arg, perm, -1, 0)
#ifdef CONFIG_SYSFS
extern void kernel_param_lock(struct module *mod);
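
With module_param_call() now building a proper const kernel_param_ops, setters and getters must carry the const-qualified prototype (as the fuse, lockd, dlmfs and nf_conntrack hunks in this diff do). A minimal sketch, assuming the standard param_set_uint/param_get_uint helpers:

	static unsigned int demo_value;

	static int demo_set(const char *val, const struct kernel_param *kp)
	{
		/* room for validation before delegating */
		return param_set_uint(val, kp);
	}

	module_param_call(demo_value, demo_set, param_get_uint, &demo_value, 0644);
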
diff --git a/include/linux/mount.h b/include/linux/mount.h
index b606d8f57adf..bbe48613cc80 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -90,6 +90,9 @@ struct file_system_type;
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
int flags, const char *name,
void *data);
+extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
+ struct file_system_type *type,
+ const char *name, void *data);
extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d2f4a732b3e8..ca7b7cbcb23a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -243,7 +243,7 @@ static inline struct page *page_cache_alloc_readahead(struct address_space *x)
__GFP_COLD | __GFP_NORETRY | __GFP_NOWARN);
}
-typedef int filler_t(void *, struct page *);
+typedef int filler_t(struct file *, struct page *);
pgoff_t page_cache_next_hole(struct address_space *mapping,
pgoff_t index, unsigned long max_scan);
@@ -392,7 +392,7 @@ extern int read_cache_pages(struct address_space *mapping,
static inline struct page *read_mapping_page(struct address_space *mapping,
pgoff_t index, void *data)
{
- filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+ filler_t *filler = mapping->a_ops->readpage;
return read_cache_page(mapping, index, filler, data);
}
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 97b745ddece5..1d847d3de245 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -2,6 +2,7 @@
#define _LINUX_PID_H
#include <linux/rcupdate.h>
+#include <linux/wait.h>
enum pid_type
{
@@ -62,6 +63,8 @@ struct pid
unsigned int level;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
+ /* wait queue for pidfd notifications */
+ wait_queue_head_t wait_pidfd;
struct rcu_head rcu;
struct upid numbers[1];
};
@@ -74,6 +77,8 @@ struct pid_link
struct pid *pid;
};
+extern const struct file_operations pidfd_fops;
+
static inline struct pid *get_pid(struct pid *pid)
{
if (pid)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index b326d0a0cace..de3f442b7ab6 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -41,6 +41,7 @@ extern void *proc_get_parent_data(const struct inode *);
extern void proc_remove(struct proc_dir_entry *);
extern void remove_proc_entry(const char *, struct proc_dir_entry *);
extern int remove_proc_subtree(const char *, struct proc_dir_entry *);
+extern struct pid *tgid_pidfd_to_pid(const struct file *file);
#else /* CONFIG_PROC_FS */
@@ -72,6 +73,11 @@ static inline void proc_remove(struct proc_dir_entry *de) {}
#define remove_proc_entry(name, parent) do {} while (0)
static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; }
+static inline struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+ return ERR_PTR(-EBADF);
+}
+
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_PROC_UID
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index 19bac77f81b1..8020dca6c61f 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -98,6 +98,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
if (!is_a_nulls(first))
WRITE_ONCE(first->pprev, &n->next);
}
+
/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 70c1f7f9e4fa..4e212132a274 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1668,6 +1668,15 @@ struct task_struct {
const struct sched_class *sched_class;
struct sched_entity se;
struct sched_rt_entity rt;
+#ifdef CONFIG_SCHED_WALT
+ struct ravg ravg;
+ /*
+ * 'init_load_pct' represents the initial task load assigned to children
+ * of this task
+ */
+ u32 init_load_pct;
+ u64 last_sleep_ts;
+#endif
#ifdef CONFIG_SCHED_HMP
struct ravg ravg;
/*
@@ -1700,6 +1709,7 @@ struct task_struct {
unsigned int policy;
int nr_cpus_allowed;
cpumask_t cpus_allowed;
+ cpumask_t cpus_requested;
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
@@ -3723,6 +3733,7 @@ static inline unsigned long rlimit_max(unsigned int limit)
#define SCHED_CPUFREQ_DL (1U << 1)
#define SCHED_CPUFREQ_IOWAIT (1U << 2)
#define SCHED_CPUFREQ_INTERCLUSTER_MIG (1U << 3)
+#define SCHED_CPUFREQ_WALT (1U << 4)
#ifdef CONFIG_CPU_FREQ
struct update_util_data {
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 1e1fcb8791a7..c85fe9872d07 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -41,6 +41,12 @@ extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_sync_hint_enable;
extern unsigned int sysctl_sched_cstate_aware;
+#ifdef CONFIG_SCHED_WALT
+extern unsigned int sysctl_sched_use_walt_cpu_util;
+extern unsigned int sysctl_sched_use_walt_task_util;
+extern unsigned int sysctl_sched_walt_init_task_load_pct;
+extern unsigned int sysctl_sched_walt_cpu_high_irqload;
+#endif
#ifdef CONFIG_SCHED_HMP
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4f2933707b13..0c62b1d98ea0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -222,6 +222,11 @@ struct sk_buff;
#endif
extern int sysctl_max_skb_frags;
+/* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to
+ * segment using its current segmentation instead.
+ */
+#define GSO_BY_FRAGS 0xFFFF
+
typedef struct skb_frag_struct skb_frag_t;
struct skb_frag_struct {
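
GSO_BY_FRAGS is a sentinel gso_size: a protocol that has already laid out correctly sized chunks on the frag_list can ask skb_segment() to split along those existing boundaries instead of a fixed gso_size. Setting it is a one-liner (sketch):

	/* Sketch: segment this skb along its existing frag_list boundaries */
	skb_shinfo(skb)->gso_size = GSO_BY_FRAGS;
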
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5d2779aa4bbe..cf569d676909 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -879,6 +879,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd,
struct file_handle __user *handle,
int flags);
asmlinkage long sys_setns(int fd, int nstype);
+asmlinkage long sys_pidfd_open(pid_t pid, unsigned int flags);
asmlinkage long sys_process_vm_readv(pid_t pid,
const struct iovec __user *lvec,
unsigned long liovcnt,
@@ -909,4 +910,8 @@ asmlinkage long sys_membarrier(int cmd, int flags);
asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
+asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
+ siginfo_t __user *info,
+ unsigned int flags);
+
#endif
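
From userspace, the two pidfd syscalls added above compose into a race-free kill: the fd pins the struct pid (with its new wait_pidfd queue), so the signal cannot hit a recycled PID. A sketch via raw syscall(2), assuming the __NR_* numbers are present in the installed headers:

	#include <signal.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	static int terminate_via_pidfd(pid_t pid)
	{
		int ret, pidfd = syscall(__NR_pidfd_open, pid, 0);

		if (pidfd < 0)
			return -1;
		ret = syscall(__NR_pidfd_send_signal, pidfd, SIGTERM, NULL, 0);
		close(pidfd);
		return ret;
	}
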
diff --git a/include/linux/time.h b/include/linux/time.h
index 62cc50700004..cbb55e004342 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -9,6 +9,7 @@
extern struct timezone sys_tz;
#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
+#define TIMER_LOCK_TIGHT_LOOP_DELAY_NS 350
static inline int timespec_equal(const struct timespec *a,
const struct timespec *b)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 87c094961bd5..32342754643a 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -98,11 +98,11 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
return udp_sk(sk)->no_check6_rx;
}
-#define udp_portaddr_for_each_entry(__sk, node, list) \
- hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
+#define udp_portaddr_for_each_entry(__sk, list) \
+ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
-#define udp_portaddr_for_each_entry_rcu(__sk, node, list) \
- hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node)
+#define udp_portaddr_for_each_entry_rcu(__sk, list) \
+ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 07aed691fe00..fa06e7418f41 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -90,6 +90,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
+int ipv4_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard);
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
bool match_wildcard);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 7ff588ca6817..28332bdac333 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -53,6 +53,7 @@ struct sock *__inet6_lookup_established(struct net *net,
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
@@ -60,6 +61,7 @@ struct sock *inet6_lookup_listener(struct net *net,
static inline struct sock *__inet6_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
const __be16 sport,
const struct in6_addr *daddr,
@@ -71,12 +73,12 @@ static inline struct sock *__inet6_lookup(struct net *net,
if (sk)
return sk;
- return inet6_lookup_listener(net, hashinfo, saddr, sport,
+ return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
daddr, hnum, dif);
}
static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
- struct sk_buff *skb,
+ struct sk_buff *skb, int doff,
const __be16 sport,
const __be16 dport,
int iif)
@@ -86,16 +88,19 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo,
if (sk)
return sk;
- return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo,
- &ipv6_hdr(skb)->saddr, sport,
+ return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+ doff, &ipv6_hdr(skb)->saddr, sport,
&ipv6_hdr(skb)->daddr, ntohs(dport),
iif);
}
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
const int dif);
+
+int inet6_hash(struct sock *sk);
#endif /* IS_ENABLED(CONFIG_IPV6) */
#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif) \
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index de2e3ade6102..50f635c2c536 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -207,12 +207,16 @@ void inet_hashinfo_init(struct inet_hashinfo *h);
bool inet_ehash_insert(struct sock *sk, struct sock *osk);
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk);
-void __inet_hash(struct sock *sk, struct sock *osk);
-void inet_hash(struct sock *sk);
+int __inet_hash(struct sock *sk, struct sock *osk,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard));
+int inet_hash(struct sock *sk);
void inet_unhash(struct sock *sk);
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr,
const unsigned short hnum,
@@ -220,10 +224,11 @@ struct sock *__inet_lookup_listener(struct net *net,
static inline struct sock *inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
- return __inet_lookup_listener(net, hashinfo, saddr, sport,
+ return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport,
daddr, ntohs(dport), dif);
}
@@ -299,6 +304,7 @@ static inline struct sock *
static inline struct sock *__inet_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
const int dif)
@@ -307,12 +313,13 @@ static inline struct sock *__inet_lookup(struct net *net,
struct sock *sk = __inet_lookup_established(net, hashinfo,
saddr, sport, daddr, hnum, dif);
- return sk ? : __inet_lookup_listener(net, hashinfo, saddr, sport,
- daddr, hnum, dif);
+ return sk ? : __inet_lookup_listener(net, hashinfo, skb, doff, saddr,
+ sport, daddr, hnum, dif);
}
static inline struct sock *inet_lookup(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const __be16 dport,
const int dif)
@@ -320,7 +327,8 @@ static inline struct sock *inet_lookup(struct net *net,
struct sock *sk;
local_bh_disable();
- sk = __inet_lookup(net, hashinfo, saddr, sport, daddr, dport, dif);
+ sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
+ dport, dif);
local_bh_enable();
return sk;
@@ -328,6 +336,7 @@ static inline struct sock *inet_lookup(struct net *net,
static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
struct sk_buff *skb,
+ int doff,
const __be16 sport,
const __be16 dport)
{
@@ -337,8 +346,8 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
if (sk)
return sk;
else
- return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo,
- iph->saddr, sport,
+ return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb,
+ doff, iph->saddr, sport,
iph->daddr, dport, inet_iif(skb));
}
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 3d4fb0e2a3bf..798e5f95494b 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -374,6 +374,17 @@ static inline void ip_tunnel_unneed_metadata(void)
{
}
+static inline void ip_tunnel_info_opts_get(void *to,
+ const struct ip_tunnel_info *info)
+{
+}
+
+static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
+ const void *from, int len)
+{
+ info->options_len = 0;
+}
+
#endif /* CONFIG_INET */
#endif /* __NET_IP_TUNNELS_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index e3f73fd1d53a..a42d65ca0c27 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -286,7 +286,7 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
struct kernel_param;
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
extern unsigned int nf_conntrack_htable_size;
extern unsigned int nf_conntrack_max;
extern unsigned int nf_conntrack_hash_rnd;
diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index 68e509750caa..039cc29cb4a8 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -51,7 +51,7 @@ void pn_sock_init(void);
struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa);
void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb);
void phonet_get_local_port_range(int *min, int *max);
-void pn_sock_hash(struct sock *sk);
+int pn_sock_hash(struct sock *sk);
void pn_sock_unhash(struct sock *sk);
int pn_sock_get_port(struct sock *sk, unsigned short sport);
diff --git a/include/net/ping.h b/include/net/ping.h
index ac80cb45e630..5fd7cc244833 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -65,7 +65,7 @@ struct pingfakehdr {
};
int ping_get_port(struct sock *sk, unsigned short ident);
-void ping_hash(struct sock *sk);
+int ping_hash(struct sock *sk);
void ping_unhash(struct sock *sk);
int ping_init_sock(struct sock *sk);
diff --git a/include/net/raw.h b/include/net/raw.h
index 6a40c6562dd2..3e789008394d 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -57,7 +57,7 @@ int raw_seq_open(struct inode *ino, struct file *file,
#endif
-void raw_hash_sk(struct sock *sk);
+int raw_hash_sk(struct sock *sk);
void raw_unhash_sk(struct sock *sk);
struct raw_sock {
diff --git a/include/net/sock.h b/include/net/sock.h
index c8d332b75a40..4c417486a1b8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -193,7 +193,7 @@ struct sock_common {
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
- struct hlist_nulls_node skc_portaddr_node;
+ struct hlist_node skc_portaddr_node;
};
struct proto *skc_prot;
possible_net_t skc_net;
@@ -699,18 +699,18 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry(__sk, list, sk_bind_node)
/**
- * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset
+ * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @head: the head for your list.
* @offset: offset of hlist_node within the struct.
*
*/
-#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \
- for (pos = (head)->first; \
- (!is_a_nulls(pos)) && \
+#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \
+ for (pos = rcu_dereference((head)->first); \
+ pos != NULL && \
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
- pos = pos->next)
+ pos = rcu_dereference(pos->next))
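/*
 * Editor's sketch, not part of the patch: a hypothetical caller of the
 * RCU-safe iterator above. It assumes the hlist-based struct udp_hslot
 * from include/net/udp.h later in this diff and that rcu_read_lock() is
 * held; find_by_hash() itself is invented for illustration.
 */
static struct sock *find_by_hash(struct udp_hslot *hslot, unsigned int hash)
{
	struct sock *sk;
	struct hlist_node *node;

	sk_for_each_entry_offset_rcu(sk, node, &hslot->head,
			offsetof(struct sock, __sk_common.skc_portaddr_node))
		if (sk->sk_hash == hash)	/* match found under RCU */
			return sk;
	return NULL;
}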
static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
@@ -1019,7 +1019,7 @@ struct proto {
void (*release_cb)(struct sock *sk);
/* Keeping track of sk's, looking them up, and port selection methods. */
- void (*hash)(struct sock *sk);
+ int (*hash)(struct sock *sk);
void (*unhash)(struct sock *sk);
void (*rehash)(struct sock *sk);
int (*get_port)(struct sock *sk, unsigned short snum);
@@ -1348,10 +1348,10 @@ static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
/* With per-bucket locks this operation is not-atomic, so that
* this version is not worse.
*/
-static inline void __sk_prot_rehash(struct sock *sk)
+static inline int __sk_prot_rehash(struct sock *sk)
{
sk->sk_prot->unhash(sk);
- sk->sk_prot->hash(sk);
+ return sk->sk_prot->hash(sk);
}
void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 7dda3d7adba8..aecd30308d50 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -16,7 +16,7 @@ struct sock_reuseport {
};
extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
extern void reuseport_detach_sock(struct sock *sk);
extern struct sock *reuseport_select_sock(struct sock *sk,
u32 hash,
diff --git a/include/net/udp.h b/include/net/udp.h
index 13bd641be453..3f52b18ea12e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -59,7 +59,7 @@ struct udp_skb_cb {
* @lock: spinlock protecting changes to head/count
*/
struct udp_hslot {
- struct hlist_nulls_head head;
+ struct hlist_head head;
int count;
spinlock_t lock;
} __attribute__((aligned(2 * sizeof(long))));
@@ -177,9 +177,10 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
}
/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
-static inline void udp_lib_hash(struct sock *sk)
+static inline int udp_lib_hash(struct sock *sk)
{
BUG();
+ return 0;
}
void udp_lib_unhash(struct sock *sk);
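/*
 * Editor's sketch, not part of the patch: with ->hash() now returning int
 * (see struct proto in include/net/sock.h above), callers can propagate
 * insertion failures instead of ignoring them. example_listen_start() is
 * hypothetical; the real conversions live in the protocol code.
 */
static int example_listen_start(struct sock *sk)
{
	int err = sk->sk_prot->hash(sk);	/* may now fail, e.g. -ENOMEM */

	if (err)
		sk->sk_state = TCP_CLOSE;	/* undo the listen transition */
	return err;
}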
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index c72f2dc01d0b..63a7680347cb 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -34,6 +34,83 @@
{ F_WRLCK, "F_WRLCK" }, \
{ F_UNLCK, "F_UNLCK" })
+TRACE_EVENT(locks_get_lock_context,
+ TP_PROTO(struct inode *inode, int type, struct file_lock_context *ctx),
+
+ TP_ARGS(inode, type, ctx),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(unsigned char, type)
+ __field(struct file_lock_context *, ctx)
+ ),
+
+ TP_fast_assign(
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->i_ino = inode->i_ino;
+ __entry->type = type;
+ __entry->ctx = ctx;
+ ),
+
+ TP_printk("dev=0x%x:0x%x ino=0x%lx type=%s ctx=%p",
+ MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+ __entry->i_ino, show_fl_type(__entry->type), __entry->ctx)
+);
+
+DECLARE_EVENT_CLASS(filelock_lock,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+
+ TP_ARGS(inode, fl, ret),
+
+ TP_STRUCT__entry(
+ __field(struct file_lock *, fl)
+ __field(unsigned long, i_ino)
+ __field(dev_t, s_dev)
+ __field(struct file_lock *, fl_next)
+ __field(fl_owner_t, fl_owner)
+ __field(unsigned int, fl_pid)
+ __field(unsigned int, fl_flags)
+ __field(unsigned char, fl_type)
+ __field(loff_t, fl_start)
+ __field(loff_t, fl_end)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->fl = fl ? fl : NULL;
+ __entry->s_dev = inode->i_sb->s_dev;
+ __entry->i_ino = inode->i_ino;
+ __entry->fl_next = fl ? fl->fl_next : NULL;
+ __entry->fl_owner = fl ? fl->fl_owner : NULL;
+ __entry->fl_pid = fl ? fl->fl_pid : 0;
+ __entry->fl_flags = fl ? fl->fl_flags : 0;
+ __entry->fl_type = fl ? fl->fl_type : 0;
+ __entry->fl_start = fl ? fl->fl_start : 0;
+ __entry->fl_end = fl ? fl->fl_end : 0;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("fl=0x%p dev=0x%x:0x%x ino=0x%lx fl_next=0x%p fl_owner=0x%p fl_pid=%u fl_flags=%s fl_type=%s fl_start=%lld fl_end=%lld ret=%d",
+ __entry->fl, MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+ __entry->i_ino, __entry->fl_next, __entry->fl_owner,
+ __entry->fl_pid, show_fl_flags(__entry->fl_flags),
+ show_fl_type(__entry->fl_type),
+ __entry->fl_start, __entry->fl_end, __entry->ret)
+);
+
+DEFINE_EVENT(filelock_lock, posix_lock_inode,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, fcntl_setlk,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
+DEFINE_EVENT(filelock_lock, locks_remove_posix,
+ TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+ TP_ARGS(inode, fl, ret));
+
DECLARE_EVENT_CLASS(filelock_lease,
TP_PROTO(struct inode *inode, struct file_lock *fl),
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 73cd7e502d4c..70d6012c89aa 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -1903,6 +1903,7 @@ TRACE_EVENT(walt_update_task_ravg,
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
__field( pid_t, cur_pid )
+ __field(unsigned int, cur_freq )
__field( u64, wallclock )
__field( u64, mark_start )
__field( u64, delta_m )
@@ -1930,6 +1931,7 @@ TRACE_EVENT(walt_update_task_ravg,
__entry->evt = evt;
__entry->cpu = rq->cpu;
__entry->cur_pid = rq->curr->pid;
+ __entry->cur_freq = rq->cur_freq;
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->mark_start = p->ravg.mark_start;
@@ -1948,10 +1950,11 @@ TRACE_EVENT(walt_update_task_ravg,
__entry->active_windows = p->ravg.active_windows;
),
- TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
+ TP_printk("wc %llu ws %llu delta %llu event %d cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu"
" cs %llu ps %llu util %lu cur_window %u prev_window %u active_wins %u"
, __entry->wallclock, __entry->win_start, __entry->delta,
- __entry->evt, __entry->cpu, __entry->cur_pid,
+ __entry->evt, __entry->cpu,
+ __entry->cur_freq, __entry->cur_pid,
__entry->pid, __entry->comm, __entry->mark_start,
__entry->delta_m, __entry->demand,
__entry->sum, __entry->irqtime,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 1324b0292ec2..a6b339e1d70e 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -715,9 +715,13 @@ __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
__SYSCALL(__NR_membarrier, sys_membarrier)
#define __NR_mlock2 284
__SYSCALL(__NR_mlock2, sys_mlock2)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_pidfd_open 434
+__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#undef __NR_syscalls
-#define __NR_syscalls 285
+#define __NR_syscalls 435
/*
* All syscalls below here should go away really,
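/*
 * Editor's sketch, not part of the patch: with the numbers wired above, the
 * new calls are reachable from userspace via syscall(2) even before libc
 * grows wrappers. 434 is the asm-generic number added here; other
 * architecture tables may differ.
 */
#include <sys/syscall.h>
#include <unistd.h>

static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
{
	return syscall(434 /* __NR_pidfd_open */, pid, flags);
}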
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b8effe6bf417..bbbf36a963e6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -99,11 +99,13 @@ enum bpf_prog_type {
BPF_PROG_TYPE_XDP,
BPF_PROG_TYPE_PERF_EVENT,
BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
};
enum bpf_attach_type {
BPF_CGROUP_INET_INGRESS,
BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
__MAX_BPF_ATTACH_TYPE
};
@@ -627,6 +629,10 @@ struct bpf_tunnel_key {
__u32 tunnel_label;
};
+struct bpf_sock {
+ __u32 bound_dev_if;
+};
+
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
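/*
 * Editor's sketch, not part of the patch: attaching a loaded
 * BPF_PROG_TYPE_CGROUP_SOCK program at the new attach point added above.
 * Assumes prog_fd and cgrp_fd are valid descriptors and that __NR_bpf is
 * defined for the target architecture.
 */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int attach_sock_create(int cgrp_fd, int prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = cgrp_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type = BPF_CGROUP_INET_SOCK_CREATE;

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}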
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index c66b52303114..51caeb7797d0 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -93,6 +93,7 @@ struct inodes_stat_t {
#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT (1<<26)
#define MS_NOSEC (1<<28)
#define MS_BORN (1<<29)
#define MS_ACTIVE (1<<30)
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 5f0fe019a720..ed6e31d9c195 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -9,6 +9,7 @@
#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
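/*
 * Editor's sketch, not part of the patch: the CLONE_PIDFD convention
 * implemented in kernel/fork.c below returns the pidfd through the
 * parent_tidptr slot, which must read as zero beforehand. The argument
 * order shown is the x86-64 raw clone convention; other architectures
 * order the arguments differently, and CLONE_PIDFD may need to be defined
 * by hand (0x1000) if the libc headers predate it.
 */
#include <sched.h>
#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

static pid_t clone_with_pidfd(int *pidfd)
{
	*pidfd = 0;	/* copy_process() rejects a non-zero slot */
	return syscall(SYS_clone, CLONE_PIDFD | SIGCHLD, 0, pidfd, 0, 0);
}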
diff --git a/init/Kconfig b/init/Kconfig
index af433ff5356f..a5ae0470b7b2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1462,9 +1462,6 @@ endchoice
config SYSCTL
bool
-config ANON_INODES
- bool
-
config HAVE_UID16
bool
@@ -1654,14 +1651,12 @@ config HAVE_FUTEX_CMPXCHG
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
- select ANON_INODES
help
Disabling this option will cause the kernel to be built without
support for epoll family of system calls.
config SIGNALFD
bool "Enable signalfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the signalfd() system call that allows to receive signals
@@ -1671,7 +1666,6 @@ config SIGNALFD
config TIMERFD
bool "Enable timerfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the timerfd() system call that allows to receive timer
@@ -1681,7 +1675,6 @@ config TIMERFD
config EVENTFD
bool "Enable eventfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the eventfd() system call that allows to receive both
@@ -1692,7 +1685,6 @@ config EVENTFD
# syscall, maps, verifier
config BPF_SYSCALL
bool "Enable bpf() system call"
- select ANON_INODES
select BPF
default n
help
@@ -1747,7 +1739,6 @@ config BPF_UNPRIV_DEFAULT_OFF
config USERFAULTFD
bool "Enable userfaultfd() system call"
- select ANON_INODES
depends on MMU
help
Enable the userfaultfd() system call that allows to intercept and
@@ -1799,7 +1790,6 @@ config PERF_EVENTS
bool "Kernel performance events and counters"
default y if PROFILING
depends on HAVE_PERF_EVENTS
- select ANON_INODES
select IRQ_WORK
select SRCU
help
diff --git a/init/Makefile b/init/Makefile
index 243f61de2cba..d210b235c5d7 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -2,6 +2,8 @@
# Makefile for the linux kernel.
#
+ccflags-y := -fno-function-sections -fno-data-sections
+
obj-y := main.o version.o mounts.o
obj-y += noinitramfs.o
obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 357ce8355d57..8210c7dd7532 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -424,3 +424,36 @@ int __cgroup_bpf_run_filter(struct sock *sk,
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter);
+
+/**
+ * __cgroup_bpf_run_filter_sk() - Run a program on a sock
+ * @sk: sock structure to manipulate
+ * @type: The type of program to be executed
+ *
+ * The socket passed is expected to be of type INET or INET6.
+ *
+ * The program type passed in via @type must be suitable for sock
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if an attached program was found
+ * and it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+ enum bpf_attach_type type)
+{
+ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ struct bpf_prog *prog;
+ int ret = 0;
+
+ rcu_read_lock();
+
+ prog = rcu_dereference(cgrp->bpf.effective[type]->progs[0]);
+ if (prog)
+ ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
+
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 95ffe1fac0bf..2b1a925489cf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -60,11 +60,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
{
u8 *ptr = NULL;
- if (k >= SKF_NET_OFF)
+ if (k >= SKF_NET_OFF) {
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
- else if (k >= SKF_LL_OFF)
+ } else if (k >= SKF_LL_OFF) {
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ return NULL;
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
+ }
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
@@ -105,19 +107,29 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
gfp_extra_flags;
struct bpf_prog *fp;
+ u32 pages, delta;
+ int ret;
BUG_ON(fp_old == NULL);
size = round_up(size, PAGE_SIZE);
- if (size <= fp_old->pages * PAGE_SIZE)
+ pages = size / PAGE_SIZE;
+ if (pages <= fp_old->pages)
return fp_old;
+ delta = pages - fp_old->pages;
+ ret = __bpf_prog_charge(fp_old->aux->user, delta);
+ if (ret)
+ return NULL;
+
fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
- if (fp != NULL) {
+ if (fp == NULL) {
+ __bpf_prog_uncharge(fp_old->aux->user, delta);
+ } else {
kmemcheck_annotate_bitfield(fp, meta);
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
- fp->pages = size / PAGE_SIZE;
+ fp->pages = pages;
fp->aux->prog = fp;
/* We keep fp->aux from fp_old around in the new
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a73a056ccd88..f65bd2dc07f8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -652,19 +652,39 @@ static void free_used_maps(struct bpf_prog_aux *aux)
kfree(aux->used_maps);
}
+int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ unsigned long user_bufs;
+
+ if (user) {
+ user_bufs = atomic_long_add_return(pages, &user->locked_vm);
+ if (user_bufs > memlock_limit) {
+ atomic_long_sub(pages, &user->locked_vm);
+ return -EPERM;
+ }
+ }
+
+ return 0;
+}
+
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+ if (user)
+ atomic_long_sub(pages, &user->locked_vm);
+}
+
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = get_current_user();
- unsigned long memlock_limit;
-
- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ int ret;
- atomic_long_add(prog->pages, &user->locked_vm);
- if (atomic_long_read(&user->locked_vm) > memlock_limit) {
- atomic_long_sub(prog->pages, &user->locked_vm);
+ ret = __bpf_prog_charge(user, prog->pages);
+ if (ret) {
free_uid(user);
- return -EPERM;
+ return ret;
}
+
prog->aux->user = user;
return 0;
}
@@ -673,7 +693,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = prog->aux->user;
- atomic_long_sub(prog->pages, &user->locked_vm);
+ __bpf_prog_uncharge(user, prog->pages);
free_uid(user);
}
@@ -933,7 +953,24 @@ static int bpf_prog_attach(const union bpf_attr *attr)
bpf_prog_put(prog);
cgroup_put(cgrp);
break;
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ prog = bpf_prog_get_type(attr->attach_bpf_fd,
+ BPF_PROG_TYPE_CGROUP_SOCK);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp)) {
+ bpf_prog_put(prog);
+ return PTR_ERR(cgrp);
+ }
+ ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+ attr->attach_flags);
+ if (ret)
+ bpf_prog_put(prog);
+ cgroup_put(cgrp);
+ break;
default:
return -EINVAL;
}
@@ -961,7 +998,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_INET_EGRESS:
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
-
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCK;
+ break;
default:
return -EINVAL;
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d36c4f914a1e..c2508ca442b7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1544,6 +1544,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
struct cgroup *dcgrp = &dst_root->cgrp;
struct cgroup_subsys *ss;
int ssid, i, ret;
+ u16 dfl_disable_ss_mask = 0;
lockdep_assert_held(&cgroup_mutex);
@@ -1560,8 +1561,28 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
/* can't move between two non-dummy roots either */
if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
return -EBUSY;
+
+ /*
+ * Collect ssid's that need to be disabled from default
+ * hierarchy.
+ */
+ if (ss->root == &cgrp_dfl_root)
+ dfl_disable_ss_mask |= 1 << ssid;
+
} while_each_subsys_mask();
+ if (dfl_disable_ss_mask) {
+ struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
+
+ /*
+ * Controllers from default hierarchy that need to be rebound
+ * are all disabled together in one go.
+ */
+ cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
+ WARN_ON(cgroup_apply_control(scgrp));
+ cgroup_finalize_control(scgrp, 0);
+ }
+
do_each_subsys_mask(ss, ssid, ss_mask) {
struct cgroup_root *src_root = ss->root;
struct cgroup *scgrp = &src_root->cgrp;
@@ -1570,10 +1591,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
WARN_ON(!css || cgroup_css(dcgrp, ss));
- /* disable from the source */
- src_root->subsys_mask &= ~(1 << ssid);
- WARN_ON(cgroup_apply_control(scgrp));
- cgroup_finalize_control(scgrp, 0);
+ if (src_root != &cgrp_dfl_root) {
+ /* disable from the source */
+ src_root->subsys_mask &= ~(1 << ssid);
+ WARN_ON(cgroup_apply_control(scgrp));
+ cgroup_finalize_control(scgrp, 0);
+ }
/* rebind */
RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f3e6608313a2..a0bf3a7ce550 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -852,6 +852,20 @@ void rebuild_sched_domains(void)
put_online_cpus();
}
+static int update_cpus_allowed(struct cpuset *cs, struct task_struct *p,
+ const struct cpumask *new_mask)
+{
+ int ret;
+
+ if (cpumask_subset(&p->cpus_requested, cs->cpus_requested)) {
+ ret = set_cpus_allowed_ptr(p, &p->cpus_requested);
+ if (!ret)
+ return ret;
+ }
+
+ return set_cpus_allowed_ptr(p, new_mask);
+}
+
/**
* update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
* @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
@@ -867,7 +881,7 @@ static void update_tasks_cpumask(struct cpuset *cs)
css_task_iter_start(&cs->css, &it);
while ((task = css_task_iter_next(&it)))
- set_cpus_allowed_ptr(task, cs->effective_cpus);
+ update_cpus_allowed(cs, task, cs->effective_cpus);
css_task_iter_end(&it);
}
@@ -1556,7 +1570,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
* can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here
*/
- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+ WARN_ON_ONCE(update_cpus_allowed(cs, task, cpus_attach));
cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
cpuset_update_task_spread_flag(cs, task);
diff --git a/kernel/exit.c b/kernel/exit.c
index babbc3c0a181..052aa52de331 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -616,6 +616,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
if (group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
+ tsk->exit_state = EXIT_ZOMBIE;
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
thread_group_empty(tsk) &&
diff --git a/kernel/fork.c b/kernel/fork.c
index 92a0df862115..c25ebf6dd7f9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -11,6 +11,7 @@
* management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
*/
+#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
@@ -38,6 +39,7 @@
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/seccomp.h>
+#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
@@ -619,6 +621,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm->pinned_vm = 0;
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
+ spin_lock_init(&mm->arg_lock);
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
@@ -1294,6 +1297,84 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
task->pids[type].pid = pid;
}
+static int pidfd_release(struct inode *inode, struct file *file)
+{
+ struct pid *pid = file->private_data;
+
+ file->private_data = NULL;
+ put_pid(pid);
+ return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+ struct pid_namespace *ns = file_inode(m->file)->i_sb->s_fs_info;
+ struct pid *pid = f->private_data;
+
+ seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns));
+ seq_putc(m, '\n');
+}
+#endif
+
+/*
+ * Poll support for process exit notification.
+ */
+static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
+{
+ struct task_struct *task;
+ struct pid *pid = file->private_data;
+ int poll_flags = 0;
+
+ poll_wait(file, &pid->wait_pidfd, pts);
+
+ rcu_read_lock();
+ task = pid_task(pid, PIDTYPE_PID);
+ /*
+ * Inform pollers only when the whole thread group exits.
+ * If the thread group leader exits before all other threads in the
+ * group, then poll(2) should block, similar to the wait(2) family.
+ */
+ if (!task || (task->exit_state && thread_group_empty(task)))
+ poll_flags = POLLIN | POLLRDNORM;
+ rcu_read_unlock();
+
+ return poll_flags;
+}
+
+const struct file_operations pidfd_fops = {
+ .release = pidfd_release,
+ .poll = pidfd_poll,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = pidfd_show_fdinfo,
+#endif
+};
+
+/**
+ * pidfd_create() - Create a new pid file descriptor.
+ *
+ * @pid: struct pid that the pidfd will reference
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set.
+ *
+ * Note, that this function can only be called after the fd table has
+ * been unshared to avoid leaking the pidfd to the new process.
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+static int pidfd_create(struct pid *pid)
+{
+ int fd;
+
+ fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
+ O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ put_pid(pid);
+
+ return fd;
+}
+
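/*
 * Editor's sketch, not part of the patch: the userspace view of
 * pidfd_poll() above. A pidfd becomes readable once the whole thread
 * group has exited, so exit notification reduces to poll(2). Assumes
 * pidfd came from CLONE_PIDFD or pidfd_open().
 */
#include <poll.h>

static int wait_for_exit(int pidfd)
{
	struct pollfd pfd = { .fd = pidfd, .events = POLLIN };

	return poll(&pfd, 1, -1);	/* 1 once the thread group is gone */
}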
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
@@ -1305,13 +1386,14 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
+ int __user *parent_tidptr,
int __user *child_tidptr,
struct pid *pid,
int trace,
unsigned long tls,
int node)
{
- int retval;
+ int pidfd = -1, retval;
struct task_struct *p;
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
@@ -1356,6 +1438,31 @@ static struct task_struct *copy_process(unsigned long clone_flags,
return ERR_PTR(-EINVAL);
}
+ if (clone_flags & CLONE_PIDFD) {
+ int reserved;
+
+ /*
+ * - CLONE_PARENT_SETTID is useless for pidfds and also
+ * parent_tidptr is used to return pidfds.
+ * - CLONE_DETACHED is blocked so that we can potentially
+ * reuse it later for CLONE_PIDFD.
+ * - CLONE_THREAD is blocked until someone really needs it.
+ */
+ if (clone_flags &
+ (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Verify that parent_tidptr is sane so we can potentially
+ * reuse it later.
+ */
+ if (get_user(reserved, parent_tidptr))
+ return ERR_PTR(-EFAULT);
+
+ if (reserved != 0)
+ return ERR_PTR(-EINVAL);
+ }
+
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;
@@ -1538,6 +1645,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
}
+ /*
+ * This has to happen after we've potentially unshared the file
+ * descriptor table (so that the pidfd doesn't leak into the child
+ * if the fd table isn't shared).
+ */
+ if (clone_flags & CLONE_PIDFD) {
+ retval = pidfd_create(pid);
+ if (retval < 0)
+ goto bad_fork_free_pid;
+
+ pidfd = retval;
+ retval = put_user(pidfd, parent_tidptr);
+ if (retval)
+ goto bad_fork_put_pidfd;
+ }
+
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
@@ -1587,7 +1710,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
*/
retval = cgroup_can_fork(p);
if (retval)
- goto bad_fork_free_pid;
+ goto bad_fork_cgroup_threadgroup_change_end;
/*
* From this point on we must avoid any synchronous user-space
@@ -1698,8 +1821,12 @@ bad_fork_cancel_cgroup:
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
cgroup_cancel_fork(p);
+bad_fork_cgroup_threadgroup_change_end:
+ cgroup_threadgroup_change_end(current);
+bad_fork_put_pidfd:
+ if (clone_flags & CLONE_PIDFD)
+ sys_close(pidfd);
bad_fork_free_pid:
- threadgroup_change_end(current);
if (pid != &init_struct_pid)
free_pid(pid);
bad_fork_cleanup_io:
@@ -1754,7 +1881,7 @@ static inline void init_idle_pids(struct pid_link *links)
struct task_struct *fork_idle(int cpu)
{
struct task_struct *task;
- task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
+ task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0,
cpu_to_node(cpu));
if (!IS_ERR(task)) {
init_idle_pids(task->pids);
@@ -1799,7 +1926,7 @@ long _do_fork(unsigned long clone_flags,
trace = 0;
}
- p = copy_process(clone_flags, stack_start, stack_size,
+ p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
/*
* Do this prior waking up the new thread - the thread pointer
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 3b48dab80164..5d00ba9af4ec 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -21,6 +21,10 @@ config GENERIC_IRQ_SHOW
config GENERIC_IRQ_SHOW_LEVEL
bool
+# Supports effective affinity mask
+config GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ bool
+
# Facility to allocate a hardware interrupt. This is legacy support
# and should not be used in new code. Use irq domains instead.
config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 4684b7595e63..9fad2dc50452 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -36,6 +36,10 @@ static bool migrate_one_irq(struct irq_desc *desc)
affinity = &available_cpus;
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ const struct cpumask *default_affinity;
+
+ default_affinity = desc->affinity_hint ? : irq_default_affinity;
+
/*
* The order of preference for selecting a fallback CPU is
*
@@ -45,9 +49,9 @@ static bool migrate_one_irq(struct irq_desc *desc)
*/
cpumask_andnot(&available_cpus, cpu_online_mask,
cpu_isolated_mask);
- if (cpumask_intersects(&available_cpus, irq_default_affinity))
+ if (cpumask_intersects(&available_cpus, default_affinity))
cpumask_and(&available_cpus, &available_cpus,
- irq_default_affinity);
+ default_affinity);
else if (cpumask_empty(&available_cpus))
affinity = cpu_online_mask;
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 52fbf88cd2d8..e0de4682f57a 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -43,8 +43,19 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
gfp, node))
return -ENOMEM;
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ if (!zalloc_cpumask_var_node(&desc->irq_common_data.effective_affinity,
+ GFP_KERNEL, node)) {
+ free_cpumask_var(desc->irq_common_data.affinity);
+ return -ENOMEM;
+ }
+#endif
+
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
free_cpumask_var(desc->irq_common_data.affinity);
return -ENOMEM;
}
@@ -127,6 +138,9 @@ static void free_masks(struct irq_desc *desc)
free_cpumask_var(desc->pending_mask);
#endif
free_cpumask_var(desc->irq_common_data.affinity);
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
}
#else
static inline void free_masks(struct irq_desc *desc) { }
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index b05509af0352..9da4f2b075d1 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -37,19 +37,47 @@ static struct proc_dir_entry *root_irq_dir;
#ifdef CONFIG_SMP
+enum {
+ AFFINITY,
+ AFFINITY_LIST,
+ EFFECTIVE,
+ EFFECTIVE_LIST,
+};
+
static int show_irq_affinity(int type, struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
- const struct cpumask *mask = desc->irq_common_data.affinity;
+ const struct cpumask *mask;
+ switch (type) {
+ case AFFINITY:
+ case AFFINITY_LIST:
+ mask = desc->irq_common_data.affinity;
#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (irqd_is_setaffinity_pending(&desc->irq_data))
- mask = desc->pending_mask;
+ if (irqd_is_setaffinity_pending(&desc->irq_data))
+ mask = desc->pending_mask;
#endif
- if (type)
+ break;
+ case EFFECTIVE:
+ case EFFECTIVE_LIST:
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ mask = desc->irq_common_data.effective_affinity;
+ break;
+#else
+ return -EINVAL;
+#endif
+ }
+
+ switch (type) {
+ case AFFINITY_LIST:
+ case EFFECTIVE_LIST:
seq_printf(m, "%*pbl\n", cpumask_pr_args(mask));
- else
+ break;
+ case AFFINITY:
+ case EFFECTIVE:
seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
+ break;
+ }
return 0;
}
@@ -80,12 +108,12 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
int no_irq_affinity;
static int irq_affinity_proc_show(struct seq_file *m, void *v)
{
- return show_irq_affinity(0, m, v);
+ return show_irq_affinity(AFFINITY, m, v);
}
static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
{
- return show_irq_affinity(1, m, v);
+ return show_irq_affinity(AFFINITY_LIST, m, v);
}
@@ -188,6 +216,44 @@ static const struct file_operations irq_affinity_list_proc_fops = {
.write = irq_affinity_list_proc_write,
};
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static int irq_effective_aff_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(EFFECTIVE, m, v);
+}
+
+static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v)
+{
+ return show_irq_affinity(EFFECTIVE_LIST, m, v);
+}
+
+static int irq_effective_aff_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, irq_effective_aff_proc_show, PDE_DATA(inode));
+}
+
+static int irq_effective_aff_list_proc_open(struct inode *inode,
+ struct file *file)
+{
+ return single_open(file, irq_effective_aff_list_proc_show,
+ PDE_DATA(inode));
+}
+
+static const struct file_operations irq_effective_aff_proc_fops = {
+ .open = irq_effective_aff_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations irq_effective_aff_list_proc_fops = {
+ .open = irq_effective_aff_list_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
static int default_affinity_show(struct seq_file *m, void *v)
{
seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity));
@@ -368,6 +434,12 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
proc_create_data("node", 0444, desc->dir,
&irq_node_proc_fops, (void *)(long)irq);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ proc_create_data("effective_affinity", 0444, desc->dir,
+ &irq_effective_aff_proc_fops, (void *)(long)irq);
+ proc_create_data("effective_affinity_list", 0444, desc->dir,
+ &irq_effective_aff_list_proc_fops, (void *)(long)irq);
+# endif
#endif
proc_create_data("spurious", 0444, desc->dir,
@@ -388,6 +460,10 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
remove_proc_entry("affinity_hint", desc->dir);
remove_proc_entry("smp_affinity_list", desc->dir);
remove_proc_entry("node", desc->dir);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+ remove_proc_entry("effective_affinity", desc->dir);
+ remove_proc_entry("effective_affinity_list", desc->dir);
+# endif
#endif
remove_proc_entry("spurious", desc->dir);
diff --git a/kernel/pid.c b/kernel/pid.c
index ccfdb56321c6..9bc06f3a2b54 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -38,6 +38,8 @@
#include <linux/syscalls.h>
#include <linux/proc_ns.h>
#include <linux/proc_fs.h>
+#include <linux/anon_inodes.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -331,6 +333,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
+ init_waitqueue_head(&pid->wait_pidfd);
+
upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
@@ -564,6 +568,76 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
return pid;
}
+/**
+ * pidfd_create() - Create a new pid file descriptor.
+ *
+ * @pid: struct pid that the pidfd will reference
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set.
+ *
+ * Note, that this function can only be called after the fd table has
+ * been unshared to avoid leaking the pidfd to the new process.
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+static int pidfd_create(struct pid *pid)
+{
+ int fd;
+
+ fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
+ O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ put_pid(pid);
+
+ return fd;
+}
+
+/**
+ * pidfd_open() - Open new pid file descriptor.
+ *
+ * @pid: pid for which to retrieve a pidfd
+ * @flags: flags to pass
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set for
+ * the process identified by @pid. Currently, the process identified by
+ * @pid must be a thread-group leader. This restriction currently exists
+ * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
+ * be used with CLONE_THREAD) and pidfd polling (only supports thread group
+ * leaders).
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
+{
+ int fd, ret;
+ struct pid *p;
+ struct task_struct *tsk;
+
+ if (flags)
+ return -EINVAL;
+
+ if (pid <= 0)
+ return -EINVAL;
+
+ p = find_get_pid(pid);
+ if (!p)
+ return -ESRCH;
+
+ ret = 0;
+ rcu_read_lock();
+ tsk = pid_task(p, PIDTYPE_PID);
+ /* Check that pid belongs to a group leader task */
+ if (!tsk || !thread_group_leader(tsk))
+ ret = -EINVAL;
+ rcu_read_unlock();
+
+ fd = ret ?: pidfd_create(p);
+ put_pid(p);
+ return fd;
+}
+
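/*
 * Editor's sketch, not part of the patch: pairing pidfd_open() with the
 * pidfd_send_signal number wired into unistd.h earlier (424 on
 * asm-generic; architecture tables may differ).
 */
#include <stddef.h>
#include <sys/syscall.h>
#include <unistd.h>

static int pidfd_kill(int pidfd, int sig)
{
	return syscall(424 /* __NR_pidfd_send_signal */, pidfd, sig,
		       NULL /* siginfo */, 0 /* flags */);
}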
/*
* The pid hash table is scaled according to the amount of memory in the
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index e6eceb0aa496..3e3ae5ed8100 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -477,8 +477,6 @@ int pm_qos_request_for_cpumask(int pm_qos_class, struct cpumask *mask)
val = c->default_value;
for_each_cpu(cpu, mask) {
- if (cpu_isolated(cpu))
- continue;
switch (c->type) {
case PM_QOS_MIN:
@@ -545,19 +543,29 @@ static void pm_qos_irq_release(struct kref *ref)
}
static void pm_qos_irq_notify(struct irq_affinity_notify *notify,
- const cpumask_t *mask)
+ const cpumask_t *unused_mask)
{
unsigned long flags;
struct pm_qos_request *req = container_of(notify,
struct pm_qos_request, irq_notify);
struct pm_qos_constraints *c =
pm_qos_array[req->pm_qos_class]->constraints;
+ struct irq_desc *desc = irq_to_desc(req->irq);
+ struct cpumask *new_affinity =
+ irq_data_get_effective_affinity_mask(&desc->irq_data);
+ bool affinity_changed = false;
spin_lock_irqsave(&pm_qos_lock, flags);
- cpumask_copy(&req->cpus_affine, mask);
+ if (!cpumask_equal(&req->cpus_affine, new_affinity)) {
+ cpumask_copy(&req->cpus_affine, new_affinity);
+ affinity_changed = true;
+ }
+
spin_unlock_irqrestore(&pm_qos_lock, flags);
- pm_qos_update_target(c, req, PM_QOS_UPDATE_REQ, req->node.prio);
+ if (affinity_changed)
+ pm_qos_update_target(c, req, PM_QOS_UPDATE_REQ,
+ req->node.prio);
}
#endif
@@ -601,9 +609,17 @@ void pm_qos_add_request(struct pm_qos_request *req,
if (!desc)
return;
- mask = desc->irq_data.common->affinity;
- /* Get the current affinity */
+ /*
+ * If the IRQ is not started, the effective affinity
+ * won't be set. So fallback to the default affinity.
+ */
+ mask = irq_data_get_effective_affinity_mask(
+ &desc->irq_data);
+ if (cpumask_empty(mask))
+ mask = irq_data_get_affinity_mask(
+ &desc->irq_data);
+
cpumask_copy(&req->cpus_affine, mask);
req->irq_notify.irq = req->irq;
req->irq_notify.notify = pm_qos_irq_notify;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 6e7832ee6d74..18c322fe8b73 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -34,6 +34,10 @@
#include "power.h"
+#include <linux/gpio.h>
+extern int slst_gpio_base_id;
+#define PROC_AWAKE_ID 12 /* 12th bit */
+
const char *pm_labels[] = { "mem", "standby", "freeze", NULL };
const char *pm_states[PM_SUSPEND_MAX];
@@ -563,7 +567,9 @@ int pm_suspend(suspend_state_t state)
return -EINVAL;
pm_suspend_marker("entry");
+ gpio_set_value(slst_gpio_base_id + PROC_AWAKE_ID, 0);
error = enter_state(state);
+ gpio_set_value(slst_gpio_base_id + PROC_AWAKE_ID, 1);
if (error) {
suspend_stats.fail++;
dpm_save_failed_errno(error);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 5f748c5a40f0..90fdf77dab7e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -650,6 +650,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
struct rcu_head *list;
struct rcu_head *next;
LIST_HEAD(rcu_tasks_holdouts);
+ int fract;
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
housekeeping_affine(current);
@@ -731,13 +732,25 @@ static int __noreturn rcu_tasks_kthread(void *arg)
* holdouts. When the list is empty, we are done.
*/
lastreport = jiffies;
- while (!list_empty(&rcu_tasks_holdouts)) {
+
+ /* Start off with HZ/10 wait and slowly back off to 1 HZ wait */
+ fract = 10;
+
+ for (;;) {
bool firstreport;
bool needreport;
int rtst;
struct task_struct *t1;
- schedule_timeout_interruptible(HZ);
+ if (list_empty(&rcu_tasks_holdouts))
+ break;
+
+ /* Slowly back off waiting for holdouts */
+ schedule_timeout_interruptible(HZ/fract);
+
+ if (fract > 1)
+ fract--;
+
rtst = READ_ONCE(rcu_task_stall_timeout);
needreport = rtst > 0 &&
time_after(jiffies, lastreport + rtst);
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 7dde1b9918e4..ea301717538f 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -19,6 +19,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o completion.o idle.o sched_avg.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o energy.o
+obj-$(CONFIG_SCHED_WALT) += walt.o
obj-$(CONFIG_SCHED_HMP) += hmp.o boost.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 40a44876c74c..a8d2c50737ee 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -78,6 +78,7 @@
#include <linux/irq.h>
#include <linux/sched/core_ctl.h>
#include <linux/cpufreq_times.h>
+#include <linux/prefetch.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -97,6 +98,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/sched.h>
+#include "walt.h"
ATOMIC_NOTIFIER_HEAD(load_alert_notifier_head);
@@ -1084,6 +1086,33 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
}
#ifdef CONFIG_SMP
+
+static inline bool is_per_cpu_kthread(struct task_struct *p)
+{
+ if (!(p->flags & PF_KTHREAD))
+ return false;
+
+ if (p->nr_cpus_allowed != 1)
+ return false;
+
+ return true;
+}
+
+/*
+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see
+ * __set_cpus_allowed_ptr() and select_fallback_rq().
+ */
+static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+{
+ if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+ return false;
+
+ if (is_per_cpu_kthread(p))
+ return cpu_online(cpu);
+
+ return cpu_active(cpu);
+}
+
/*
* This is how migration works:
*
@@ -1143,11 +1172,8 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_
{
int src_cpu;
- if (unlikely(!cpu_active(dest_cpu)))
- return rq;
-
/* Affinity changed (again). */
- if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
+ if (!is_cpu_allowed(p, dest_cpu))
return rq;
src_cpu = cpu_of(rq);
@@ -1364,6 +1390,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
p->se.nr_migrations++;
perf_event_task_migrate(p);
+ walt_fixup_busy_time(p, new_cpu);
fixup_busy_time(p, new_cpu);
}
@@ -1648,9 +1675,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p, bool allow_iso)
for (;;) {
/* Any allowed, online CPU? */
for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
- if (!cpu_online(dest_cpu))
- continue;
- if (!cpu_active(dest_cpu))
+ if (!is_cpu_allowed(p, dest_cpu))
continue;
if (cpu_isolated(dest_cpu)) {
if (allow_iso)
@@ -2132,6 +2157,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags,
raw_spin_lock(&rq->lock);
old_load = task_load(p);
+ wallclock = walt_ktime_clock();
+ walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+ walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
@@ -2230,6 +2258,11 @@ static void try_to_wake_up_local(struct task_struct *p)
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
cpufreq_update_util(rq, 0);
+
+ wallclock = walt_ktime_clock();
+
+ walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
+ walt_update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
note_task_waking(p, wallclock);
}
@@ -2362,6 +2395,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
#endif
INIT_LIST_HEAD(&p->se.group_node);
+ walt_init_new_task_load(p);
#ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL;
@@ -2646,6 +2680,7 @@ void wake_up_new_task(struct task_struct *p)
struct rq *rq;
add_new_task_to_grp(p);
+ walt_init_new_task_load(p);
raw_spin_lock_irqsave(&p->pi_lock, flags);
p->state = TASK_RUNNING;
@@ -2664,6 +2699,7 @@ void wake_up_new_task(struct task_struct *p)
#endif
rq = __task_rq_lock(p);
mark_task_starting(p);
+ walt_mark_task_starting(p);
update_rq_clock(rq);
post_init_entity_util_avg(&p->se);
activate_task(rq, p, ENQUEUE_WAKEUP_NEW);
@@ -3134,6 +3170,23 @@ EXPORT_PER_CPU_SYMBOL(kstat);
EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
/*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+ struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+ prefetch(curr);
+ prefetch(&curr->exec_start);
+}
+
+/*
* Return accounted runtime for the task.
* In case the task is currently running, return the runtime plus current's
* pending runtime that have not been accounted yet.
@@ -3167,6 +3220,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
* thread, breaking clock_gettime().
*/
if (task_current(rq, p) && task_on_rq_queued(p)) {
+ prefetch_curr_exec_start(p);
update_rq_clock(rq);
p->sched_class->update_curr(rq);
}
@@ -3194,10 +3248,13 @@ void scheduler_tick(void)
raw_spin_lock(&rq->lock);
old_load = task_load(curr);
+ walt_set_window_start(rq);
set_window_start(rq);
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
update_cpu_load_active(rq);
+ walt_update_task_ravg(rq->curr, rq, TASK_UPDATE,
+ walt_ktime_clock(), 0);
calc_global_load_tick(rq);
wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
@@ -3561,6 +3618,9 @@ static void __sched notrace __schedule(bool preempt)
update_rq_clock(rq);
next = pick_next_task(rq, prev);
+ wallclock = walt_ktime_clock();
+ walt_update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
+ walt_update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
rq->clock_skip_update = 0;
@@ -4902,6 +4962,9 @@ again:
retval = -EINVAL;
}
+ if (!retval && !(p->flags & PF_KTHREAD))
+ cpumask_and(&p->cpus_requested, in_mask, cpu_possible_mask);
+
out_free_new_mask:
free_cpumask_var(new_mask);
out_free_cpus_allowed:
@@ -5923,12 +5986,6 @@ int sched_isolate_cpu(int cpu)
cpumask_andnot(&avail_cpus, cpu_online_mask, cpu_isolated_mask);
- /* We cannot isolate ALL cpus in the system */
- if (cpumask_weight(&avail_cpus) == 1) {
- ret_code = -EINVAL;
- goto out;
- }
-
if (!cpu_online(cpu)) {
ret_code = -EINVAL;
goto out;
@@ -5937,6 +5994,13 @@ int sched_isolate_cpu(int cpu)
if (++cpu_isolation_vote[cpu] > 1)
goto out;
+ /* We cannot isolate ALL cpus in the system */
+ if (cpumask_weight(&avail_cpus) == 1) {
+ --cpu_isolation_vote[cpu];
+ ret_code = -EINVAL;
+ goto out;
+ }
+
/*
* There is a race between watchdog being enabled by hotplug and
* core isolation disabling the watchdog. When a CPU is hotplugged in
@@ -5960,7 +6024,9 @@ int sched_isolate_cpu(int cpu)
smp_call_function_any(&avail_cpus, hrtimer_quiesce_cpu, &cpu, 1);
smp_call_function_any(&avail_cpus, timer_quiesce_cpu, &cpu, 1);
+ irq_lock_sparse();
stop_cpus(cpumask_of(cpu), do_isolation_work_cpu_stop, 0);
+ irq_unlock_sparse();
calc_load_migrate(rq);
update_max_interval();
@@ -6319,6 +6385,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_UP_PREPARE:
raw_spin_lock_irqsave(&rq->lock, flags);
+ walt_set_window_start(rq);
set_window_start(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
rq->calc_load_update = calc_load_update;
@@ -6340,6 +6407,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
sched_ttwu_pending();
/* Update our root-domain */
raw_spin_lock_irqsave(&rq->lock, flags);
+ walt_migrate_sync_cpu(cpu);
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -8325,6 +8393,7 @@ void __init sched_init_smp(void)
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
BUG();
+ cpumask_copy(&current->cpus_requested, cpu_possible_mask);
sched_init_granularity();
free_cpumask_var(non_isolated_cpus);
@@ -8534,6 +8603,11 @@ void __init sched_init(void)
}
#endif
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
+#ifdef CONFIG_SCHED_WALT
+ rq->cur_irqload = 0;
+ rq->avg_irqload = 0;
+ rq->irqload_ts = 0;
+#endif
INIT_LIST_HEAD(&rq->cfs_tasks);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 6c84b4d28914..6effb44aeb30 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -33,6 +33,7 @@ struct sugov_tunables {
struct gov_attr_set attr_set;
unsigned int up_rate_limit_us;
unsigned int down_rate_limit_us;
+ bool iowait_boost_enable;
};
struct sugov_policy {
@@ -228,6 +229,11 @@ static void sugov_get_util(unsigned long *util, unsigned long *max, u64 time)
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
unsigned int flags)
{
+ struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+
+ if (!sg_policy->tunables->iowait_boost_enable)
+ return;
+
if (flags & SCHED_CPUFREQ_IOWAIT) {
if (sg_cpu->iowait_boost_pending)
return;
@@ -510,12 +516,36 @@ static ssize_t down_rate_limit_us_store(struct gov_attr_set *attr_set,
return count;
}
+static ssize_t iowait_boost_enable_show(struct gov_attr_set *attr_set,
+ char *buf)
+{
+ struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+ return sprintf(buf, "%u\n", tunables->iowait_boost_enable);
+}
+
+static ssize_t iowait_boost_enable_store(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
+{
+ struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+ bool enable;
+
+ if (kstrtobool(buf, &enable))
+ return -EINVAL;
+
+ tunables->iowait_boost_enable = enable;
+
+ return count;
+}
+
static struct governor_attr up_rate_limit_us = __ATTR_RW(up_rate_limit_us);
static struct governor_attr down_rate_limit_us = __ATTR_RW(down_rate_limit_us);
+static struct governor_attr iowait_boost_enable = __ATTR_RW(iowait_boost_enable);
static struct attribute *sugov_attributes[] = {
&up_rate_limit_us.attr,
&down_rate_limit_us.attr,
+ &iowait_boost_enable.attr,
NULL
};
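/*
 * Editor's sketch, not part of the patch: the tunable surfaces through the
 * governor attr_set, so it can be toggled by writing the sysfs file. The
 * policy0 path assumes the per-policy layout; kstrtobool() above accepts
 * "0"/"1" among other spellings.
 */
#include <fcntl.h>
#include <unistd.h>

static int set_iowait_boost(int enable)
{
	int fd = open("/sys/devices/system/cpu/cpufreq/policy0/schedutil/"
		      "iowait_boost_enable", O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, enable ? "1" : "0", 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}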
@@ -675,6 +705,8 @@ static int sugov_init(struct cpufreq_policy *policy)
}
}
+ tunables->iowait_boost_enable = policy->iowait_boost_enable;
+
policy->governor_data = sg_policy;
sg_policy->tunables = tunables;
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 14225d5d8617..867cb7877511 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -133,6 +133,8 @@ retry:
if (lowest_mask) {
cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+ cpumask_andnot(lowest_mask, lowest_mask,
+ cpu_isolated_mask);
if (drop_nopreempts)
drop_nopreempt_cpus(lowest_mask);
/*
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e6ec68c15aa3..cf6729cb46dd 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -6,6 +6,7 @@
#include <linux/context_tracking.h>
#include <linux/cpufreq_times.h>
#include "sched.h"
+#include "walt.h"
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -79,9 +80,10 @@ void irqtime_account_irq(struct task_struct *curr)
irq_time_write_end();
- if (account)
+ if (account) {
+ walt_account_irqtime(cpu, curr, delta, wallclock);
sched_account_irqtime(cpu, curr, delta, wallclock);
- else if (curr != this_cpu_ksoftirqd())
+ } else if (curr != this_cpu_ksoftirqd())
sched_account_irqstart(cpu, curr, wallclock);
local_irq_restore(flags);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 51443a801af5..266fc95f6c0f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -55,6 +55,12 @@ unsigned int normalized_sysctl_sched_latency = 6000000ULL;
unsigned int sysctl_sched_sync_hint_enable = 1;
unsigned int sysctl_sched_cstate_aware = 1;
+#ifdef CONFIG_SCHED_WALT
+unsigned int sysctl_sched_use_walt_cpu_util = 1;
+unsigned int sysctl_sched_use_walt_task_util = 1;
+__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload =
+ (10 * NSEC_PER_MSEC);
+#endif
/*
* The initial- and re-scaling of tunables is configurable
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -4326,6 +4332,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
*/
#define UPDATE_TG 0x1
#define SKIP_AGE_LOAD 0x2
+#define SKIP_CPUFREQ 0x4
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct sched_entity *se, int flags)
@@ -4346,7 +4353,7 @@ static inline void update_load_avg(struct sched_entity *se, int flags)
cfs_rq->curr == se, NULL);
}
- decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
+ decayed = update_cfs_rq_load_avg(now, cfs_rq, !(flags & SKIP_CPUFREQ));
decayed |= propagate_entity_load_avg(se);
if (decayed && (flags & UPDATE_TG))
@@ -4522,6 +4529,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
#define UPDATE_TG 0x0
#define SKIP_AGE_LOAD 0x0
+#define SKIP_CPUFREQ 0x0
static inline void update_load_avg(struct sched_entity *se, int not_used1){}
static inline void
@@ -4744,6 +4752,8 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
+ int update_flags;
+
/*
* Update run-time statistics of the 'current'.
*/
@@ -4757,7 +4767,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
* - For group entity, update its weight to reflect the new share
* of its group cfs_rq.
*/
- update_load_avg(se, UPDATE_TG);
+ update_flags = UPDATE_TG;
+
+ if (flags & DEQUEUE_IDLE)
+ update_flags |= SKIP_CPUFREQ;
+
+ update_load_avg(se, update_flags);
dequeue_entity_load_avg(cfs_rq, se);
update_stats_dequeue(cfs_rq, se);
@@ -5052,7 +5067,6 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
now = sched_clock_cpu(smp_processor_id());
cfs_b->runtime = cfs_b->quota;
- cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
}
static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -5074,7 +5088,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
struct task_group *tg = cfs_rq->tg;
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
- u64 amount = 0, min_amount, expires;
+ u64 amount = 0, min_amount;
/* note: this is a positive sum as runtime_remaining <= 0 */
min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
@@ -5091,61 +5105,17 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
cfs_b->idle = 0;
}
}
- expires = cfs_b->runtime_expires;
raw_spin_unlock(&cfs_b->lock);
cfs_rq->runtime_remaining += amount;
- /*
- * we may have advanced our local expiration to account for allowed
- * spread between our sched_clock and the one on which runtime was
- * issued.
- */
- if ((s64)(expires - cfs_rq->runtime_expires) > 0)
- cfs_rq->runtime_expires = expires;
return cfs_rq->runtime_remaining > 0;
}
-/*
- * Note: This depends on the synchronization provided by sched_clock and the
- * fact that rq->clock snapshots this value.
- */
-static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
-{
- struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-
- /* if the deadline is ahead of our clock, nothing to do */
- if (likely((s64)(rq_clock(rq_of(cfs_rq)) - cfs_rq->runtime_expires) < 0))
- return;
-
- if (cfs_rq->runtime_remaining < 0)
- return;
-
- /*
- * If the local deadline has passed we have to consider the
- * possibility that our sched_clock is 'fast' and the global deadline
- * has not truly expired.
- *
- * Fortunately we can check determine whether this the case by checking
- * whether the global deadline has advanced. It is valid to compare
- * cfs_b->runtime_expires without any locks since we only care about
- * exact equality, so a partial write will still work.
- */
-
- if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
- /* extend local deadline, drift is bounded above by 2 ticks */
- cfs_rq->runtime_expires += TICK_NSEC;
- } else {
- /* global deadline is ahead, expiration has passed */
- cfs_rq->runtime_remaining = 0;
- }
-}
-
static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
{
/* dock delta_exec before expiring quota (as it could span periods) */
cfs_rq->runtime_remaining -= delta_exec;
- expire_cfs_rq_runtime(cfs_rq);
if (likely(cfs_rq->runtime_remaining > 0))
return;
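
The hunks above delete the per-cfs_rq slice expiration entirely: runtime handed to a cfs_rq now persists instead of being invalidated at the period edge. A rough toy model of why the old expiry hurt bursty, many-CPU cgroups; the numbers are illustrative only:

/* Toy model (illustrative numbers) of the per-cfs_rq slice expiry
 * removed above: quota 20ms/period, 5ms slices, a task touching each
 * of several CPUs for ~1ms per period. Each cfs_rq grabbed a full
 * slice but used little of it, and the remainder expired.
 */
#include <stdio.h>

int main(void)
{
	const int slice_ms = 5, quota_ms = 20, ncpu = 8, used_per_cpu_ms = 1;
	int assigned = 0, used = 0, cpu;

	for (cpu = 0; cpu < ncpu && assigned + slice_ms <= quota_ms; cpu++) {
		assigned += slice_ms;     /* each cfs_rq pulls a full slice */
		used += used_per_cpu_ms;  /* ...but burns only a sliver */
	}
	printf("assigned %dms of quota, used %dms\n", assigned, used);
	printf("old scheme: %dms of unused slice expired each period\n",
	       assigned - used);
	printf("new scheme: leftover runtime stays usable on the cfs_rq\n");
	return 0;
}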
@@ -5379,8 +5349,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
cpu_temp(cpu_of(rq)));
}
-static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
- u64 remaining, u64 expires)
+static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
{
struct cfs_rq *cfs_rq;
u64 runtime;
@@ -5401,7 +5370,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
remaining -= runtime;
cfs_rq->runtime_remaining += runtime;
- cfs_rq->runtime_expires = expires;
/* we check whether we're throttled above */
if (cfs_rq->runtime_remaining > 0)
@@ -5426,7 +5394,7 @@ next:
*/
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
{
- u64 runtime, runtime_expires;
+ u64 runtime;
int throttled;
/* no need to continue the timer with no bandwidth constraint */
@@ -5454,8 +5422,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
/* account preceding periods in which throttling occurred */
cfs_b->nr_throttled += overrun;
- runtime_expires = cfs_b->runtime_expires;
-
/*
* This check is repeated as we are holding onto the new bandwidth while
* we unthrottle. This can potentially race with an unthrottled group
@@ -5468,8 +5434,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
cfs_b->distribute_running = 1;
raw_spin_unlock(&cfs_b->lock);
/* we can't nest cfs_b->lock while distributing bandwidth */
- runtime = distribute_cfs_runtime(cfs_b, runtime,
- runtime_expires);
+ runtime = distribute_cfs_runtime(cfs_b, runtime);
raw_spin_lock(&cfs_b->lock);
cfs_b->distribute_running = 0;
@@ -5546,8 +5511,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
return;
raw_spin_lock(&cfs_b->lock);
- if (cfs_b->quota != RUNTIME_INF &&
- cfs_rq->runtime_expires == cfs_b->runtime_expires) {
+ if (cfs_b->quota != RUNTIME_INF) {
cfs_b->runtime += slack_runtime;
/* we are under rq->lock, defer unthrottling using a timer */
@@ -5579,7 +5543,6 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
{
u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
- u64 expires;
/* confirm we're still not at a refresh boundary */
raw_spin_lock(&cfs_b->lock);
@@ -5596,7 +5559,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
runtime = cfs_b->runtime;
- expires = cfs_b->runtime_expires;
if (runtime)
cfs_b->distribute_running = 1;
@@ -5605,11 +5567,10 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
if (!runtime)
return;
- runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
+ runtime = distribute_cfs_runtime(cfs_b, runtime);
raw_spin_lock(&cfs_b->lock);
- if (expires == cfs_b->runtime_expires)
- cfs_b->runtime -= min(runtime, cfs_b->runtime);
+ cfs_b->runtime -= min(runtime, cfs_b->runtime);
cfs_b->distribute_running = 0;
raw_spin_unlock(&cfs_b->lock);
}
@@ -5936,6 +5897,25 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct sched_entity *se = &p->se;
#ifdef CONFIG_SMP
int task_new = flags & ENQUEUE_WAKEUP_NEW;
+
+ /*
+ * Update SchedTune accounting.
+ *
+ * We do it before updating the CPU capacity to ensure the
+ * boost value of the current task is accounted for in the
+ * selection of the OPP.
+ *
+ * We do it also in the case where we enqueue a throttled task;
+ * we could argue that a throttled task should not boost a CPU,
+ * however:
+ * a) properly implementing CPU boosting considering throttled
+ * tasks will increase a lot the complexity of the solution
+ * b) it's not easy to quantify the benefits introduced by
+ * such a more complex solution.
+ * Thus, for the time being we go for the simple solution and boost
+ * also for throttled RQs.
+ */
+ schedtune_enqueue_task(p, cpu_of(rq));
#endif
/*
@@ -5961,6 +5941,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;
cfs_rq->h_nr_running++;
+ walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
flags = ENQUEUE_WAKEUP;
@@ -5969,6 +5950,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running++;
+ walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
if (cfs_rq_throttled(cfs_rq))
@@ -5984,27 +5966,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
}
#ifdef CONFIG_SMP
-
- /*
- * Update SchedTune accounting.
- *
- * We do it before updating the CPU capacity to ensure the
- * boost value of the current task is accounted for in the
- * selection of the OPP.
- *
- * We do it also in the case where we enqueue a throttled task;
- * we could argue that a throttled task should not boost a CPU,
- * however:
- * a) properly implementing CPU boosting considering throttled
- * tasks will increase a lot the complexity of the solution
- * b) it's not easy to quantify the benefits introduced by
- * such a more complex solution.
- * Thus, for the time being we go for the simple solution and boost
- * also for throttled RQs.
- */
- schedtune_enqueue_task(p, cpu_of(rq));
-
if (energy_aware() && !se) {
+ walt_inc_cumulative_runnable_avg(rq, p);
if (!task_new && !rq->rd->overutilized &&
cpu_overutilized(rq->cpu)) {
rq->rd->overutilized = true;
@@ -6029,6 +5992,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct sched_entity *se = &p->se;
int task_sleep = flags & DEQUEUE_SLEEP;
+ if (task_sleep && rq->nr_running == 1)
+ flags |= DEQUEUE_IDLE;
+
+#ifdef CONFIG_SMP
+ /*
+ * Update SchedTune accounting
+ *
+ * We do it before updating the CPU capacity to ensure the
+ * boost value of the current task is accounted for in the
+ * selection of the OPP.
+ */
+ schedtune_dequeue_task(p, cpu_of(rq));
+#endif
+
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags);
@@ -6042,6 +6019,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (cfs_rq_throttled(cfs_rq))
break;
cfs_rq->h_nr_running--;
+ walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
/* Don't dequeue parent if it has other entities besides us */
@@ -6060,14 +6038,22 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
}
for_each_sched_entity(se) {
+ int update_flags;
+
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running--;
+ walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
if (cfs_rq_throttled(cfs_rq))
break;
- update_load_avg(se, UPDATE_TG);
+ update_flags = UPDATE_TG;
+
+ if (flags & DEQUEUE_IDLE)
+ update_flags |= SKIP_CPUFREQ;
+
+ update_load_avg(se, update_flags);
update_cfs_shares(se);
}
@@ -6076,19 +6062,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
dec_rq_hmp_stats(rq, p, 1);
}
-#ifdef CONFIG_SMP
-
- /*
- * Update SchedTune accounting
- *
- * We do it before updating the CPU capacity to ensure the
- * boost value of the current task is accounted for in the
- * selection of the OPP.
- */
- schedtune_dequeue_task(p, cpu_of(rq));
-
-#endif /* CONFIG_SMP */
-
hrtick_update(rq);
}
@@ -7098,6 +7071,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
static inline unsigned long task_util(struct task_struct *p)
{
+#ifdef CONFIG_SCHED_WALT
+ if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
+ unsigned long demand = p->ravg.demand;
+ return (demand << 10) / walt_ravg_window;
+ }
+#endif
return p->se.avg.util_avg;
}
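
The WALT branch of task_util() rescales a task's windowed demand (in nanoseconds) into the 0..1024 capacity range via (demand << 10) / walt_ravg_window. A worked example, assuming the 20ms default window set elsewhere in this series:

/* Worked example of the WALT task_util() scaling above.
 * walt_ravg_window = 20ms is an assumption (the tick-adjusted default).
 */
#include <stdio.h>

int main(void)
{
	unsigned long walt_ravg_window = 20000000;  /* 20ms in ns (assumed) */
	unsigned long demand = 5000000;             /* task ran ~5ms/window */
	unsigned long util = (demand << 10) / walt_ravg_window;

	printf("util = %lu of 1024 (~%lu%% of one CPU)\n",
	       util, util * 100 / 1024);            /* 256 of 1024, ~25% */
	return 0;
}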
@@ -7656,6 +7635,11 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
if (new_util > capacity_orig)
continue;
+#ifdef CONFIG_SCHED_WALT
+ if (walt_cpu_high_irqload(i))
+ continue;
+#endif
+
/*
* Case A) Latency sensitive tasks
*
@@ -8953,7 +8937,17 @@ redo:
if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed)
goto next;
- if ((load / 2) > env->imbalance)
+ /*
+ * p is not the running task when we get here, so if p is one
+ * of only two tasks on the src CPU's runqueue (the other being
+ * the running one), it is the only task that can be balanced.
+ * Skip tasks whose load exceeds 2*imbalance only when other
+ * tasks could be balanced instead, or when big tasks are being
+ * deliberately ignored.
+ */
+ if (((cpu_rq(env->src_cpu)->nr_running > 2) ||
+ (env->flags & LBF_IGNORE_BIG_TASKS)) &&
+ ((load / 2) > env->imbalance))
goto next;
detach_task(p, env);
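
With the new guard, an oversized task is only skipped when the source rq has other candidates (nr_running > 2) or big tasks are deliberately ignored; otherwise it must stay migratable or nothing can move. A numeric sketch of the predicate, with an assumed flag value:

/* Illustrative evaluation of the detach-path skip condition above. */
#include <stdbool.h>
#include <stdio.h>

#define LBF_IGNORE_BIG_TASKS 0x100  /* assumed value, for illustration */

int main(void)
{
	unsigned long load = 300, imbalance = 100;  /* example numbers */
	int src_nr_running = 2;                     /* p + the running task */
	int flags = 0;

	bool skip = ((src_nr_running > 2) || (flags & LBF_IGNORE_BIG_TASKS)) &&
		    ((load / 2) > imbalance);

	/* load/2 > imbalance, but p is the only movable task: don't skip */
	printf("skip = %d\n", skip);
	return 0;
}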
@@ -10446,8 +10440,10 @@ static int need_active_balance(struct lb_env *env)
* It's worth migrating the task if the src_cpu's capacity is reduced
* because of other sched_class or IRQs if more capacity stays
* available on dst_cpu.
+ * Avoid pulling the CFS task if it is the only task running.
*/
if ((env->idle != CPU_NOT_IDLE) &&
+ (env->src_rq->nr_running > 1) &&
(env->src_rq->cfs.h_nr_running == 1)) {
if ((check_cpu_capacity(env->src_rq, sd)) &&
(capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
@@ -10685,7 +10681,24 @@ more_balance:
/* All tasks on this runqueue were pinned by CPU affinity */
if (unlikely(env.flags & LBF_ALL_PINNED)) {
cpumask_clear_cpu(cpu_of(busiest), cpus);
- if (!cpumask_empty(cpus)) {
+ /*
+ * dst_cpu is not a valid busiest cpu in the following
+ * check since load cannot be pulled from dst_cpu to be
+ * put on dst_cpu.
+ */
+ cpumask_clear_cpu(env.dst_cpu, cpus);
+ /*
+ * Go back to "redo" iff the load-balance cpumask
+ * contains other potential busiest cpus for the
+ * current sched domain.
+ */
+ if (cpumask_intersects(cpus, sched_domain_span(env.sd))) {
+ /*
+ * Now that the check has passed, reenable
+ * dst_cpu so that load can be calculated on
+ * it in the redo path.
+ */
+ cpumask_set_cpu(env.dst_cpu, cpus);
env.loop = 0;
env.loop_break = sched_nr_migrate_break;
goto redo;
@@ -11772,7 +11785,8 @@ static inline bool vruntime_normalized(struct task_struct *p)
* - A task which has been woken up by try_to_wake_up() and
* waiting for actually being woken up by sched_ttwu_pending().
*/
- if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+ if (!se->sum_exec_runtime ||
+ (p->state == TASK_WAKING && p->sched_class == &fair_sched_class))
return true;
return false;
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 598656b42203..649d6a437a13 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -764,13 +764,16 @@ unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */
unsigned int
min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */
-/* Min window size (in ns) = 10ms */
-#define MIN_SCHED_RAVG_WINDOW 10000000
+/* Min window size (in ns) = 20ms */
+#define MIN_SCHED_RAVG_WINDOW ((20000000 / TICK_NSEC) * TICK_NSEC)
/* Max window size (in ns) = 1s */
-#define MAX_SCHED_RAVG_WINDOW 1000000000
+#define MAX_SCHED_RAVG_WINDOW ((1000000000 / TICK_NSEC) * TICK_NSEC)
-/* Window size (in ns) */
+/*
+ * Window size (in ns). Adjust for the tick size so that the window
+ * rollover occurs just before the tick boundary.
+ */
__read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW;
/* Maximum allowed threshold before freq aggregation must be enabled */
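
Rounding the window to a whole number of ticks depends on CONFIG_HZ. A quick computation of the new MIN_SCHED_RAVG_WINDOW for two tick rates; TICK_NSEC is approximated as 1e9/HZ here, which is also why set_sched_ravg_window() below only warns when the adjusted window drifts more than 1ms from the request:

/* Tick-rounding arithmetic behind MIN_SCHED_RAVG_WINDOW above. */
#include <stdio.h>

static unsigned int tick_round(unsigned int ns, unsigned int tick_nsec)
{
	return (ns / tick_nsec) * tick_nsec;  /* round down to tick multiple */
}

int main(void)
{
	printf("HZ=100: %u ns\n", tick_round(20000000, 1000000000 / 100));
	printf("HZ=300: %u ns\n", tick_round(20000000, 1000000000 / 300));
	/* HZ=100 -> 20000000; HZ=300 -> 19999998 (6 ticks of ~3.33ms) */
	return 0;
}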
@@ -1616,17 +1619,20 @@ static inline int exiting_task(struct task_struct *p)
static int __init set_sched_ravg_window(char *str)
{
+ unsigned int adj_window;
unsigned int window_size;
get_option(&str, &window_size);
- if (window_size < MIN_SCHED_RAVG_WINDOW ||
- window_size > MAX_SCHED_RAVG_WINDOW) {
- WARN_ON(1);
- return -EINVAL;
- }
+ /* Adjust for CONFIG_HZ */
+ adj_window = (window_size / TICK_NSEC) * TICK_NSEC;
+
+ /* Warn if we're a bit too far away from the expected window size */
+ WARN(adj_window < window_size - NSEC_PER_MSEC,
+ "tick-adjusted window size %u, original was %u\n", adj_window,
+ window_size);
- sched_ravg_window = window_size;
+ sched_ravg_window = adj_window;
return 0;
}
@@ -3657,8 +3663,10 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
migrate_top_tasks(p, src_rq, dest_rq);
if (!same_freq_domain(new_cpu, task_cpu(p))) {
- cpufreq_update_util(dest_rq, SCHED_CPUFREQ_INTERCLUSTER_MIG);
- cpufreq_update_util(src_rq, SCHED_CPUFREQ_INTERCLUSTER_MIG);
+ cpufreq_update_util(dest_rq, SCHED_CPUFREQ_INTERCLUSTER_MIG |
+ SCHED_CPUFREQ_WALT);
+ cpufreq_update_util(src_rq, SCHED_CPUFREQ_INTERCLUSTER_MIG |
+ SCHED_CPUFREQ_WALT);
}
if (p == src_rq->ed_task) {
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 391ec29c71c0..ac81704e14d9 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -12,6 +12,7 @@
#include <linux/hrtimer.h>
#include "tune.h"
+#include "walt.h"
int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
@@ -1445,10 +1446,15 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
+#ifdef CONFIG_SMP
+ schedtune_enqueue_task(p, cpu_of(rq));
+#endif
+
if (flags & ENQUEUE_WAKEUP)
rt_se->timeout = 0;
enqueue_rt_entity(rt_se, flags);
+ walt_inc_cumulative_runnable_avg(rq, p);
inc_hmp_sched_stats_rt(rq, p);
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
@@ -1486,8 +1492,13 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct sched_rt_entity *rt_se = &p->rt;
+#ifdef CONFIG_SMP
+ schedtune_dequeue_task(p, cpu_of(rq));
+#endif
+
update_curr_rt(rq);
dequeue_rt_entity(rt_se, flags);
+ walt_dec_cumulative_runnable_avg(rq, p);
dec_hmp_sched_stats_rt(rq, p);
dequeue_pushable_task(rq, p);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 90cc450dff7e..78ba150f2016 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -226,9 +226,8 @@ struct cfs_bandwidth {
ktime_t period;
u64 quota, runtime;
s64 hierarchical_quota;
- u64 runtime_expires;
- int idle, period_active;
+ short idle, period_active;
struct hrtimer period_timer, slack_timer;
struct list_head throttled_cfs_rq;
@@ -511,6 +510,10 @@ struct cfs_rq {
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */
+#ifdef CONFIG_SCHED_WALT
+ u64 cumulative_runnable_avg;
+#endif
+
#ifdef CONFIG_CFS_BANDWIDTH
#ifdef CONFIG_SCHED_HMP
@@ -518,7 +521,6 @@ struct cfs_rq {
#endif
int runtime_enabled;
- u64 runtime_expires;
s64 runtime_remaining;
u64 throttled_clock, throttled_clock_task;
@@ -819,6 +821,7 @@ struct rq {
#endif
#ifdef CONFIG_SCHED_WALT
+ unsigned int cur_freq;
u64 cumulative_runnable_avg;
u64 window_start;
u64 curr_runnable_sum;
@@ -2027,6 +2030,7 @@ static const u32 prio_to_wmult[40] = {
#define DEQUEUE_SLEEP 0x01
#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
+#define DEQUEUE_IDLE 0x80 /* The last dequeue before IDLE */
#define ENQUEUE_WAKEUP 0x01
#define ENQUEUE_RESTORE 0x02
@@ -2856,8 +2860,10 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
#ifdef CONFIG_SCHED_HMP
/*
* Skip if we've already reported, but not if this is an inter-cluster
- * migration
+ * migration. Also only allow WALT update sites.
*/
+ if (!(flags & SCHED_CPUFREQ_WALT))
+ return;
if (!sched_disable_window_stats &&
(rq->load_reported_window == rq->window_start) &&
!(flags & SCHED_CPUFREQ_INTERCLUSTER_MIG))
@@ -2865,7 +2871,8 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
rq->load_reported_window = rq->window_start;
#endif
- data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
+ cpu_of(rq)));
if (data)
data->func(data, rq_clock(rq), flags);
}
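
Switching from this_cpu_ptr() to per_cpu_ptr(..., cpu_of(rq)) matters because cpufreq_update_util() can now be invoked for a remote rq (e.g. on inter-cluster migration): the hook registered for the rq's own CPU must fire, not the caller's. A userspace analogy of the difference:

/* Userspace analogy for this_cpu_ptr() vs per_cpu_ptr() above: the
 * update hook must come from the slot of the rq's CPU, not the slot
 * of whichever CPU happens to run the caller.
 */
#include <stdio.h>

#define NR_CPUS 4

static const char *hook[NR_CPUS] = { "hook0", "hook1", "hook2", "hook3" };

static const char *this_cpu_hook(int caller_cpu) { return hook[caller_cpu]; }
static const char *per_cpu_hook(int target_cpu)  { return hook[target_cpu]; }

int main(void)
{
	int caller_cpu = 0, rq_cpu = 3;   /* remote update, e.g. migration */

	printf("wrong (this_cpu): %s\n", this_cpu_hook(caller_cpu));
	printf("right (per_cpu):  %s\n", per_cpu_hook(rq_cpu));
	return 0;
}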
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 3278c81cefb1..0fa11d86599e 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -1,4 +1,5 @@
#include "sched.h"
+#include "walt.h"
/*
* stop-task scheduling class.
@@ -78,6 +79,7 @@ static void
enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
add_nr_running(rq, 1);
+ walt_inc_cumulative_runnable_avg(rq, p);
inc_hmp_sched_stats_stop(rq, p);
}
@@ -85,6 +87,7 @@ static void
dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
{
sub_nr_running(rq, 1);
+ walt_dec_cumulative_runnable_avg(rq, p);
dec_hmp_sched_stats_stop(rq, p);
}
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index b84d13750604..d0ef97f484b1 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -240,7 +240,7 @@ schedtune_accept_deltas(int nrg_delta, int cap_delta,
* implementation especially for the computation of the per-CPU boost
* value
*/
-#define BOOSTGROUPS_COUNT 5
+#define BOOSTGROUPS_COUNT 6
/* Array of configured boostgroups */
static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
@@ -770,6 +770,7 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
static void schedtune_attach(struct cgroup_taskset *tset)
{
+#ifdef CONFIG_SCHED_HMP
struct task_struct *task;
struct cgroup_subsys_state *css;
struct schedtune *st;
@@ -782,6 +783,7 @@ static void schedtune_attach(struct cgroup_taskset *tset)
cgroup_taskset_for_each(task, css, tset)
sync_cgroup_colocation(task, colocate);
+#endif
}
static struct cftype files[] = {
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 8d25ffbe4fed..911606537808 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -57,11 +57,6 @@ static unsigned int sync_cpu;
static ktime_t ktime_last;
static bool walt_ktime_suspended;
-static unsigned int task_load(struct task_struct *p)
-{
- return p->ravg.demand;
-}
-
static inline void fixup_cum_window_demand(struct rq *rq, s64 delta)
{
rq->cum_window_demand += delta;
diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h
index de7edac43674..34c72a0fcf39 100644
--- a/kernel/sched/walt.h
+++ b/kernel/sched/walt.h
@@ -54,6 +54,8 @@ static inline void walt_set_window_start(struct rq *rq) { }
static inline void walt_migrate_sync_cpu(int cpu) { }
static inline void walt_init_cpu_efficiency(void) { }
static inline u64 walt_ktime_clock(void) { return 0; }
+static inline void walt_account_irqtime(int cpu, struct task_struct *curr,
+ u64 delta, u64 wallclock) { }
#define walt_cpu_high_irqload(cpu) false
diff --git a/kernel/signal.c b/kernel/signal.c
index a699055ebfe8..a9697e189a58 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -14,7 +14,9 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/proc_fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
@@ -1632,6 +1634,14 @@ ret:
return ret;
}
+static void do_notify_pidfd(struct task_struct *task)
+{
+ struct pid *pid;
+
+ pid = task_pid(task);
+ wake_up_all(&pid->wait_pidfd);
+}
+
/*
* Let a parent know about the death of a child.
* For a stopped/continued status change, use do_notify_parent_cldstop instead.
@@ -1655,6 +1665,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
BUG_ON(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
+ /* Wake up all pidfd waiters */
+ do_notify_pidfd(tsk);
+
if (sig != SIGCHLD) {
/*
* This is only possible if parent == real_parent.
@@ -2922,6 +2935,15 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
return ret;
}
+static inline void prepare_kill_siginfo(int sig, struct siginfo *info)
+{
+ info->si_signo = sig;
+ info->si_errno = 0;
+ info->si_code = SI_USER;
+ info->si_pid = task_tgid_vnr(current);
+ info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
+}
+
/**
* sys_kill - send a signal to a process
* @pid: the PID of the process
@@ -2931,15 +2953,125 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct siginfo info;
- info.si_signo = sig;
- info.si_errno = 0;
- info.si_code = SI_USER;
- info.si_pid = task_tgid_vnr(current);
- info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+ prepare_kill_siginfo(sig, &info);
return kill_something_info(sig, &info, pid);
}
+/*
+ * Verify that the signaler and signalee either are in the same pid namespace
+ * or that the signaler's pid namespace is an ancestor of the signalee's pid
+ * namespace.
+ */
+static bool access_pidfd_pidns(struct pid *pid)
+{
+ struct pid_namespace *active = task_active_pid_ns(current);
+ struct pid_namespace *p = ns_of_pid(pid);
+
+ for (;;) {
+ if (!p)
+ return false;
+ if (p == active)
+ break;
+ p = p->parent;
+ }
+
+ return true;
+}
+
+static struct pid *pidfd_to_pid(const struct file *file)
+{
+ if (file->f_op == &pidfd_fops)
+ return file->private_data;
+
+ return tgid_pidfd_to_pid(file);
+}
+
+static int copy_siginfo_from_user_any(siginfo_t *kinfo, siginfo_t __user *info)
+{
+#ifdef CONFIG_COMPAT
+ /*
+ * Avoid hooking up compat syscalls and instead handle necessary
+ * conversions here. Note, this is a stop-gap measure and should not be
+ * considered a generic solution.
+ */
+ if (in_compat_syscall())
+ return copy_siginfo_from_user32(
+ kinfo, (struct compat_siginfo __user *)info);
+#endif
+ return copy_from_user(kinfo, info, sizeof(siginfo_t));
+}
+
+/**
+ * sys_pidfd_send_signal - Signal a process through a pidfd
+ * @pidfd: file descriptor of the process
+ * @sig: signal to send
+ * @info: signal info
+ * @flags: future flags
+ *
+ * The syscall currently only signals via PIDTYPE_PID which covers
+ * kill(<positive-pid>, <signal>). It does not signal threads or process
+ * groups.
+ * In order to extend the syscall to threads and process groups the @flags
+ * argument should be used. In essence, the @flags argument will determine
+ * what is signaled and not the file descriptor itself. In other words,
+ * grouping is a property of the flags argument not a property of the file
+ * descriptor.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
+ siginfo_t __user *, info, unsigned int, flags)
+{
+ int ret;
+ struct fd f;
+ struct pid *pid;
+ siginfo_t kinfo;
+
+ /* Enforce flags be set to 0 until we add an extension. */
+ if (flags)
+ return -EINVAL;
+
+ f = fdget(pidfd);
+ if (!f.file)
+ return -EBADF;
+
+ /* Is this a pidfd? */
+ pid = pidfd_to_pid(f.file);
+ if (IS_ERR(pid)) {
+ ret = PTR_ERR(pid);
+ goto err;
+ }
+
+ ret = -EINVAL;
+ if (!access_pidfd_pidns(pid))
+ goto err;
+
+ if (info) {
+ ret = copy_siginfo_from_user_any(&kinfo, info);
+ if (unlikely(ret))
+ goto err;
+
+ ret = -EINVAL;
+ if (unlikely(sig != kinfo.si_signo))
+ goto err;
+
+ /* Only allow sending arbitrary signals to yourself. */
+ ret = -EPERM;
+ if ((task_pid(current) != pid) &&
+ (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
+ goto err;
+ } else {
+ prepare_kill_siginfo(sig, &kinfo);
+ }
+
+ ret = kill_pid_info(sig, &kinfo, pid);
+
+err:
+ fdput(f);
+ return ret;
+}
+
static int
do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
{
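
On this kernel the pidfd accepted by the new syscall can be a /proc/<pid> directory fd, since pidfd_to_pid() falls back to tgid_pidfd_to_pid() for anything that isn't a pidfd_fops file. A hedged userspace sketch; the syscall number below is the upstream one (424) and may not match this tree's syscall table:

/* Sketch: send SIGTERM through a pidfd obtained from /proc/<pid>. */
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_pidfd_send_signal
#define __NR_pidfd_send_signal 424	/* assumption: upstream number */
#endif

int main(int argc, char **argv)
{
	char path[64];
	int pidfd;

	if (argc != 2)
		return 1;
	snprintf(path, sizeof(path), "/proc/%s", argv[1]);
	pidfd = open(path, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
	if (pidfd < 0) {
		perror("open");
		return 1;
	}
	/* info == NULL: the kernel builds a SI_USER siginfo; flags must be 0 */
	if (syscall(__NR_pidfd_send_signal, pidfd, SIGTERM, NULL, 0) < 0) {
		perror("pidfd_send_signal");
		return 1;
	}
	close(pidfd);
	return 0;
}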
diff --git a/kernel/sys.c b/kernel/sys.c
index d5ea3360038c..25cf2aa72d3b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1854,7 +1854,11 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
return error;
}
- down_write(&mm->mmap_sem);
+ /*
+ * arg_lock protects concurrent updates but we still need mmap_sem for
+ * read to exclude races with sys_brk.
+ */
+ down_read(&mm->mmap_sem);
/*
* We don't validate if these members are pointing to
@@ -1868,6 +1872,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
* to any problem in kernel itself
*/
+ spin_lock(&mm->arg_lock);
mm->start_code = prctl_map.start_code;
mm->end_code = prctl_map.end_code;
mm->start_data = prctl_map.start_data;
@@ -1879,6 +1884,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
mm->arg_end = prctl_map.arg_end;
mm->env_start = prctl_map.env_start;
mm->env_end = prctl_map.env_end;
+ spin_unlock(&mm->arg_lock);
/*
* Note this update of @saved_auxv is lockless thus
@@ -1891,7 +1897,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
if (prctl_map.auxv_size)
memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
- up_write(&mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return 0;
}
#endif /* CONFIG_CHECKPOINT_RESTORE */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e2aab9cf058b..8980bdffde3d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -575,6 +575,36 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_SCHED_WALT
+ {
+ .procname = "sched_use_walt_cpu_util",
+ .data = &sysctl_sched_use_walt_cpu_util,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_use_walt_task_util",
+ .data = &sysctl_sched_use_walt_task_util,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_walt_init_task_load_pct",
+ .data = &sysctl_sched_walt_init_task_load_pct,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sched_walt_cpu_high_irqload",
+ .data = &sysctl_sched_walt_cpu_high_irqload,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
{
.procname = "sched_cstate_aware",
.data = &sysctl_sched_cstate_aware,
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 4171fee2d4ec..612c97156df7 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -25,6 +25,7 @@
#include <linux/posix-timers.h>
#include <linux/workqueue.h>
#include <linux/freezer.h>
+#include <linux/delay.h>
#ifdef CONFIG_MSM_PM
#include "lpm-levels.h"
@@ -503,6 +504,7 @@ int alarm_cancel(struct alarm *alarm)
if (ret >= 0)
return ret;
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
EXPORT_SYMBOL_GPL(alarm_cancel);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 79fadcad21ff..6bd4247198e2 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -49,6 +49,7 @@
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include <linux/freezer.h>
+#include <linux/delay.h>
#include <asm/uaccess.h>
@@ -156,6 +157,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
}
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
@@ -1061,6 +1063,7 @@ int hrtimer_cancel(struct hrtimer *timer)
if (ret >= 0)
return ret;
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 67646a316436..2d8b82d90c9f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -798,6 +798,7 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
spin_unlock_irqrestore(&base->lock, *flags);
}
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
@@ -1148,6 +1149,7 @@ int del_timer_sync(struct timer_list *timer)
if (ret >= 0)
return ret;
cpu_relax();
+ ndelay(TIMER_LOCK_TIGHT_LOOP_DELAY_NS);
}
}
EXPORT_SYMBOL(del_timer_sync);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c5484723abda..74fa302d9a68 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -128,8 +128,9 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs);
#else
/* See comment below, where ftrace_ops_list_func is defined */
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
-#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *regs);
+#define ftrace_ops_list_func ftrace_ops_no_ops
#endif
/*
@@ -5229,7 +5230,8 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
}
NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *regs)
{
__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
}
@@ -5677,14 +5679,17 @@ void ftrace_graph_graph_time_control(bool enable)
fgraph_graph_time = enable;
}
+void ftrace_graph_return_stub(struct ftrace_graph_ret *trace)
+{
+}
+
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
{
return 0;
}
/* The callbacks that hook a function */
-trace_func_graph_ret_t ftrace_graph_return =
- (trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ret_t ftrace_graph_return = ftrace_graph_return_stub;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -5912,7 +5917,7 @@ void unregister_ftrace_graph(void)
goto out;
ftrace_graph_active--;
- ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
+ ftrace_graph_return = ftrace_graph_return_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
__ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
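
Both ftrace changes replace casts between incompatible function-pointer types with stubs of the exact expected prototype; calling through a mismatched pointer is undefined behavior and trips Control Flow Integrity checks. A minimal illustration of the pattern, with simplified stand-in types:

/* Minimal illustration of the typed-stub pattern used above: a stub
 * with the right signature replaces a cast of ftrace_stub/no-ops to a
 * different prototype, which CFI would reject at the call site.
 */
#include <stdio.h>

struct trace_ret { int depth; };
typedef void (*graph_ret_fn)(struct trace_ret *);

static void generic_stub(void) { }                /* wrong prototype */
static void graph_ret_stub(struct trace_ret *t)   /* right prototype */
{
	(void)t;
}

int main(void)
{
	graph_ret_fn cb;
	struct trace_ret r = { 0 };

	/* old style: cb = (graph_ret_fn)generic_stub; -- UB / CFI violation */
	(void)generic_stub;
	cb = graph_ret_stub;                          /* new style: typed stub */
	cb(&r);
	printf("stub called safely\n");
	return 0;
}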
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a67f792fb950..17996354c745 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7026,7 +7026,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
}
-static struct vfsmount *trace_automount(void *ingore)
+static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
{
struct vfsmount *mnt;
struct file_system_type *type;
@@ -7039,7 +7039,7 @@ static struct vfsmount *trace_automount(void *ingore)
type = get_fs_type("tracefs");
if (!type)
return NULL;
- mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
+ mnt = vfs_submount(mntpt, type, "tracefs", NULL);
put_filesystem(type);
if (IS_ERR(mnt))
return NULL;
diff --git a/mm/filemap.c b/mm/filemap.c
index 8309b5b773cd..1466db70dc42 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2245,7 +2245,7 @@ static struct page *wait_on_page_read(struct page *page)
static struct page *do_read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *, struct page *),
+ int (*filler)(struct file *, struct page *),
void *data,
gfp_t gfp)
{
@@ -2360,7 +2360,7 @@ out:
*/
struct page *read_cache_page(struct address_space *mapping,
pgoff_t index,
- int (*filler)(void *, struct page *),
+ int (*filler)(struct file *, struct page *),
void *data)
{
return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
@@ -2382,7 +2382,7 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
pgoff_t index,
gfp_t gfp)
{
- filler_t *filler = (filler_t *)mapping->a_ops->readpage;
+ filler_t *filler = mapping->a_ops->readpage;
return do_read_cache_page(mapping, index, filler, NULL, gfp);
}
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 975e49f00f34..02a6962d5b0b 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -21,6 +21,7 @@ struct mm_struct init_mm = {
.mm_count = ATOMIC_INIT(1),
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+ .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
INIT_MM_CONTEXT(init_mm)
diff --git a/mm/ksm.c b/mm/ksm.c
index 2a4ef426b331..98920d1d59ed 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -580,8 +580,9 @@ again:
* case this node is no longer referenced, and should be freed;
* however, it might mean that the page is under page_freeze_refs().
* The __remove_mapping() case is easy, again the node is now stale;
- * but if page is swapcache in migrate_page_move_mapping(), it might
- * still be our page, in which case it's essential to keep the node.
+ * the same is in reuse_ksm_page() case; but if page is swapcache
+ * in migrate_page_move_mapping(), it might still be our page,
+ * in which case it's essential to keep the node.
*/
while (!get_page_unless_zero(page)) {
/*
diff --git a/mm/memory.c b/mm/memory.c
index 09a57fe6ae01..0c69908d3eed 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2399,39 +2399,31 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
* Take out anonymous pages first, anonymous shared vmas are
* not dirty accountable.
*/
- if (PageAnon(old_page) && !PageKsm(old_page)) {
- if (!trylock_page(old_page)) {
- page_cache_get(old_page);
- pte_unmap_unlock(page_table, ptl);
- lock_page(old_page);
- page_table = pte_offset_map_lock(mm, pmd, address,
- &ptl);
- if (!pte_same(*page_table, orig_pte)) {
- unlock_page(old_page);
- pte_unmap_unlock(page_table, ptl);
- page_cache_release(old_page);
- return 0;
- }
- page_cache_release(old_page);
- }
- if (reuse_swap_page(old_page)) {
- /*
- * The page is all ours. Move it to our anon_vma so
- * the rmap code will not search our parent or siblings.
- * Protected against the rmap code by the page lock.
- */
- page_move_anon_rmap(old_page, vma, address);
+ if (PageAnon(old_page)) {
+ /* PageKsm() doesn't necessarily raise the page refcount */
+ if (PageKsm(old_page) || page_count(old_page) != 1)
+ goto copy;
+ if (!trylock_page(old_page))
+ goto copy;
+ if (PageKsm(old_page) || page_mapcount(old_page) != 1 || page_count(old_page) != 1) {
unlock_page(old_page);
- return wp_page_reuse(mm, vma, address, page_table, ptl,
- orig_pte, old_page, 0, 0);
+ goto copy;
}
+ /*
+ * Ok, we've got the only map reference, and the only
+ * page count reference, and the page is locked,
+ * it's dark out, and we're wearing sunglasses. Hit it.
+ */
unlock_page(old_page);
+ wp_page_reuse(mm, vma, address, page_table, ptl,
+ orig_pte, old_page, 0, 0);
+ return VM_FAULT_WRITE;
} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
(VM_WRITE|VM_SHARED))) {
return wp_page_shared(mm, vma, address, page_table, pmd,
ptl, orig_pte, old_page);
}
-
+copy:
/*
* Ok, we need to copy. Oh, well..
*/
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bd0c4bc00941..c7126875d8dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2087,8 +2087,9 @@ static void drain_pages(unsigned int cpu)
* The CPU has to be pinned. When zone parameter is non-NULL, spill just
* the single zone's pages.
*/
-void drain_local_pages(struct zone *zone)
+void drain_local_pages(void *z)
{
+ struct zone *zone = (struct zone *)z;
int cpu = smp_processor_id();
if (zone)
@@ -2148,8 +2149,7 @@ void drain_all_pages(struct zone *zone)
else
cpumask_clear_cpu(cpu, &cpus_with_pcps);
}
- on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
- zone, 1);
+ on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, zone, 1);
}
#ifdef CONFIG_HIBERNATION
diff --git a/mm/readahead.c b/mm/readahead.c
index fb1d210dbf05..3e25601ce14b 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -81,7 +81,7 @@ static void read_cache_pages_invalidate_pages(struct address_space *mapping,
* Hides the details of the LRU cache etc from the filesystems.
*/
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
- int (*filler)(void *, struct page *), void *data)
+ int (*filler)(struct file *, struct page *), void *data)
{
struct page *page;
int ret = 0;
diff --git a/net/compat.c b/net/compat.c
index 14459a87fdbc..53e58bfe0dc6 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -356,7 +356,8 @@ static int do_set_sock_timeout(struct socket *sock, int level,
static int compat_sock_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
- if (optname == SO_ATTACH_FILTER)
+ if (optname == SO_ATTACH_FILTER ||
+ optname == SO_ATTACH_REUSEPORT_CBPF)
return do_set_attach_filter(sock, level, optname,
optval, optlen);
if (!COMPAT_USE_64BIT_TIME &&
diff --git a/net/core/filter.c b/net/core/filter.c
index fa6ec1310621..ebf7754810f0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1398,7 +1398,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
@@ -1433,7 +1433,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
{
void *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
goto err_clear;
ptr = skb_header_pointer(skb, offset, len, to);
@@ -2594,6 +2594,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto;
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
default:
return NULL;
}
@@ -2719,6 +2721,29 @@ static bool sk_filter_is_valid_access(int off, int size,
return __is_valid_access(off, size, type);
}
+static bool sock_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ if (type == BPF_WRITE) {
+ switch (off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ break;
+ default:
+ return false;
+ }
+ }
+
+ if (off < 0 || off + size > sizeof(struct bpf_sock))
+ return false;
+
+ /* The verifier guarantees that size > 0. */
+ if (off % size != 0)
+ return false;
+
+ return true;
+}
+
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
{
@@ -2977,6 +3002,30 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
return insn - insn_buf;
}
+static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
+ int dst_reg, int src_reg,
+ int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct bpf_sock, bound_dev_if):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sock, sk_bound_dev_if));
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
struct bpf_insn *insn_buf,
@@ -3050,6 +3099,12 @@ static const struct bpf_verifier_ops cg_skb_ops = {
.convert_ctx_access = sk_filter_convert_ctx_access,
};
+static const struct bpf_verifier_ops cg_sock_ops = {
+ .get_func_proto = sk_filter_func_proto,
+ .is_valid_access = sock_filter_is_valid_access,
+ .convert_ctx_access = sock_filter_convert_ctx_access,
+};
+
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
.ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3075,6 +3130,11 @@ static struct bpf_prog_type_list cg_skb_type __read_mostly = {
.type = BPF_PROG_TYPE_CGROUP_SKB,
};
+static struct bpf_prog_type_list cg_sock_type __read_mostly = {
+ .ops = &cg_sock_ops,
+ .type = BPF_PROG_TYPE_CGROUP_SOCK
+};
+
static int __init register_sk_filter_ops(void)
{
bpf_register_prog_type(&sk_filter_type);
@@ -3082,6 +3142,7 @@ static int __init register_sk_filter_ops(void)
bpf_register_prog_type(&sched_act_type);
bpf_register_prog_type(&xdp_type);
bpf_register_prog_type(&cg_skb_type);
+ bpf_register_prog_type(&cg_sock_type);
return 0;
}
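
The new verifier ops only admit writes to bpf_sock->bound_dev_if, so a BPF_PROG_TYPE_CGROUP_SOCK program can pin sockets created in a cgroup to one interface. A hedged sketch of such a program using the instruction macros from samples/bpf (bpf_insn.h); loading and attaching via bpf(2) with BPF_PROG_LOAD and BPF_PROG_ATTACH is omitted, and ifindex 2 is just an example:

/* Sketch: cgroup/sock program that sets bound_dev_if on every new
 * socket, exercising the write path sock_filter_is_valid_access()
 * permits above. Assumes samples/bpf instruction macros.
 */
#include <linux/bpf.h>
#include <stddef.h>
#include "bpf_insn.h"	/* assumption: samples/bpf macro header */

struct bpf_insn prog[] = {
	/* r2 = 2; ctx->bound_dev_if = r2 (the only allowed write) */
	BPF_MOV64_IMM(BPF_REG_2, 2),
	BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2,
		    offsetof(struct bpf_sock, bound_dev_if)),
	/* return 1: allow socket creation */
	BPF_MOV64_IMM(BPF_REG_0, 1),
	BPF_EXIT_INSN(),
};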
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b7734f91abd6..15a7a2667e1e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3105,6 +3105,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int pos;
int dummy;
+ if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
+ (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
+ /* gso_size is untrusted, and we have a frag_list with a linear
+ * non head_frag head.
+ *
+ * (we assume checking the first list_skb member suffices;
+ * i.e. if any of the list_skb members has a non-head_frag
+ * head, then the first one does too).
+ *
+ * If head_skb's headlen does not fit requested gso_size, it
+ * means that the frag_list members do NOT terminate on exact
+ * gso_size boundaries. Hence we cannot perform skb_frag_t page
+ * sharing. Therefore we must fallback to copying the frag_list
+ * skbs; we do so by disabling SG.
+ */
+ if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
+ features &= ~NETIF_F_SG;
+ }
+
__skb_push(head_skb, doffset);
proto = skb_network_protocol(head_skb, &dummy);
if (unlikely(!proto))
@@ -3122,9 +3141,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
int hsize;
int size;
- len = head_skb->len - offset;
- if (len > mss)
- len = mss;
+ if (unlikely(mss == GSO_BY_FRAGS)) {
+ len = list_skb->len;
+ } else {
+ len = head_skb->len - offset;
+ if (len > mss)
+ len = mss;
+ }
hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index ae0969c0fc2e..2ba3ae7720a8 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk)
* soft irq of receive path or setsockopt from process context
*/
spin_lock_bh(&reuseport_lock);
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "multiple allocations for the same socket");
+
+ /* Allocation attempts can occur concurrently via the setsockopt path
+ * and the bind/hash path. Nothing to do when we lose the race.
+ */
+ if (rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)))
+ goto out;
+
reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
@@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk)
reuse->num_socks = 1;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+out:
spin_unlock_bh(&reuseport_lock);
return 0;
@@ -87,22 +93,42 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
return more_reuse;
}
+static void reuseport_free_rcu(struct rcu_head *head)
+{
+ struct sock_reuseport *reuse;
+
+ reuse = container_of(head, struct sock_reuseport, rcu);
+ if (reuse->prog)
+ bpf_prog_destroy(reuse->prog);
+ kfree(reuse);
+}
+
/**
* reuseport_add_sock - Add a socket to the reuseport group of another.
* @sk: New socket to add to the group.
* @sk2: Socket belonging to the existing reuseport group.
* May return ENOMEM and not add socket to group under memory pressure.
*/
-int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2)
{
- struct sock_reuseport *reuse;
+ struct sock_reuseport *old_reuse, *reuse;
+
+ if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+ int err = reuseport_alloc(sk2);
+
+ if (err)
+ return err;
+ }
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "socket already in reuseport group");
+ lockdep_is_held(&reuseport_lock));
+ old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ if (old_reuse && old_reuse->num_socks != 1) {
+ spin_unlock_bh(&reuseport_lock);
+ return -EBUSY;
+ }
if (reuse->num_socks == reuse->max_socks) {
reuse = reuseport_grow(reuse);
@@ -120,20 +146,12 @@ int reuseport_add_sock(struct sock *sk, const struct sock *sk2)
spin_unlock_bh(&reuseport_lock);
+ if (old_reuse)
+ call_rcu(&old_reuse->rcu, reuseport_free_rcu);
return 0;
}
EXPORT_SYMBOL(reuseport_add_sock);
-static void reuseport_free_rcu(struct rcu_head *head)
-{
- struct sock_reuseport *reuse;
-
- reuse = container_of(head, struct sock_reuseport, rcu);
- if (reuse->prog)
- bpf_prog_destroy(reuse->prog);
- kfree(reuse);
-}
-
void reuseport_detach_sock(struct sock *sk)
{
struct sock_reuseport *reuse;
@@ -173,7 +191,7 @@ static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
/* temporarily advance data past protocol header */
if (!pskb_pull(skb, hdr_len)) {
- consume_skb(nskb);
+ kfree_skb(nskb);
return NULL;
}
index = bpf_prog_run_save_cb(prog, skb);
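
The reuseport group machinery above is driven entirely from setsockopt(SO_REUSEPORT) and bind(). A minimal pair of listeners sharing one port (error handling trimmed; the port number is arbitrary):

/* Minimal SO_REUSEPORT pair: both sockets join one reuseport group,
 * exercising reuseport_alloc()/reuseport_add_sock() above. Incoming
 * connections are then spread across the group, or steered by a BPF
 * program attached with SO_ATTACH_REUSEPORT_CBPF.
 */
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int make_listener(unsigned short port)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in a;

	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
	memset(&a, 0, sizeof(a));
	a.sin_family = AF_INET;
	a.sin_port = htons(port);
	a.sin_addr.s_addr = htonl(INADDR_ANY);
	bind(fd, (struct sockaddr *)&a, sizeof(a));
	listen(fd, 16);
	return fd;
}

int main(void)
{
	int a = make_listener(5555), b = make_listener(5555); /* same port */

	printf("listeners %d and %d share one reuseport group\n", a, b);
	close(a);
	close(b);
	return 0;
}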
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 3f51280374f0..ac0e8152c0f1 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -804,7 +804,7 @@ static int dccp_v4_rcv(struct sk_buff *skb)
}
lookup:
- sk = __inet_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport);
if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index bb1a7405dc0e..0958721e4d3d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -684,7 +684,7 @@ static int dccp_v6_rcv(struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
lookup:
- sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
+ sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
dh->dccph_sport, dh->dccph_dport,
inet6_iif(skb));
if (!sk) {
@@ -1010,7 +1010,7 @@ static struct proto dccp_v6_prot = {
.sendmsg = dccp_sendmsg,
.recvmsg = dccp_recvmsg,
.backlog_rcv = dccp_v6_do_rcv,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.accept = inet_csk_accept,
.get_port = inet_csk_get_port,
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 42ab1b61b513..6383627b783e 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -182,12 +182,14 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
static HLIST_HEAD(raw_head);
static DEFINE_RWLOCK(raw_lock);
-static void raw_hash(struct sock *sk)
+static int raw_hash(struct sock *sk)
{
write_lock_bh(&raw_lock);
sk_add_node(sk, &raw_head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&raw_lock);
+
+ return 0;
}
static void raw_unhash(struct sock *sk)
@@ -462,12 +464,14 @@ static inline struct dgram_sock *dgram_sk(const struct sock *sk)
return container_of(sk, struct dgram_sock, sk);
}
-static void dgram_hash(struct sock *sk)
+static int dgram_hash(struct sock *sk)
{
write_lock_bh(&dgram_lock);
sk_add_node(sk, &dgram_head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&dgram_lock);
+
+ return 0;
}
static void dgram_unhash(struct sock *sk)
@@ -1034,8 +1038,13 @@ static int ieee802154_create(struct net *net, struct socket *sock,
/* Checksums on by default */
sock_set_flag(sk, SOCK_ZAPPED);
- if (sk->sk_prot->hash)
- sk->sk_prot->hash(sk);
+ if (sk->sk_prot->hash) {
+ rc = sk->sk_prot->hash(sk);
+ if (rc) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
if (sk->sk_prot->init) {
rc = sk->sk_prot->init(sk);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 39eeb5c2d23d..a35c252235ae 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -387,13 +387,27 @@ lookup_protocol:
*/
inet->inet_sport = htons(inet->inet_num);
/* Add to protocol hash chains. */
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err)
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
sk_common_release(sk);
+ goto out;
+ }
}
out:
return err;
@@ -1159,8 +1173,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
* Besides that, it does not check for connection
* uniqueness. Wait for troubles.
*/
- __sk_prot_rehash(sk);
- return 0;
+ return __sk_prot_rehash(sk);
}
int inet_sk_rebuild_header(struct sock *sk)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4b50bcbc6100..f6f3ee843d12 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -24,6 +24,7 @@
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
+#include <net/sock_reuseport.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
- (sk2->sk_state != TCP_TIME_WAIT &&
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2))))) {
if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@ -157,6 +159,7 @@ again:
sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(tb->fastuid, uid))) &&
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
@@ -225,15 +228,18 @@ tb_found:
if (((tb->fastreuse > 0 &&
sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
- smallest_size == -1) {
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) && smallest_size == -1) {
goto success;
} else {
ret = 1;
if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
(tb->fastreuseport > 0 &&
- sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
+ sk->sk_reuseport &&
+ !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ uid_eq(tb->fastuid, uid))) &&
smallest_size != -1 && --attempts >= 0) {
spin_unlock(&head->lock);
goto again;
@@ -755,6 +761,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
+ int err = -EADDRINUSE;
reqsk_queue_alloc(&icsk->icsk_accept_queue);
@@ -772,13 +779,14 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
inet->inet_sport = htons(inet->inet_num);
sk_dst_reset(sk);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
- return 0;
+ if (likely(!err))
+ return 0;
}
sk->sk_state = TCP_CLOSE;
- return -EADDRINUSE;
+ return err;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fcb83b2a61f0..1582fe5b04c3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -370,18 +370,18 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
struct sock *sk;
if (req->sdiag_family == AF_INET)
- sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
+ sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
else if (req->sdiag_family == AF_INET6) {
if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
- sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
+ sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
req->id.idiag_dport, req->id.idiag_src[3],
req->id.idiag_sport, req->id.idiag_if);
else
- sk = inet6_lookup(net, hashinfo,
+ sk = inet6_lookup(net, hashinfo, NULL, 0,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
(struct in6_addr *)req->id.idiag_src,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 738cd5c822b1..b947e4c9be18 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -20,10 +20,12 @@
#include <linux/wait.h>
#include <linux/vmalloc.h>
+#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
+#include <net/sock_reuseport.h>
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
const __u16 lport, const __be32 faddr,
@@ -206,6 +208,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
struct sock *__inet_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
const __be32 daddr, const unsigned short hnum,
const int dif)
@@ -215,6 +218,7 @@ struct sock *__inet_lookup_listener(struct net *net,
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
int score, hiscore, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 phash = 0;
rcu_read_lock();
@@ -230,6 +234,15 @@ begin:
if (reuseport) {
phash = inet_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+ sk2 = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (sk2) {
+ result = sk2;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == hiscore && reuseport) {
@@ -247,11 +260,13 @@ begin:
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, daddr,
dif) < hiscore)) {
sock_put(result);
+ select_ok = false;
goto begin;
}
}
@@ -450,32 +465,73 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
}
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
-void __inet_hash(struct sock *sk, struct sock *osk)
+static int inet_reuseport_add_sock(struct sock *sk,
+ struct inet_listen_hashbucket *ilb,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
+{
+ struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
+ struct sock *sk2;
+ struct hlist_nulls_node *node;
+ kuid_t uid = sock_i_uid(sk);
+
+ sk_nulls_for_each_rcu(sk2, node, &ilb->head) {
+ if (sk2 != sk &&
+ sk2->sk_family == sk->sk_family &&
+ ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
+ sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
+ inet_csk(sk2)->icsk_bind_hash == tb &&
+ sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
+ saddr_same(sk, sk2, false))
+ return reuseport_add_sock(sk, sk2);
+ }
+
+ return reuseport_alloc(sk);
+}
+
+int __inet_hash(struct sock *sk, struct sock *osk,
+ int (*saddr_same)(const struct sock *sk1,
+ const struct sock *sk2,
+ bool match_wildcard))
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
+ int err = 0;
if (sk->sk_state != TCP_LISTEN) {
inet_ehash_nolisten(sk, osk);
- return;
+ return 0;
}
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
spin_lock(&ilb->lock);
+ if (sk->sk_reuseport) {
+ err = inet_reuseport_add_sock(sk, ilb, saddr_same);
+ if (err)
+ goto unlock;
+ }
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+unlock:
spin_unlock(&ilb->lock);
+
+ return err;
}
EXPORT_SYMBOL(__inet_hash);
-void inet_hash(struct sock *sk)
+int inet_hash(struct sock *sk)
{
+ int err = 0;
+
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
- __inet_hash(sk, NULL);
+ err = __inet_hash(sk, NULL, ipv4_rcv_saddr_equal);
local_bh_enable();
}
+
+ return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
@@ -494,6 +550,8 @@ void inet_unhash(struct sock *sk)
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
spin_lock_bh(lock);
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
done = __sk_nulls_del_node_init_rcu(sk);
if (done)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c3f808602b65..f56e803c07f9 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -145,10 +145,12 @@ fail:
}
EXPORT_SYMBOL_GPL(ping_get_port);
-void ping_hash(struct sock *sk)
+int ping_hash(struct sock *sk)
{
pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
BUG(); /* "Please do not press this button again." */
+
+ return 0;
}
void ping_unhash(struct sock *sk)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 81ade975db5f..b75a24adb580 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ static struct raw_hashinfo raw_v4_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
};
-void raw_hash_sk(struct sock *sk)
+int raw_hash_sk(struct sock *sk)
{
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_head *head;
@@ -104,6 +104,8 @@ void raw_hash_sk(struct sock *sk)
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
write_unlock_bh(&h->lock);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(raw_hash_sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e373dcdab5e2..4a9d8c117794 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -643,8 +643,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = __inet_lookup_listener(net,
- &tcp_hashinfo, ip_hdr(skb)->saddr,
+ sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
+ ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
ntohs(th->source), inet_iif(skb));
/* don't send rst if it can't find key */
@@ -1631,7 +1631,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->sacked = 0;
lookup:
- sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
+ sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+ th->dest);
if (!sk)
goto no_tcp_socket;
@@ -1754,7 +1755,8 @@ do_time_wait:
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
- &tcp_hashinfo,
+ &tcp_hashinfo, skb,
+ __tcp_hdrlen(th),
iph->saddr, th->source,
iph->daddr, th->dest,
inet_iif(skb));
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0a0d44c53373..4976a1c9835f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -143,10 +143,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
unsigned int log)
{
struct sock *sk2;
- struct hlist_nulls_node *node;
kuid_t uid = sock_i_uid(sk);
- sk_nulls_for_each(sk2, node, &hslot->head) {
+ sk_for_each(sk2, &hslot->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
(bitmap || udp_sk(sk2)->udp_port_hash == num) &&
@@ -177,12 +176,11 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
bool match_wildcard))
{
struct sock *sk2;
- struct hlist_nulls_node *node;
kuid_t uid = sock_i_uid(sk);
int res = 0;
spin_lock(&hslot2->lock);
- udp_portaddr_for_each_entry(sk2, node, &hslot2->head) {
+ udp_portaddr_for_each_entry(sk2, &hslot2->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
(udp_sk(sk2)->udp_port_hash == num) &&
@@ -207,11 +205,10 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
bool match_wildcard))
{
struct net *net = sock_net(sk);
- struct hlist_nulls_node *node;
kuid_t uid = sock_i_uid(sk);
struct sock *sk2;
- sk_nulls_for_each(sk2, node, &hslot->head) {
+ sk_for_each(sk2, &hslot->head) {
if (net_eq(sock_net(sk2), net) &&
sk2 != sk &&
sk2->sk_family == sk->sk_family &&
@@ -224,10 +221,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
}
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
/**
@@ -338,17 +332,23 @@ found:
goto fail_unlock;
}
- sk_nulls_add_node_rcu(sk, &hslot->head);
+ sk_add_node_rcu(sk, &hslot->head);
hslot->count++;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
spin_lock(&hslot2->lock);
- hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
- &hslot2->head);
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+ hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
+ &hslot2->head);
+ else
+ hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+ &hslot2->head);
hslot2->count++;
spin_unlock(&hslot2->lock);
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
error = 0;
fail_unlock:
spin_unlock_bh(&hslot->lock);
@@ -361,8 +361,8 @@ EXPORT_SYMBOL(udp_lib_get_port);
* match_wildcard == false: addresses must be exactly the same, i.e.
* 0.0.0.0 only equals to 0.0.0.0
*/
-static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
- bool match_wildcard)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2,
+ bool match_wildcard)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -498,34 +498,31 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
-begin:
result = NULL;
badness = 0;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score2(sk, net, saddr, sport,
daddr, hnum, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, NULL, 0);
- if (sk2) {
- result = sk2;
- goto found;
- }
+ result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (result)
+ return result;
matches = 1;
}
+ badness = score;
+ result = sk;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -533,23 +530,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot2)
- goto begin;
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score2(result, net, saddr, sport,
- daddr, hnum, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
return result;
}
@@ -561,14 +541,12 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
int dif, struct udp_table *udptable, struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
- rcu_read_lock();
if (hslot->count > 10) {
hash2 = udp4_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
@@ -578,7 +556,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
@@ -588,33 +566,29 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
result = udp4_lib_lookup2(net, saddr, sport,
htonl(INADDR_ANY), hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
- rcu_read_unlock();
return result;
}
begin:
result = NULL;
badness = 0;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+ sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, saddr, hnum, sport,
daddr, dport, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, skb,
+ result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- goto found;
- }
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -622,25 +596,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score(result, net, saddr, hnum, sport,
- daddr, dport, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
@@ -656,13 +611,24 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
udptable, skb);
}
+/* Must be called under rcu_read_lock().
+ * Does increment socket refcount.
+ */
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
- return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif,
- &udp_table, NULL);
+ struct sock *sk;
+
+ sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ dif, &udp_table, NULL);
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ return sk;
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
+#endif
static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
__be16 loc_port, __be32 loc_addr,
@@ -764,7 +730,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
void udp_err(struct sk_buff *skb, u32 info)
@@ -1482,13 +1448,13 @@ void udp_lib_unhash(struct sock *sk)
spin_lock_bh(&hslot->lock);
if (rcu_access_pointer(sk->sk_reuseport_cb))
reuseport_detach_sock(sk);
- if (sk_nulls_del_node_init_rcu(sk)) {
+ if (sk_del_node_init_rcu(sk)) {
hslot->count--;
inet_sk(sk)->inet_num = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
spin_lock(&hslot2->lock);
- hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+ hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
hslot2->count--;
spin_unlock(&hslot2->lock);
}
@@ -1521,12 +1487,12 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
if (hslot2 != nhslot2) {
spin_lock(&hslot2->lock);
- hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
+ hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
hslot2->count--;
spin_unlock(&hslot2->lock);
spin_lock(&nhslot2->lock);
- hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
+ hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
&nhslot2->head);
nhslot2->count++;
spin_unlock(&nhslot2->lock);
@@ -1704,35 +1670,6 @@ drop:
return -1;
}
-static void flush_stack(struct sock **stack, unsigned int count,
- struct sk_buff *skb, unsigned int final)
-{
- unsigned int i;
- struct sk_buff *skb1 = NULL;
- struct sock *sk;
-
- for (i = 0; i < count; i++) {
- sk = stack[i];
- if (likely(!skb1))
- skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
-
- if (!skb1) {
- atomic_inc(&sk->sk_drops);
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- }
-
- if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0)
- skb1 = NULL;
-
- sock_put(sk);
- }
- if (unlikely(skb1))
- kfree_skb(skb1);
-}
-
/* For TCP sockets, sk_rx_dst is protected by socket lock
* For UDP, we use xchg() to guard against concurrent changes.
*/
@@ -1756,14 +1693,14 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udp_table *udptable,
int proto)
{
- struct sock *sk, *stack[256 / sizeof(struct sock *)];
- struct hlist_nulls_node *node;
+ struct sock *sk, *first = NULL;
unsigned short hnum = ntohs(uh->dest);
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
- int dif = skb->dev->ifindex;
- unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
- bool inner_flushed = false;
+ unsigned int offset = offsetof(typeof(*sk), sk_node);
+ int dif = skb->dev->ifindex;
+ struct hlist_node *node;
+ struct sk_buff *nskb;
if (use_hash2) {
hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
@@ -1774,23 +1711,28 @@ start_lookup:
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
- spin_lock(&hslot->lock);
- sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
- if (__udp_is_mcast_sock(net, sk,
- uh->dest, daddr,
- uh->source, saddr,
- dif, hnum)) {
- if (unlikely(count == ARRAY_SIZE(stack))) {
- flush_stack(stack, count, skb, ~0);
- inner_flushed = true;
- count = 0;
- }
- stack[count++] = sk;
- sock_hold(sk);
+ sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
+ if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr,
+ uh->source, saddr, dif, hnum))
+ continue;
+
+ if (!first) {
+ first = sk;
+ continue;
}
- }
- spin_unlock(&hslot->lock);
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ atomic_inc(&sk->sk_drops);
+ UDP_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ UDP_INC_STATS_BH(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ continue;
+ }
+ if (udp_queue_rcv_skb(sk, nskb) > 0)
+ consume_skb(nskb);
+ }
/* Also lookup *:port if we are using hash2 and haven't done so yet. */
if (use_hash2 && hash2 != hash2_any) {
@@ -1798,16 +1740,13 @@ start_lookup:
goto start_lookup;
}
- /*
- * do the slow work with no lock held
- */
- if (count) {
- flush_stack(stack, count, skb, count - 1);
+ if (first) {
+ if (udp_queue_rcv_skb(first, skb) > 0)
+ consume_skb(skb);
} else {
- if (!inner_flushed)
- UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
- consume_skb(skb);
+ kfree_skb(skb);
+ UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -1912,7 +1851,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
inet_compute_pseudo);
ret = udp_queue_rcv_skb(sk, skb);
- sock_put(sk);
/* a return value > 0 means to resubmit the input, but
* it wants the return to be -protocol, or 0
@@ -1973,49 +1911,24 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
int dif)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(loc_port);
- unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask);
+ unsigned int slot = udp_hashfn(net, hnum, udp_table.mask);
struct udp_hslot *hslot = &udp_table.hash[slot];
/* Do not bother scanning a too big list */
if (hslot->count > 10)
return NULL;
- rcu_read_lock();
-begin:
- count = 0;
result = NULL;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
- if (__udp_is_mcast_sock(net, sk,
- loc_port, loc_addr,
- rmt_port, rmt_addr,
- dif, hnum)) {
+ sk_for_each_rcu(sk, &hslot->head) {
+ if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr,
+ rmt_port, rmt_addr, dif, hnum)) {
+ if (result)
+ return NULL;
result = sk;
- ++count;
- }
- }
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
- if (count != 1 ||
- unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(!__udp_is_mcast_sock(net, result,
- loc_port, loc_addr,
- rmt_port, rmt_addr,
- dif, hnum))) {
- sock_put(result);
- result = NULL;
}
}
- rcu_read_unlock();
+
return result;
}
@@ -2028,37 +1941,22 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
__be16 rmt_port, __be32 rmt_addr,
int dif)
{
- struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
+ struct sock *sk;
- rcu_read_lock();
- result = NULL;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
- if (INET_MATCH(sk, net, acookie,
- rmt_addr, loc_addr, ports, dif))
- result = sk;
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
+ if (INET_MATCH(sk, net, acookie, rmt_addr,
+ loc_addr, ports, dif))
+ return sk;
/* Only check first socket in chain */
break;
}
-
- if (result) {
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(!INET_MATCH(sk, net, acookie,
- rmt_addr, loc_addr,
- ports, dif))) {
- sock_put(result);
- result = NULL;
- }
- }
- rcu_read_unlock();
- return result;
+ return NULL;
}
void udp_v4_early_demux(struct sk_buff *skb)
@@ -2066,7 +1964,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
const struct iphdr *iph;
const struct udphdr *uh;
- struct sock *sk;
+ struct sock *sk = NULL;
struct dst_entry *dst;
int dif = skb->dev->ifindex;
int ours;
@@ -2098,11 +1996,9 @@ void udp_v4_early_demux(struct sk_buff *skb)
} else if (skb->pkt_type == PACKET_HOST) {
sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
uh->source, iph->saddr, dif);
- } else {
- return;
}
- if (!sk)
+ if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
return;
skb->sk = sk;
@@ -2395,7 +2291,6 @@ struct proto udp_prot = {
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
@@ -2417,14 +2312,13 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
for (state->bucket = start; state->bucket <= state->udp_table->mask;
++state->bucket) {
- struct hlist_nulls_node *node;
struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
- if (hlist_nulls_empty(&hslot->head))
+ if (hlist_empty(&hslot->head))
continue;
spin_lock_bh(&hslot->lock);
- sk_nulls_for_each(sk, node, &hslot->head) {
+ sk_for_each(sk, &hslot->head) {
if (!net_eq(sock_net(sk), net))
continue;
if (sk->sk_family == state->family)
@@ -2443,7 +2337,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
struct net *net = seq_file_net(seq);
do {
- sk = sk_nulls_next(sk);
+ sk = sk_next(sk);
} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
if (!sk) {
@@ -2658,12 +2552,12 @@ void __init udp_table_init(struct udp_table *table, const char *name)
table->hash2 = table->hash + (table->mask + 1);
for (i = 0; i <= table->mask; i++) {
- INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
+ INIT_HLIST_HEAD(&table->hash[i].head);
table->hash[i].count = 0;
spin_lock_init(&table->hash[i].lock);
}
for (i = 0; i <= table->mask; i++) {
- INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i);
+ INIT_HLIST_HEAD(&table->hash2[i].head);
table->hash2[i].count = 0;
spin_lock_init(&table->hash2[i].lock);
}
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 6510e961aaa6..9a89c10a55f0 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -36,10 +36,11 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
const struct inet_diag_req_v2 *req)
{
int err = -EINVAL;
- struct sock *sk;
+ struct sock *sk = NULL;
struct sk_buff *rep;
struct net *net = sock_net(in_skb->sk);
+ rcu_read_lock();
if (req->sdiag_family == AF_INET)
sk = __udp4_lib_lookup(net,
req->id.idiag_src[0], req->id.idiag_sport,
@@ -54,9 +55,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
req->id.idiag_dport,
req->id.idiag_if, tbl, NULL);
#endif
- else
- goto out_nosk;
-
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ rcu_read_unlock();
err = -ENOENT;
if (!sk)
goto out_nosk;
@@ -97,25 +98,24 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
- int num, s_num, slot, s_slot;
- struct net *net = sock_net(skb->sk);
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct net *net = sock_net(skb->sk);
+ int num, s_num, slot, s_slot;
s_slot = cb->args[0];
num = s_num = cb->args[1];
for (slot = s_slot; slot <= table->mask; s_num = 0, slot++) {
- struct sock *sk;
- struct hlist_nulls_node *node;
struct udp_hslot *hslot = &table->hash[slot];
+ struct sock *sk;
num = 0;
- if (hlist_nulls_empty(&hslot->head))
+ if (hlist_empty(&hslot->head))
continue;
spin_lock_bh(&hslot->lock);
- sk_nulls_for_each(sk, node, &hslot->head) {
+ sk_for_each(sk, &hslot->head) {
struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 78766b32b78b..705d9fbf0bcf 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -55,7 +55,6 @@ struct proto udplite_prot = {
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
.obj_size = sizeof(struct udp_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8e38e058a66c..3cd9ad455910 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -250,7 +250,11 @@ lookup_protocol:
* creation time automatically shares.
*/
inet->inet_sport = htons(inet->inet_num);
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
@@ -259,6 +263,14 @@ lookup_protocol:
goto out;
}
}
+
+ if (!kern) {
+ err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+ if (err) {
+ sk_common_release(sk);
+ goto out;
+ }
+ }
out:
return err;
out_rcu_unlock:
@@ -277,6 +289,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
+ bool saved_ipv6only;
int addr_type = 0;
int err = 0;
@@ -396,19 +409,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
+ saved_ipv6only = sk->sk_ipv6only;
+ if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
+ sk->sk_ipv6only = 1;
+
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
}
- if (addr_type != IPV6_ADDR_ANY) {
+ if (addr_type != IPV6_ADDR_ANY)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
- if (addr_type != IPV6_ADDR_MAPPED)
- sk->sk_ipv6only = 1;
- }
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
inet->inet_sport = htons(inet->inet_num);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 908525ae5d19..c7ce585ac17e 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -26,6 +26,7 @@
#include <net/ip6_route.h>
#include <net/sock.h>
#include <net/inet6_connection_sock.h>
+#include <net/sock_reuseport.h>
int inet6_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax)
@@ -48,6 +49,7 @@ int inet6_csk_bind_conflict(const struct sock *sk,
if ((!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) &&
(!reuseport || !sk2->sk_reuseport ||
+ rcu_access_pointer(sk->sk_reuseport_cb) ||
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid,
sock_i_uid((struct sock *)sk2))))) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 21ace5a2bf7c..c27207563d10 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -17,11 +17,13 @@
#include <linux/module.h>
#include <linux/random.h>
+#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
+#include <net/sock_reuseport.h>
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
@@ -121,7 +123,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
}
struct sock *inet6_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
+ struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif)
{
@@ -129,6 +133,7 @@ struct sock *inet6_lookup_listener(struct net *net,
const struct hlist_nulls_node *node;
struct sock *result;
int score, hiscore, matches = 0, reuseport = 0;
+ bool select_ok = true;
u32 phash = 0;
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
@@ -146,6 +151,15 @@ begin:
if (reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
+ if (select_ok) {
+ struct sock *sk2;
+ sk2 = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (sk2) {
+ result = sk2;
+ goto found;
+ }
+ }
matches = 1;
}
} else if (score == hiscore && reuseport) {
@@ -163,11 +177,13 @@ begin:
if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
goto begin;
if (result) {
+found:
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, daddr,
dif) < hiscore)) {
sock_put(result);
+ select_ok = false;
goto begin;
}
}
@@ -177,6 +193,7 @@ begin:
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+ struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
const int dif)
@@ -184,7 +201,8 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
struct sock *sk;
local_bh_disable();
- sk = __inet6_lookup(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif);
+ sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
+ ntohs(dport), dif);
local_bh_enable();
return sk;
@@ -274,3 +292,61 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
__inet6_check_established);
}
EXPORT_SYMBOL_GPL(inet6_hash_connect);
+
+int inet6_hash(struct sock *sk)
+{
+ int err = 0;
+
+ if (sk->sk_state != TCP_CLOSE) {
+ local_bh_disable();
+ err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
+ local_bh_enable();
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(inet6_hash);
+
+/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
+ * only, and any IPv4 addresses if not IPv6 only
+ * match_wildcard == false: addresses must be exactly the same, i.e.
+ * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
+ * and 0.0.0.0 equals to 0.0.0.0 only
+ */
+int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
+{
+ const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
+ int sk2_ipv6only = inet_v6_ipv6only(sk2);
+ int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
+
+ /* if both are mapped, treat as IPv4 */
+ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
+ if (!sk2_ipv6only) {
+ if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
+ return 1;
+ if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
+ return match_wildcard;
+ }
+ return 0;
+ }
+
+ if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
+ return 1;
+
+ if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
+ !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
+ !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
+ return 1;
+
+ if (sk2_rcv_saddr6 &&
+ ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ipv6_rcv_saddr_equal);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e2fe07f7ded9..50091116fb55 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -895,7 +895,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
* no RST generated if md5 hash doesn't match.
*/
sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
- &tcp_hashinfo, &ipv6h->saddr,
+ &tcp_hashinfo, NULL, 0,
+ &ipv6h->saddr,
th->source, &ipv6h->daddr,
ntohs(th->source), tcp_v6_iif(skb));
if (!sk1)
@@ -1420,8 +1421,8 @@ static int tcp_v6_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
lookup:
- sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
- inet6_iif(skb));
+ sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+ th->source, th->dest, inet6_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1552,6 +1553,7 @@ do_time_wait:
struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
ntohs(th->dest), tcp_v6_iif(skb));
@@ -1921,7 +1923,7 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .hash = inet_hash,
+ .hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b345b37edd4f..ce3a100cbf68 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -37,6 +37,7 @@
#include <linux/slab.h>
#include <asm/uaccess.h>
+#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
@@ -78,49 +79,6 @@ static u32 udp6_ehashfn(const struct net *net,
udp_ipv6_hash_secret + net_hash_mix(net));
}
-/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
- * only, and any IPv4 addresses if not IPv6 only
- * match_wildcard == false: addresses must be exactly the same, i.e.
- * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
- * and 0.0.0.0 equals to 0.0.0.0 only
- */
-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
-{
- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
- int sk2_ipv6only = inet_v6_ipv6only(sk2);
- int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
-
- /* if both are mapped, treat as IPv4 */
- if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
- if (!sk2_ipv6only) {
- if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr)
- return 1;
- if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr)
- return match_wildcard;
- }
- return 0;
- }
-
- if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
- return 1;
-
- if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
-
- if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
- !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
-
- if (sk2_rcv_saddr6 &&
- ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))
- return 1;
-
- return 0;
-}
-
static u32 udp6_portaddr_hash(const struct net *net,
const struct in6_addr *addr6,
unsigned int port)
@@ -252,34 +210,32 @@ static inline int compute_score2(struct sock *sk, struct net *net,
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum, int dif,
- struct udp_hslot *hslot2, unsigned int slot2)
+ struct udp_hslot *hslot2, unsigned int slot2,
+ struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
-begin:
result = NULL;
badness = -1;
- udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score2(sk, net, saddr, sport,
daddr, hnum, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, NULL, 0);
- if (sk2) {
- result = sk2;
- goto found;
- }
+
+ result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -287,27 +243,10 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot2)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score2(result, net, saddr, sport,
- daddr, hnum, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
return result;
}
+/* rcu_read_lock() must be held */
struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
@@ -315,14 +254,12 @@ struct sock *__udp6_lib_lookup(struct net *net,
struct sk_buff *skb)
{
struct sock *sk, *result;
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness, matches = 0, reuseport = 0;
u32 hash = 0;
- rcu_read_lock();
if (hslot->count > 10) {
hash2 = udp6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
@@ -332,7 +269,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
if (!result) {
hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
@@ -342,32 +279,28 @@ struct sock *__udp6_lib_lookup(struct net *net,
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif,
- hslot2, slot2);
+ hslot2, slot2, skb);
}
- rcu_read_unlock();
return result;
}
begin:
result = NULL;
badness = -1;
- sk_nulls_for_each_rcu(sk, node, &hslot->head) {
+ sk_for_each_rcu(sk, &hslot->head) {
score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
if (score > badness) {
- result = sk;
- badness = score;
reuseport = sk->sk_reuseport;
if (reuseport) {
- struct sock *sk2;
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
- sk2 = reuseport_select_sock(sk, hash, skb,
+ result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
- if (sk2) {
- result = sk2;
- goto found;
- }
+ if (result)
+ return result;
matches = 1;
}
+ result = sk;
+ badness = score;
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -375,25 +308,6 @@ begin:
hash = next_pseudo_random32(hash);
}
}
- /*
- * if the nulls value we got at the end of this lookup is
- * not the expected one, we must restart lookup.
- * We probably met an item that was moved to another chain.
- */
- if (get_nulls_value(node) != slot)
- goto begin;
-
- if (result) {
-found:
- if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
- result = NULL;
- else if (unlikely(compute_score(result, net, hnum, saddr, sport,
- daddr, dport, dif) < badness)) {
- sock_put(result);
- goto begin;
- }
- }
- rcu_read_unlock();
return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
@@ -413,12 +327,24 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
udptable, skb);
}
+/* Must be called under rcu_read_lock().
+ * Does increment socket refcount.
+ */
+#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
+ IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
- return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL);
+ struct sock *sk;
+
+ sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ dif, &udp_table, NULL);
+ if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ return sk;
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
+#endif
/*
* This should be easy, if there is something there we
@@ -584,7 +510,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct net *net = dev_net(skb->dev);
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), udptable, skb);
+ inet6_iif(skb), udptable, NULL);
if (!sk) {
ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
@@ -615,7 +541,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- sock_put(sk);
+ return;
}
int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -775,33 +701,6 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
return true;
}
-static void flush_stack(struct sock **stack, unsigned int count,
- struct sk_buff *skb, unsigned int final)
-{
- struct sk_buff *skb1 = NULL;
- struct sock *sk;
- unsigned int i;
-
- for (i = 0; i < count; i++) {
- sk = stack[i];
- if (likely(!skb1))
- skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
- if (!skb1) {
- atomic_inc(&sk->sk_drops);
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
- IS_UDPLITE(sk));
- UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- }
-
- if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0)
- skb1 = NULL;
- sock_put(sk);
- }
- if (unlikely(skb1))
- kfree_skb(skb1);
-}
-
static void udp6_csum_zero_error(struct sk_buff *skb)
{
/* RFC 2460 section 8.1 says that we SHOULD log
@@ -820,15 +719,15 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
const struct in6_addr *saddr, const struct in6_addr *daddr,
struct udp_table *udptable, int proto)
{
- struct sock *sk, *stack[256 / sizeof(struct sock *)];
+ struct sock *sk, *first = NULL;
const struct udphdr *uh = udp_hdr(skb);
- struct hlist_nulls_node *node;
unsigned short hnum = ntohs(uh->dest);
struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
- int dif = inet6_iif(skb);
- unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node);
+ unsigned int offset = offsetof(typeof(*sk), sk_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
- bool inner_flushed = false;
+ int dif = inet6_iif(skb);
+ struct hlist_node *node;
+ struct sk_buff *nskb;
if (use_hash2) {
hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
@@ -839,27 +738,32 @@ start_lookup:
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
- spin_lock(&hslot->lock);
- sk_nulls_for_each_entry_offset(sk, node, &hslot->head, offset) {
- if (__udp_v6_is_mcast_sock(net, sk,
- uh->dest, daddr,
- uh->source, saddr,
- dif, hnum) &&
- /* If zero checksum and no_check is not on for
- * the socket then skip it.
- */
- (uh->check || udp_sk(sk)->no_check6_rx)) {
- if (unlikely(count == ARRAY_SIZE(stack))) {
- flush_stack(stack, count, skb, ~0);
- inner_flushed = true;
- count = 0;
- }
- stack[count++] = sk;
- sock_hold(sk);
+ sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
+ if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
+ uh->source, saddr, dif, hnum))
+ continue;
+ /* If zero checksum and no_check is not on for
+ * the socket then skip it.
+ */
+ if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ continue;
+ if (!first) {
+ first = sk;
+ continue;
+ }
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ atomic_inc(&sk->sk_drops);
+ UDP6_INC_STATS_BH(net, UDP_MIB_RCVBUFERRORS,
+ IS_UDPLITE(sk));
+ UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ continue;
}
- }
- spin_unlock(&hslot->lock);
+ if (udpv6_queue_rcv_skb(sk, nskb) > 0)
+ consume_skb(nskb);
+ }
/* Also lookup *:port if we are using hash2 and haven't done so yet. */
if (use_hash2 && hash2 != hash2_any) {
@@ -867,13 +771,13 @@ start_lookup:
goto start_lookup;
}
- if (count) {
- flush_stack(stack, count, skb, count - 1);
+ if (first) {
+ if (udpv6_queue_rcv_skb(first, skb) > 0)
+ consume_skb(skb);
} else {
- if (!inner_flushed)
- UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
- proto == IPPROTO_UDPLITE);
- consume_skb(skb);
+ kfree_skb(skb);
+ UDP6_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI,
+ proto == IPPROTO_UDPLITE);
}
return 0;
}
@@ -881,10 +785,10 @@ start_lookup:
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
+ const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
- struct sock *sk;
struct udphdr *uh;
- const struct in6_addr *saddr, *daddr;
+ struct sock *sk;
u32 ulen = 0;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
@@ -938,7 +842,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int ret;
if (!uh->check && !udp_sk(sk)->no_check6_rx) {
- sock_put(sk);
udp6_csum_zero_error(skb);
goto csum_error;
}
@@ -948,7 +851,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ip6_compute_pseudo);
ret = udpv6_queue_rcv_skb(sk, skb);
- sock_put(sk);
/* a return value > 0 means to resubmit the input */
if (ret > 0)
@@ -995,7 +897,6 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
int dif)
{
struct sock *sk;
- struct hlist_nulls_node *hnode;
unsigned short hnum = ntohs(loc_port);
unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
@@ -1003,7 +904,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
- udp_portaddr_for_each_entry_rcu(sk, hnode, &hslot2->head) {
+ udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
return sk;
@@ -1648,7 +1549,6 @@ struct proto udpv6_prot = {
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index d1eaeeaa34d2..af2895c77ed6 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -50,7 +50,6 @@ struct proto udplitev6_prot = {
.unhash = udp_lib_unhash,
.get_port = udp_v6_get_port,
.obj_size = sizeof(struct udp6_sock),
- .slab_flags = SLAB_DESTROY_BY_RCU,
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udpv6_setsockopt,
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index aed0c7350988..3a31370d568a 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -212,13 +212,14 @@ discard:
return 0;
}
-static void l2tp_ip_hash(struct sock *sk)
+static int l2tp_ip_hash(struct sock *sk)
{
if (sk_unhashed(sk)) {
write_lock_bh(&l2tp_ip_lock);
sk_add_node(sk, &l2tp_ip_table);
write_unlock_bh(&l2tp_ip_lock);
}
+ return 0;
}
static void l2tp_ip_unhash(struct sock *sk)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index bcb7bce86d98..8cc6a554b6f8 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -225,13 +225,14 @@ discard:
return 0;
}
-static void l2tp_ip6_hash(struct sock *sk)
+static int l2tp_ip6_hash(struct sock *sk)
{
if (sk_unhashed(sk)) {
write_lock_bh(&l2tp_ip6_lock);
sk_add_node(sk, &l2tp_ip6_table);
write_unlock_bh(&l2tp_ip6_lock);
}
+ return 0;
}
static void l2tp_ip6_unhash(struct sock *sk)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2ed50ba6c1af..d32a3fd4528e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1600,7 +1600,7 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
-int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
+int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
{
int i, bucket, rc;
unsigned int hashsize, old_size;
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index e84a578dbe35..d76afafdc699 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -134,7 +134,7 @@ static int __init nf_nat_ftp_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 1fb2258c3535..8039bcd60758 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -107,7 +107,7 @@ static int __init nf_nat_irc_init(void)
}
/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
+static int warn_set(const char *val, const struct kernel_param *kp)
{
printk(KERN_INFO KBUILD_MODNAME
": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 3ab591e73ec0..7f4414d26a66 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -105,19 +105,24 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
* belonging to established connections going through that one.
*/
static inline struct sock *
-nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
+ ip_hdrlen(skb) +
+ __tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
in->ifindex);
@@ -169,19 +174,23 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
#ifdef XT_TPROXY_HAVE_IPV6
static inline struct sock *
-nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet6_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
+ thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
in->ifindex);
@@ -267,7 +276,7 @@ tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -305,7 +314,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -321,7 +330,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -429,7 +438,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
@@ -472,7 +481,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,8 +496,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
- &iph->saddr, laddr,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
+ tproto, &iph->saddr, laddr,
hp->source, lport,
par->in, NFT_LOOKUP_LISTENER);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 939821821fcb..c5d930670cf0 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -112,14 +112,15 @@ extract_icmp4_fields(const struct sk_buff *skb,
* box.
*/
static struct sock *
-xt_socket_get_sock_v4(struct net *net, const u8 protocol,
+xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return __inet_lookup(net, &tcp_hashinfo,
+ return __inet_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -148,6 +149,8 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net,
const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
struct sock *sk = skb->sk;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
@@ -173,6 +176,10 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net,
sport = hp->source;
daddr = iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = iph->protocol == IPPROTO_TCP ?
+ ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
+ ip_hdrlen(skb) + sizeof(*hp);
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
@@ -205,9 +212,9 @@ struct sock *xt_socket_lookup_slow_v4(struct net *net,
if (sk)
atomic_inc(&sk->sk_refcnt);
else
- sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
- saddr, daddr, sport, dport,
- indev);
+ sk = xt_socket_get_sock_v4(dev_net(skb->dev), data_skb, doff,
+ protocol, saddr, daddr, sport,
+ dport, indev);
return sk;
}
@@ -328,14 +335,15 @@ extract_icmp6_fields(const struct sk_buff *skb,
}
static struct sock *
-xt_socket_get_sock_v6(struct net *net, const u8 protocol,
+xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return inet6_lookup(net, &tcp_hashinfo,
+ return inet6_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -354,6 +362,8 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net,
__be16 uninitialized_var(dport), uninitialized_var(sport);
const struct in6_addr *daddr = NULL, *saddr = NULL;
struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
int thoff = 0, tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
@@ -375,6 +385,10 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net,
sport = hp->source;
daddr = &iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = tproto == IPPROTO_TCP ?
+ thoff + __tcp_hdrlen((struct tcphdr *)hp) :
+ thoff + sizeof(*hp);
} else if (tproto == IPPROTO_ICMPV6) {
struct ipv6hdr ipv6_var;
@@ -389,8 +403,8 @@ struct sock *xt_socket_lookup_slow_v6(struct net *net,
if (sk)
atomic_inc(&sk->sk_refcnt);
else
- sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
- saddr, daddr, sport, dport,
+ sk = xt_socket_get_sock_v6(dev_net(skb->dev), data_skb, doff,
+ tproto, saddr, daddr, sport, dport,
indev);
return sk;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index d575ef4e9aa6..ffd5f2297584 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -140,13 +140,15 @@ void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb)
rcu_read_unlock();
}
-void pn_sock_hash(struct sock *sk)
+int pn_sock_hash(struct sock *sk)
{
struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject);
mutex_lock(&pnsocks.lock);
sk_add_node_rcu(sk, hlist);
mutex_unlock(&pnsocks.lock);
+
+ return 0;
}
EXPORT_SYMBOL(pn_sock_hash);
@@ -200,7 +202,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len)
pn->resource = spn->spn_resource;
/* Enable RX on the socket */
- sk->sk_prot->hash(sk);
+ err = sk->sk_prot->hash(sk);
out_port:
mutex_unlock(&port_mutex);
out:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fcac88f1774b..4d60f1e42f42 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6171,9 +6171,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
return retval;
}
-static void sctp_hash(struct sock *sk)
+static int sctp_hash(struct sock *sk)
{
/* STUB */
+ return 0;
}
static void sctp_unhash(struct sock *sk)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 41f6e964fe91..1a7d930a867c 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map);
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
static int
-param_set_pool_mode(const char *val, struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
struct svc_pool_map *m = &svc_pool_map;
@@ -80,7 +80,7 @@ out:
}
static int
-param_get_pool_mode(char *buf, struct kernel_param *kp)
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
{
int *ip = (int *)kp->arg;
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 2e69bf6da207..80cfd227c984 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -37,7 +37,9 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
(void *) BPF_FUNC_clone_redirect;
static int (*bpf_redirect)(int ifindex, int flags) =
(void *) BPF_FUNC_redirect;
-static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+ unsigned long long flags, void *data,
+ int size) =
(void *) BPF_FUNC_perf_event_output;
static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
(void *) BPF_FUNC_get_stackid;
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 1112dd94b440..8a491e6b542c 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -148,6 +148,41 @@
# Expands to either gcc or clang
cc-name = $(shell $(CC) -v 2>&1 | grep -q "clang version" && echo clang || echo gcc)
+# __cc-version
+# Returns compiler version
+__cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/$(cc-name)-version.sh $(CC))
+
+# __cc-fullversion
+# Returns full compiler version
+__cc-fullversion = $(shell $(CONFIG_SHELL) \
+ $(srctree)/scripts/$(cc-name)-version.sh -p $(CC))
+
+# __cc-ifversion
+# Matches compiler name and version
+# Usage: EXTRA_CFLAGS += $(call __cc-ifversion, gcc, -lt, 0402, -O1)
+__cc-ifversion = $(shell [ $(cc-name) = $(1) ] && [ $(__cc-version) $(2) $(3) ] && echo $(4) || echo $(5))
+
+# __cc-if-fullversion
+# Matches compiler name and full version
+# Usage: EXTRA_CFLAGS += $(call __cc-if-fullversion, gcc, -lt, 040502, -O1)
+__cc-if-fullversion = $(shell [ $(cc-name) = $(1) ] && [ $(__cc-fullversion) $(2) $(3) ] && echo $(4) || echo $(5))
+
+# gcc-ifversion
+gcc-ifversion = $(call __cc-ifversion, gcc, $(1), $(2), $(3), $(4))
+
+# gcc-if-fullversion
+gcc-if-fullversion = $(call __cc-if-fullversion, gcc, $(1), $(2), $(3), $(4))
+
+# clang-ifversion
+clang-ifversion = $(call __cc-ifversion, clang, $(1), $(2), $(3), $(4))
+
+# clang-if-fullversion
+clang-if-fullversion = $(call __cc-if-fullversion, clang, $(1), $(2), $(3), $(4))
+
# cc-version
cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
@@ -155,9 +186,9 @@ cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
cc-fullversion = $(shell $(CONFIG_SHELL) \
$(srctree)/scripts/gcc-version.sh -p $(CC))
-# cc-ifversion
-# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
-cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4))
+# backward compatibility
+cc-ifversion = $(gcc-ifversion)
+cc-if-fullversion = $(gcc-if-fullversion)
# cc-ldoption
# Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
@@ -165,14 +196,18 @@ cc-ldoption = $(call try-run,\
$(CC) $(1) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
# ld-option
-# Usage: LDFLAGS += $(call ld-option, -X)
-ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2))
+# Usage: LDFLAGS += $(call ld-option, -X, -Y)
+ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2),$(3))
# ar-option
# Usage: KBUILD_ARFLAGS := $(call ar-option,D)
# Important: no spaces around options
ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2))
+# ld-name
+# Expands to either bfd or gold
+ld-name = $(shell $(LD) -v 2>&1 | grep -q "GNU gold" && echo gold || echo bfd)
+
# ld-version
# Note this is mainly for HJ Lu's 3 number binutil versions
ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
@@ -181,6 +216,18 @@ ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
# Usage: $(call ld-ifversion, -ge, 22252, y)
ld-ifversion = $(shell [ $(ld-version) $(1) $(2) ] && echo $(3) || echo $(4))
+# __ld-ifversion
+# Usage: $(call __ld-ifversion, gold, -ge, 112000000, y)
+__ld-ifversion = $(shell [ $(ld-name) = $(1) ] && [ $(ld-version) $(2) $(3) ] && echo $(4) || echo $(5))
+
+# bfd-ifversion
+# Usage: $(call bfd-ifversion, -ge, 227000000, y)
+bfd-ifversion = $(call __ld-ifversion, bfd, $(1), $(2), $(3), $(4))
+
+# gold-ifversion
+# Usage: $(call gold-ifversion, -ge, 112000000, y)
+gold-ifversion = $(call __ld-ifversion, gold, $(1), $(2), $(3), $(4))
+
######
###
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 726d1b3f6759..23d49459ee66 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -280,6 +280,13 @@ define rule_as_o_S
mv -f $(dot-target).tmp $(dot-target).cmd
endef
+# List module undefined symbols (or empty line if not enabled)
+ifdef CONFIG_TRIM_UNUSED_KSYMS
+cmd_undef_syms = $(NM) $@ | sed -n 's/^ \+U //p' | xargs echo
+else
+cmd_undef_syms = echo
+endif
+
# Built-in and composite module parts
$(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
$(call cmd,force_checksrc)
@@ -290,7 +297,8 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE
$(call cmd,force_checksrc)
$(call if_changed_rule,cc_o_c)
- @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod)
+ @{ echo $(@:.o=.ko); echo $@; \
+ $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
quiet_cmd_cc_lst_c = MKLST $@
cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \
@@ -415,12 +423,22 @@ $(sort $(subdir-obj-y)): $(subdir-ym) ;
# Rule to compile a set of .o files into one .o file
#
ifdef builtin-target
-quiet_cmd_link_o_target = LD $@
+
+ifdef CONFIG_THIN_ARCHIVES
+ cmd_make_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
+ cmd_make_empty_builtin = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS)
+ quiet_cmd_link_o_target = AR $@
+else
+ cmd_make_builtin = $(LD) $(ld_flags) -r -o
+ cmd_make_empty_builtin = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS)
+ quiet_cmd_link_o_target = LD $@
+endif
+
# If the list of objects to link is empty, just create an empty built-in.o
cmd_link_o_target = $(if $(strip $(obj-y)),\
- $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^) \
+ $(cmd_make_builtin) $@ $(filter $(obj-y), $^) \
$(cmd_secanalysis),\
- rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@)
+ $(cmd_make_empty_builtin) $@)
$(builtin-target): $(obj-y) FORCE
$(call if_changed,link_o_target)
@@ -446,7 +464,12 @@ $(modorder-target): $(subdir-ym) FORCE
#
ifdef lib-target
quiet_cmd_link_l_target = AR $@
-cmd_link_l_target = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@ $(lib-y)
+
+ifdef CONFIG_THIN_ARCHIVES
+ cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)
+else
+ cmd_link_l_target = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@ $(lib-y)
+endif
$(lib-target): $(lib-y) FORCE
$(call if_changed,link_l_target)
@@ -461,16 +484,27 @@ endif
# <composite-object>-objs := <list of .o files>
# or
# <composite-object>-y := <list of .o files>
+# or
+# <composite-object>-m := <list of .o files>
+# The -m syntax only works if <composite-object> is a module
link_multi_deps = \
$(filter $(addprefix $(obj)/, \
$($(subst $(obj)/,,$(@:.o=-objs))) \
-$($(subst $(obj)/,,$(@:.o=-y)))), $^)
+$($(subst $(obj)/,,$(@:.o=-y))) \
+$($(subst $(obj)/,,$(@:.o=-m)))), $^)
-quiet_cmd_link_multi-y = LD $@
-cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
+cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
+
+ifdef CONFIG_THIN_ARCHIVES
+ quiet_cmd_link_multi-y = AR $@
+ cmd_link_multi-y = rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps)
+else
+ quiet_cmd_link_multi-y = LD $@
+ cmd_link_multi-y = $(cmd_link_multi-link)
+endif
quiet_cmd_link_multi-m = LD [M] $@
-cmd_link_multi-m = $(cmd_link_multi-y)
+cmd_link_multi-m = $(cmd_link_multi-link)
$(multi-used-y): FORCE
$(call if_changed,link_multi-y)
@@ -478,8 +512,9 @@ $(call multi_depend, $(multi-used-y), .o, -objs -y)
$(multi-used-m): FORCE
$(call if_changed,link_multi-m)
- @{ echo $(@:.o=.ko); echo $(link_multi_deps); } > $(MODVERDIR)/$(@F:.o=.mod)
-$(call multi_depend, $(multi-used-m), .o, -objs -y)
+ @{ echo $(@:.o=.ko); echo $(link_multi_deps); \
+ $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
+$(call multi_depend, $(multi-used-m), .o, -objs -y -m)
targets += $(multi-used-y) $(multi-used-m)
diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index 133edfae5b8a..a89a0c5d43b2 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host
@@ -85,7 +85,7 @@ hostcxx_flags = -Wp,-MD,$(depfile) $(__hostcxx_flags)
# Create executable from a single .c file
# host-csingle -> Executable
quiet_cmd_host-csingle = HOSTCC $@
- cmd_host-csingle = $(HOSTCC) $(hostc_flags) -o $@ $< \
+ cmd_host-csingle = $(HOSTCC) $(hostc_flags) $(HOSTLDFLAGS) -o $@ $< \
$(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
$(host-csingle): $(obj)/%: $(src)/%.c FORCE
$(call if_changed_dep,host-csingle)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 5a93712ea570..2daf8d6e49a1 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -48,7 +48,7 @@ subdir-ym := $(sort $(subdir-y) $(subdir-m))
# if $(foo-objs) exists, foo.o is a composite object
multi-used-y := $(sort $(foreach m,$(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m))))
-multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m))))
+multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m))), $(m))))
multi-used := $(multi-used-y) $(multi-used-m)
single-used-m := $(sort $(filter-out $(multi-used-m),$(obj-m)))
@@ -67,7 +67,7 @@ obj-dirs := $(dir $(multi-objs) $(obj-y))
# Replace multi-part objects by their individual parts, look at local dir only
real-objs-y := $(foreach m, $(filter-out $(subdir-obj-y), $(obj-y)), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))),$($(m:.o=-objs)) $($(m:.o=-y)),$(m))) $(extra-y)
-real-objs-m := $(foreach m, $(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))),$($(m:.o=-objs)) $($(m:.o=-y)),$(m)))
+real-objs-m := $(foreach m, $(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m))),$($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m)),$(m)))
# Add subdir path
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 7718a64b1cd1..8cb7971b3f25 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -115,14 +115,18 @@ $(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE
targets += $(modules:.ko=.mod.o)
-# Step 6), final link of the modules
+ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
+
+# Step 6), final link of the modules with optional arch pass after final link
quiet_cmd_ld_ko_o = LD [M] $@
- cmd_ld_ko_o = $(LD) -r $(LDFLAGS) \
- $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
- -o $@ $(filter-out FORCE,$^)
+ cmd_ld_ko_o = \
+ $(LD) -r $(LDFLAGS) \
+ $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
+ -o $@ $(filter-out FORCE,$^) ; \
+ $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
$(modules): %.ko :%.o %.mod.o FORCE
- $(call if_changed,ld_ko_o)
+ +$(call if_changed,ld_ko_o)
targets += $(modules)
diff --git a/scripts/clang-version.sh b/scripts/clang-version.sh
new file mode 100755
index 000000000000..9780efa56980
--- /dev/null
+++ b/scripts/clang-version.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-version [-p] clang-command
+#
+# Prints the compiler version of `clang-command' in a canonical 4-digit form
+# such as `0500' for clang-5.0 etc.
+#
+# With the -p option, prints the patchlevel as well, for example `050001' for
+# clang-5.0.1 etc.
+#
+
+if [ "$1" = "-p" ] ; then
+ with_patchlevel=1;
+ shift;
+fi
+
+compiler="$*"
+
+if [ ${#compiler} -eq 0 ]; then
+ echo "Error: No compiler specified."
+ printf "Usage:\n\t$0 <clang-command>\n"
+ exit 1
+fi
+
+MAJOR=$(echo __clang_major__ | $compiler -E -x c - | tail -n 1)
+MINOR=$(echo __clang_minor__ | $compiler -E -x c - | tail -n 1)
+if [ "x$with_patchlevel" != "x" ] ; then
+ PATCHLEVEL=$(echo __clang_patchlevel__ | $compiler -E -x c - | tail -n 1)
+ printf "%02d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
+else
+ printf "%02d%02d\\n" $MAJOR $MINOR
+fi
diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c
index b071bf476fea..dd1a4bd706a2 100644
--- a/scripts/extract-cert.c
+++ b/scripts/extract-cert.c
@@ -23,6 +23,13 @@
#include <openssl/err.h>
#include <openssl/engine.h>
+/*
+ * OpenSSL 3.0 deprecates the OpenSSL ENGINE API.
+ *
+ * Remove this if/when that API is no longer used.
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
#define PKEY_ID_PKCS7 2
static __attribute__((noreturn))
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index ba6c34ea5429..0a08006d600a 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -3,9 +3,12 @@
# link vmlinux
#
# vmlinux is linked from the objects selected by $(KBUILD_VMLINUX_INIT) and
-# $(KBUILD_VMLINUX_MAIN). Most are built-in.o files from top-level directories
-# in the kernel tree, others are specified in arch/$(ARCH)/Makefile.
-# Ordering when linking is important, and $(KBUILD_VMLINUX_INIT) must be first.
+# $(KBUILD_VMLINUX_MAIN) and $(KBUILD_VMLINUX_LIBS). Most are built-in.o files
+# from top-level directories in the kernel tree, others are specified in
+# arch/$(ARCH)/Makefile. Ordering when linking is important, and
+# $(KBUILD_VMLINUX_INIT) must be first. $(KBUILD_VMLINUX_LIBS) are archives
+# which are linked conditionally (not within --whole-archive), and do not
+# require symbol indexes to be added.
#
# vmlinux
# ^
@@ -16,6 +19,9 @@
# +--< $(KBUILD_VMLINUX_MAIN)
# | +--< drivers/built-in.o mm/built-in.o + more
# |
+# +--< $(KBUILD_VMLINUX_LIBS)
+# | +--< lib/lib.a + more
+# |
# +-< ${kallsymso} (see description in KALLSYMS section)
#
# vmlinux version (uname -v) cannot be updated during normal
@@ -37,12 +43,47 @@ info()
fi
}
+# Thin archive build here makes a final archive with symbol table and indexes
+# from vmlinux objects INIT and MAIN, which can be used as input to the linker.
+# KBUILD_VMLINUX_LIBS archives should already have symbol table and indexes
+# added.
+#
+# A traditional incremental link does not require this step.
+#
+# built-in.o output file
+#
+archive_builtin()
+{
+ if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+ info AR built-in.o
+ rm -f built-in.o;
+ ${AR} rcsTP${KBUILD_ARFLAGS} built-in.o \
+ ${KBUILD_VMLINUX_INIT} \
+ ${KBUILD_VMLINUX_MAIN}
+ fi
+}
+
# Link of vmlinux.o used for section mismatch analysis
# ${1} output file
modpost_link()
{
- ${LD} ${LDFLAGS} -r -o ${1} ${KBUILD_VMLINUX_INIT} \
- --start-group ${KBUILD_VMLINUX_MAIN} --end-group
+ local objects
+
+ if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+ objects="--whole-archive \
+ built-in.o \
+ --no-whole-archive \
+ --start-group \
+ ${KBUILD_VMLINUX_LIBS} \
+ --end-group"
+ else
+ objects="${KBUILD_VMLINUX_INIT} \
+ --start-group \
+ ${KBUILD_VMLINUX_MAIN} \
+ ${KBUILD_VMLINUX_LIBS} \
+ --end-group"
+ fi
+ ${LD} ${LDFLAGS} -r -o ${1} ${objects}
}
# Link of vmlinux
@@ -51,18 +92,50 @@ modpost_link()
vmlinux_link()
{
local lds="${objtree}/${KBUILD_LDS}"
+ local objects
if [ "${SRCARCH}" != "um" ]; then
- ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
- -T ${lds} ${KBUILD_VMLINUX_INIT} \
- --start-group ${KBUILD_VMLINUX_MAIN} --end-group ${1}
+ if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+ objects="--whole-archive \
+ built-in.o \
+ --no-whole-archive \
+ --start-group \
+ ${KBUILD_VMLINUX_LIBS} \
+ --end-group \
+ ${1}"
+ else
+ objects="${KBUILD_VMLINUX_INIT} \
+ --start-group \
+ ${KBUILD_VMLINUX_MAIN} \
+ ${KBUILD_VMLINUX_LIBS} \
+ --end-group \
+ ${1}"
+ fi
+
+ ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
+ -T ${lds} ${objects}
else
- ${CC} ${CFLAGS_vmlinux} -o ${2} \
- -Wl,-T,${lds} ${KBUILD_VMLINUX_INIT} \
- -Wl,--start-group \
- ${KBUILD_VMLINUX_MAIN} \
- -Wl,--end-group \
- -lutil -lrt -lpthread ${1}
+ if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+ objects="-Wl,--whole-archive \
+ built-in.o \
+ -Wl,--no-whole-archive \
+ -Wl,--start-group \
+ ${KBUILD_VMLINUX_LIBS} \
+ -Wl,--end-group \
+ ${1}"
+ else
+ objects="${KBUILD_VMLINUX_INIT} \
+ -Wl,--start-group \
+ ${KBUILD_VMLINUX_MAIN} \
+ ${KBUILD_VMLINUX_LIBS} \
+ -Wl,--end-group \
+ ${1}"
+ fi
+
+ ${CC} ${CFLAGS_vmlinux} -o ${2} \
+ -Wl,-T,${lds} \
+ ${objects} \
+ -lutil -lrt -lpthread
rm -f linux
fi
}
@@ -118,6 +191,7 @@ cleanup()
rm -f .tmp_kallsyms*
rm -f .tmp_version
rm -f .tmp_vmlinux*
+ rm -f built-in.o
rm -f System.map
rm -f vmlinux
rm -f vmlinux.o
@@ -161,13 +235,6 @@ case "${KCONFIG_CONFIG}" in
. "./${KCONFIG_CONFIG}"
esac
-#link vmlinux.o
-info LD vmlinux.o
-modpost_link vmlinux.o
-
-# modpost vmlinux.o to check for section mismatches
-${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
-
# Update version
info GEN .version
if [ ! -r .version ]; then
@@ -181,6 +248,15 @@ fi;
# final build of init/
${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
+archive_builtin
+
+#link vmlinux.o
+info LD vmlinux.o
+modpost_link vmlinux.o
+
+# modpost vmlinux.o to check for section mismatches
+${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
+
kallsymso=""
kallsyms_vmlinux=""
if [ -n "${CONFIG_KALLSYMS}" ]; then
diff --git a/scripts/sign-file.c b/scripts/sign-file.c
index 250a7a645033..b57ade78b24a 100755
--- a/scripts/sign-file.c
+++ b/scripts/sign-file.c
@@ -28,6 +28,13 @@
#include <openssl/engine.h>
/*
+ * OpenSSL 3.0 deprecates the OpenSSL ENGINE API.
+ *
+ * Remove this if/when that API is no longer used.
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
+/*
* Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to
* assume that it's not available and its header file is missing and that we
* should use PKCS#7 instead. Switching to the older PKCS#7 format restricts
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index a95b6c98d460..6087f210c93a 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -677,11 +677,11 @@ static const struct kernel_param_ops param_ops_aalockpolicy = {
.get = param_get_aalockpolicy
};
-static int param_set_audit(const char *val, struct kernel_param *kp);
-static int param_get_audit(char *buffer, struct kernel_param *kp);
+static int param_set_audit(const char *val, const struct kernel_param *kp);
+static int param_get_audit(char *buffer, const struct kernel_param *kp);
-static int param_set_mode(const char *val, struct kernel_param *kp);
-static int param_get_mode(char *buffer, struct kernel_param *kp);
+static int param_set_mode(const char *val, const struct kernel_param *kp);
+static int param_get_mode(char *buffer, const struct kernel_param *kp);
/* Flag values, also controllable via /sys/module/apparmor/parameters
* We define special types as we want to do additional mediation.
@@ -789,7 +789,7 @@ static int param_get_aauint(char *buffer, const struct kernel_param *kp)
return param_get_uint(buffer, kp);
}
-static int param_get_audit(char *buffer, struct kernel_param *kp)
+static int param_get_audit(char *buffer, const struct kernel_param *kp)
{
if (!policy_view_capable())
return -EPERM;
@@ -800,7 +800,7 @@ static int param_get_audit(char *buffer, struct kernel_param *kp)
return sprintf(buffer, "%s", audit_mode_names[aa_g_audit]);
}
-static int param_set_audit(const char *val, struct kernel_param *kp)
+static int param_set_audit(const char *val, const struct kernel_param *kp)
{
int i;
if (!policy_admin_capable())
@@ -822,7 +822,7 @@ static int param_set_audit(const char *val, struct kernel_param *kp)
return -EINVAL;
}
-static int param_get_mode(char *buffer, struct kernel_param *kp)
+static int param_get_mode(char *buffer, const struct kernel_param *kp)
{
if (!policy_admin_capable())
return -EPERM;
@@ -833,7 +833,7 @@ static int param_get_mode(char *buffer, struct kernel_param *kp)
return sprintf(buffer, "%s", aa_profile_mode_names[aa_g_profile_mode]);
}
-static int param_set_mode(const char *val, struct kernel_param *kp)
+static int param_set_mode(const char *val, const struct kernel_param *kp)
{
int i;
if (!policy_admin_capable())
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b967dcadcb4d..3d24f86b423f 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2774,7 +2774,7 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
return rc;
/* Allow all mounts performed by the kernel */
- if (flags & MS_KERNMOUNT)
+ if (flags & (MS_KERNMOUNT | MS_SUBMOUNT))
return 0;
ad.type = LSM_AUDIT_DATA_DENTRY;
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index e27b71a5d8d5..d1ee3dfb2555 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -105,7 +105,8 @@ struct security_class_mapping secclass_map[] = {
{ COMMON_IPC_PERMS, NULL } },
{ "netlink_route_socket",
{ COMMON_SOCK_PERMS,
- "nlmsg_read", "nlmsg_write", "nlmsg_readpriv", NULL } },
+ "nlmsg_read", "nlmsg_write", "nlmsg_readpriv", "nlmsg_getneigh",
+ NULL } },
{ "netlink_tcpdiag_socket",
{ COMMON_SOCK_PERMS,
"nlmsg_read", "nlmsg_write", NULL } },
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index b45a3a72c161..efb852e076b6 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -79,6 +79,7 @@ enum {
#define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1)
extern int selinux_android_netlink_route;
+extern int selinux_android_netlink_getneigh;
extern int selinux_policycap_netpeer;
extern int selinux_policycap_openperm;
extern int selinux_policycap_alwaysnetwork;
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 78a8c420b1f5..c321361af6bf 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -192,12 +192,12 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
return err;
}
-static void nlmsg_set_getlink_perm(u32 perm)
+static void nlmsg_set_perm_for_type(u32 perm, u16 type)
{
int i;
for (i = 0; i < ARRAY_SIZE(nlmsg_route_perms); i++) {
- if (nlmsg_route_perms[i].nlmsg_type == RTM_GETLINK) {
+ if (nlmsg_route_perms[i].nlmsg_type == type) {
nlmsg_route_perms[i].perm = perm;
break;
}
@@ -207,11 +207,27 @@ static void nlmsg_set_getlink_perm(u32 perm)
/**
* Use nlmsg_readpriv as the permission for RTM_GETLINK messages if the
* netlink_route_getlink policy capability is set. Otherwise use nlmsg_read.
+ * Similarly, use nlmsg_getneigh for RTM_GETNEIGH and RTM_GETNEIGHTBL if the
+ * netlink_route_getneigh policy capability is set. Otherwise use nlmsg_read.
*/
void selinux_nlmsg_init(void)
{
if (selinux_android_netlink_route)
- nlmsg_set_getlink_perm(NETLINK_ROUTE_SOCKET__NLMSG_READPRIV);
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_READPRIV,
+ RTM_GETLINK);
else
- nlmsg_set_getlink_perm(NETLINK_ROUTE_SOCKET__NLMSG_READ);
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_READ,
+ RTM_GETLINK);
+
+ if (selinux_android_netlink_getneigh) {
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_GETNEIGH,
+ RTM_GETNEIGH);
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_GETNEIGH,
+ RTM_GETNEIGHTBL);
+ } else {
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_READ,
+ RTM_GETNEIGH);
+ nlmsg_set_perm_for_type(NETLINK_ROUTE_SOCKET__NLMSG_READ,
+ RTM_GETNEIGHTBL);
+ }
}
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 5ee23e3a3678..c5b2940f4dab 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -2333,6 +2333,10 @@ int policydb_read(struct policydb *p, void *fp)
p->android_netlink_route = 1;
}
+ if ((le32_to_cpu(buf[1]) & POLICYDB_CONFIG_ANDROID_NETLINK_GETNEIGH)) {
+ p->android_netlink_getneigh = 1;
+ }
+
if (p->policyvers >= POLICYDB_VERSION_POLCAP) {
rc = ebitmap_read(&p->policycaps, fp);
if (rc)
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index 0d511cf3c1e9..45698f754766 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -228,6 +228,7 @@ struct genfs {
struct policydb {
int mls_enabled;
int android_netlink_route;
+ int android_netlink_getneigh;
/* symbol tables */
struct symtab symtab[SYM_NUM];
@@ -315,6 +316,7 @@ extern int policydb_write(struct policydb *p, void *fp);
#define POLICYDB_CONFIG_MLS 1
#define POLICYDB_CONFIG_ANDROID_NETLINK_ROUTE (1 << 31)
+#define POLICYDB_CONFIG_ANDROID_NETLINK_GETNEIGH (1 << 30)
/* the config flags related to unknown classes/perms are bits 2 and 3 */
#define REJECT_UNKNOWN 0x00000002
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 52cf500a8418..b615627d1abc 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -71,6 +71,7 @@
#include "audit.h"
int selinux_android_netlink_route;
+int selinux_android_netlink_getneigh;
int selinux_policycap_netpeer;
int selinux_policycap_openperm;
int selinux_policycap_alwaysnetwork;
@@ -2000,6 +2001,7 @@ static void security_load_policycaps(void)
POLICYDB_CAPABILITY_ALWAYSNETWORK);
selinux_android_netlink_route = policydb.android_netlink_route;
+ selinux_android_netlink_getneigh = policydb.android_netlink_getneigh;
selinux_nlmsg_init();
}
diff --git a/sound/core/control.c b/sound/core/control.c
index 43c8eac250b8..04b939df3768 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -877,24 +877,18 @@ static int snd_ctl_elem_read(struct snd_card *card,
struct snd_kcontrol *kctl;
struct snd_kcontrol_volatile *vd;
unsigned int index_offset;
- int result;
- down_read(&card->controls_rwsem);
kctl = snd_ctl_find_id(card, &control->id);
- if (kctl == NULL) {
- result = -ENOENT;
- } else {
- index_offset = snd_ctl_get_ioff(kctl, &control->id);
- vd = &kctl->vd[index_offset];
- if ((vd->access & SNDRV_CTL_ELEM_ACCESS_READ) &&
- kctl->get != NULL) {
- snd_ctl_build_ioff(&control->id, kctl, index_offset);
- result = kctl->get(kctl, control);
- } else
- result = -EPERM;
- }
- up_read(&card->controls_rwsem);
- return result;
+ if (kctl == NULL)
+ return -ENOENT;
+
+ index_offset = snd_ctl_get_ioff(kctl, &control->id);
+ vd = &kctl->vd[index_offset];
+ if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL)
+ return -EPERM;
+
+ snd_ctl_build_ioff(&control->id, kctl, index_offset);
+ return kctl->get(kctl, control);
}
static int snd_ctl_elem_read_user(struct snd_card *card,
@@ -909,8 +903,11 @@ static int snd_ctl_elem_read_user(struct snd_card *card,
snd_power_lock(card);
result = snd_power_wait(card, SNDRV_CTL_POWER_D0);
- if (result >= 0)
+ if (result >= 0) {
+ down_read(&card->controls_rwsem);
result = snd_ctl_elem_read(card, control);
+ up_read(&card->controls_rwsem);
+ }
snd_power_unlock(card);
if (result >= 0)
if (copy_to_user(_control, control, sizeof(*control)))
@@ -927,30 +924,28 @@ static int snd_ctl_elem_write(struct snd_card *card, struct snd_ctl_file *file,
unsigned int index_offset;
int result;
- down_read(&card->controls_rwsem);
kctl = snd_ctl_find_id(card, &control->id);
- if (kctl == NULL) {
- result = -ENOENT;
- } else {
- index_offset = snd_ctl_get_ioff(kctl, &control->id);
- vd = &kctl->vd[index_offset];
- if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_WRITE) ||
- kctl->put == NULL ||
- (file && vd->owner && vd->owner != file)) {
- result = -EPERM;
- } else {
- snd_ctl_build_ioff(&control->id, kctl, index_offset);
- result = kctl->put(kctl, control);
- }
- if (result > 0) {
- struct snd_ctl_elem_id id = control->id;
- up_read(&card->controls_rwsem);
- snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &id);
- return 0;
- }
+ if (kctl == NULL)
+ return -ENOENT;
+
+ index_offset = snd_ctl_get_ioff(kctl, &control->id);
+ vd = &kctl->vd[index_offset];
+ if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_WRITE) || kctl->put == NULL ||
+ (file && vd->owner && vd->owner != file)) {
+ return -EPERM;
}
- up_read(&card->controls_rwsem);
- return result;
+
+ snd_ctl_build_ioff(&control->id, kctl, index_offset);
+ result = kctl->put(kctl, control);
+ if (result < 0)
+ return result;
+
+ if (result > 0) {
+ struct snd_ctl_elem_id id = control->id;
+ snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &id);
+ }
+
+ return 0;
}
static int snd_ctl_elem_write_user(struct snd_ctl_file *file,
@@ -967,8 +962,11 @@ static int snd_ctl_elem_write_user(struct snd_ctl_file *file,
card = file->card;
snd_power_lock(card);
result = snd_power_wait(card, SNDRV_CTL_POWER_D0);
- if (result >= 0)
+ if (result >= 0) {
+ down_write(&card->controls_rwsem);
result = snd_ctl_elem_write(card, file, control);
+ up_write(&card->controls_rwsem);
+ }
snd_power_unlock(card);
if (result >= 0)
if (copy_to_user(_control, control, sizeof(*control)))
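The control.c hunks above move controls_rwsem locking out of
snd_ctl_elem_read() and snd_ctl_elem_write() and into their callers, so
the element lookup and the get/put callback run under a lock the caller
owns; the compat paths below adopt the same convention. A condensed
sketch of the caller-side pattern (the wrapper name is illustrative):

	/* Sketch of the caller-side locking convention: take
	 * controls_rwsem around the now-unlocked element helper. */
	static int example_elem_read_locked(struct snd_card *card,
					    struct snd_ctl_elem_value *control)
	{
		int result;

		down_read(&card->controls_rwsem);
		result = snd_ctl_elem_read(card, control);
		up_read(&card->controls_rwsem);
		return result;
	}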
diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
index 3fd7f67e701e..f1df25922bd7 100644
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c
@@ -320,8 +320,11 @@ static int ctl_elem_read_user(struct snd_card *card,
snd_power_lock(card);
err = snd_power_wait(card, SNDRV_CTL_POWER_D0);
- if (err >= 0)
+ if (err >= 0) {
+ down_read(&card->controls_rwsem);
err = snd_ctl_elem_read(card, data);
+ up_read(&card->controls_rwsem);
+ }
snd_power_unlock(card);
if (err >= 0)
err = copy_ctl_value_to_user(userdata, valuep, data,
@@ -349,8 +352,11 @@ static int ctl_elem_write_user(struct snd_ctl_file *file,
snd_power_lock(card);
err = snd_power_wait(card, SNDRV_CTL_POWER_D0);
- if (err >= 0)
+ if (err >= 0) {
+ down_write(&card->controls_rwsem);
err = snd_ctl_elem_write(card, file, data);
+ up_write(&card->controls_rwsem);
+ }
snd_power_unlock(card);
if (err >= 0)
err = copy_ctl_value_to_user(userdata, valuep, data,
diff --git a/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c b/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c
index 487aaf2390c0..5f4225e675ad 100644
--- a/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c
+++ b/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2012-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -995,6 +996,14 @@ static int msm_pcm_capture_copy(struct snd_pcm_substream *substream,
xfer = size;
offset = prtd->in_frame_info[idx].offset;
pr_debug("Offset value = %d\n", offset);
+
+ if (offset >= size) {
+ pr_err("%s: Invalid dsp buf offset\n", __func__);
+ ret = -EFAULT;
+ q6asm_cpu_buf_release(OUT, prtd->audio_client);
+ goto fail;
+ }
+
if (copy_to_user(buf, bufptr+offset, xfer)) {
pr_err("Failed to copy buf to user\n");
ret = -EFAULT;
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 00326629d4af..69bb3fc38fb2 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,3 +1,5 @@
socket
psock_fanout
psock_tpacket
+reuseport_bpf
+reuseport_bpf_cpu
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index fac4782c51d8..c658792d47b4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g
CFLAGS += -I../../../../usr/include/
-NET_PROGS = socket psock_fanout psock_tpacket
+NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu
all: $(NET_PROGS)
%: %.c
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
new file mode 100644
index 000000000000..b5277106df1f
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -0,0 +1,641 @@
+/*
+ * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
+ * a BPF program (both classic and extended) to read the first word from an
+ * incoming packet (expected to be in network byte-order), calculate a modulus
+ * of that number, and then dispatch the packet to the Nth socket using the
+ * result. These tests are run for each supported address family and protocol.
+ * Additionally, a few edge cases in the implementation are tested.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+struct test_params {
+ int recv_family;
+ int send_family;
+ int protocol;
+ size_t recv_socks;
+ uint16_t recv_port;
+ uint16_t send_port_min;
+};
+
+static size_t sockaddr_size(void)
+{
+ return sizeof(struct sockaddr_storage);
+}
+
+static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
+{
+ struct sockaddr_storage *addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ addr = malloc(sizeof(struct sockaddr_storage));
+ memset(addr, 0, sizeof(struct sockaddr_storage));
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(port);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+ return (struct sockaddr *)addr;
+}
+
+static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
+{
+ struct sockaddr *addr = new_any_sockaddr(family, port);
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_addr = in6addr_loopback;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+ return addr;
+}
+
+static void attach_ebpf(int fd, uint16_t mod)
+{
+ static char bpf_log_buf[65536];
+ static const char bpf_license[] = "GPL";
+
+ int bpf_fd;
+ const struct bpf_insn prog[] = {
+ /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
+ { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
+ /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
+ { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
+ /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
+ { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
+ /* BPF_EXIT_INSN() */
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insn_cnt = ARRAY_SIZE(prog);
+ attr.insns = (unsigned long) &prog;
+ attr.license = (unsigned long) &bpf_license;
+ attr.log_buf = (unsigned long) &bpf_log_buf;
+ attr.log_size = sizeof(bpf_log_buf);
+ attr.log_level = 1;
+ attr.kern_version = 0;
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+ close(bpf_fd);
+}
+
+static void attach_cbpf(int fd, uint16_t mod)
+{
+ struct sock_filter code[] = {
+ /* A = (uint32_t)skb[0] */
+ { BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 },
+ /* A = A % mod */
+ { BPF_ALU | BPF_MOD, 0, 0, mod },
+ /* return A */
+ { BPF_RET | BPF_A, 0, 0, 0 },
+ };
+ struct sock_fprog p = {
+ .len = ARRAY_SIZE(code),
+ .filter = code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
+ void (*attach_bpf)(int, uint16_t))
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ int i, opt;
+
+ for (i = 0; i < p.recv_socks; ++i) {
+ fd[i] = socket(p.recv_family, p.protocol, 0);
+ if (fd[i] < 0)
+ error(1, errno, "failed to create recv %d", i);
+
+ opt = 1;
+ if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on %d", i);
+
+ if (i == 0)
+ attach_bpf(fd[i], mod);
+
+ if (bind(fd[i], addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket %d", i);
+
+ if (p.protocol == SOCK_STREAM) {
+ opt = 4;
+ if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
+ sizeof(opt)))
+ error(1, errno,
+ "failed to set TCP_FASTOPEN on %d", i);
+ if (listen(fd[i], p.recv_socks * 10))
+ error(1, errno, "failed to listen on socket");
+ }
+ }
+ free(addr);
+}
+
+static void send_from(struct test_params p, uint16_t sport, char *buf,
+ size_t len)
+{
+ struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
+ struct sockaddr * const daddr =
+ new_loopback_sockaddr(p.send_family, p.recv_port);
+ const int fd = socket(p.send_family, p.protocol, 0), one = 1;
+
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
+ error(1, errno, "failed to set reuseaddr");
+
+ if (bind(fd, saddr, sockaddr_size()))
+ error(1, errno, "failed to bind send socket");
+
+ if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+ free(saddr);
+ free(daddr);
+}
+
+static void test_recv_order(const struct test_params p, int fd[], int mod)
+{
+ char recv_buf[8], send_buf[8];
+ struct msghdr msg;
+ struct iovec recv_io = { recv_buf, 8 };
+ struct epoll_event ev;
+ int epfd, conn, i, sport, expected;
+ uint32_t data, ndata;
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (i = 0; i < p.recv_socks; ++i) {
+ ev.events = EPOLLIN;
+ ev.data.fd = fd[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
+ error(1, errno, "failed to register sock %d epoll", i);
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &recv_io;
+ msg.msg_iovlen = 1;
+
+ for (data = 0; data < p.recv_socks * 2; ++data) {
+ sport = p.send_port_min + data;
+ ndata = htonl(data);
+ memcpy(send_buf, &ndata, sizeof(ndata));
+ send_from(p, sport, send_buf, sizeof(ndata));
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll wait failed");
+
+ if (p.protocol == SOCK_STREAM) {
+ conn = accept(ev.data.fd, NULL, NULL);
+ if (conn < 0)
+ error(1, errno, "error accepting");
+ i = recvmsg(conn, &msg, 0);
+ close(conn);
+ } else {
+ i = recvmsg(ev.data.fd, &msg, 0);
+ }
+ if (i < 0)
+ error(1, errno, "recvmsg error");
+ if (i != sizeof(ndata))
+ error(1, 0, "expected size %zd got %d",
+ sizeof(ndata), i);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ if (ev.data.fd == fd[i])
+ break;
+ memcpy(&ndata, recv_buf, sizeof(ndata));
+ fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
+
+ expected = (sport % mod);
+ if (i != expected)
+ error(1, 0, "expected socket %d", expected);
+ }
+}
+
+static void test_reuseport_ebpf(struct test_params p)
+{
+ int i, fd[p.recv_socks];
+
+ fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
+ build_recv_group(p, fd, p.recv_socks, attach_ebpf);
+ test_recv_order(p, fd, p.recv_socks);
+
+ p.send_port_min += p.recv_socks * 2;
+	fprintf(stderr, "Reprogramming, testing mod %zd...\n", p.recv_socks / 2);
+ attach_ebpf(fd[0], p.recv_socks / 2);
+ test_recv_order(p, fd, p.recv_socks / 2);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ close(fd[i]);
+}
+
+static void test_reuseport_cbpf(struct test_params p)
+{
+ int i, fd[p.recv_socks];
+
+ fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
+ build_recv_group(p, fd, p.recv_socks, attach_cbpf);
+ test_recv_order(p, fd, p.recv_socks);
+
+ p.send_port_min += p.recv_socks * 2;
+	fprintf(stderr, "Reprogramming, testing mod %zd...\n", p.recv_socks / 2);
+ attach_cbpf(fd[0], p.recv_socks / 2);
+ test_recv_order(p, fd, p.recv_socks / 2);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ close(fd[i]);
+}
+
+static void test_extra_filter(const struct test_params p)
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ int fd1, fd2, opt;
+
+ fprintf(stderr, "Testing too many filters...\n");
+ fd1 = socket(p.recv_family, p.protocol, 0);
+ if (fd1 < 0)
+ error(1, errno, "failed to create socket 1");
+ fd2 = socket(p.recv_family, p.protocol, 0);
+ if (fd2 < 0)
+ error(1, errno, "failed to create socket 2");
+
+ opt = 1;
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+ attach_ebpf(fd1, 10);
+ attach_ebpf(fd2, 10);
+
+ if (bind(fd1, addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket 1");
+
+	if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE)
+ error(1, errno, "bind socket 2 should fail with EADDRINUSE");
+
+ free(addr);
+}
+
+static void test_filter_no_reuseport(const struct test_params p)
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ const char bpf_license[] = "GPL";
+ struct bpf_insn ecode[] = {
+ { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
+ union bpf_attr eprog;
+ struct sock_fprog cprog;
+ int fd, bpf_fd;
+
+ fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
+
+ memset(&eprog, 0, sizeof(eprog));
+ eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ eprog.insn_cnt = ARRAY_SIZE(ecode);
+ eprog.insns = (unsigned long) &ecode;
+ eprog.license = (unsigned long) &bpf_license;
+ eprog.kern_version = 0;
+
+ memset(&cprog, 0, sizeof(cprog));
+ cprog.len = ARRAY_SIZE(ccode);
+ cprog.filter = ccode;
+
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error");
+ fd = socket(p.recv_family, p.protocol, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create socket 1");
+
+ if (bind(fd, addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket 1");
+
+ errno = 0;
+ if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)) || errno != EINVAL)
+ error(1, errno, "setsockopt should have returned EINVAL");
+
+ errno = 0;
+ if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
+ sizeof(cprog)) || errno != EINVAL)
+ error(1, errno, "setsockopt should have returned EINVAL");
+
+ free(addr);
+}
+
+static void test_filter_without_bind(void)
+{
+ int fd1, fd2, opt = 1;
+
+ fprintf(stderr, "Testing filter add without bind...\n");
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd1 < 0)
+ error(1, errno, "failed to create socket 1");
+ fd2 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd2 < 0)
+ error(1, errno, "failed to create socket 2");
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+ attach_ebpf(fd1, 10);
+ attach_cbpf(fd2, 10);
+
+ close(fd1);
+ close(fd2);
+}
+
+void enable_fastopen(void)
+{
+ int fd = open("/proc/sys/net/ipv4/tcp_fastopen", 0);
+	int rw_mask = 3; /* bit 1: client side; bit 2: server side */
+ int val, size;
+ char buf[16];
+
+ if (fd < 0)
+ error(1, errno, "Unable to open tcp_fastopen sysctl");
+ if (read(fd, buf, sizeof(buf)) <= 0)
+ error(1, errno, "Unable to read tcp_fastopen sysctl");
+ val = atoi(buf);
+ close(fd);
+
+ if ((val & rw_mask) != rw_mask) {
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (fd < 0)
+ error(1, errno,
+ "Unable to open tcp_fastopen sysctl for writing");
+ val |= rw_mask;
+ size = snprintf(buf, 16, "%d", val);
+ if (write(fd, buf, size) <= 0)
+ error(1, errno, "Unable to write tcp_fastopen sysctl");
+ close(fd);
+ }
+}
+
+static struct rlimit rlim_old;
+
+static __attribute__((constructor)) void main_ctor(void)
+{
+ getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+
+ if (rlim_old.rlim_cur != RLIM_INFINITY) {
+ struct rlimit rlim_new;
+
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
+}
+
+static __attribute__((destructor)) void main_dtor(void)
+{
+ setrlimit(RLIMIT_MEMLOCK, &rlim_old);
+}
+
+int main(void)
+{
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ /* NOTE: UDP socket lookups traverse a different code path when there
+ * are > 10 sockets in a group. Run the bpf test through both paths.
+ */
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8000,
+ .send_port_min = 9000});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8000,
+ .send_port_min = 9000});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8001,
+ .send_port_min = 9020});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8001,
+ .send_port_min = 9020});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8002});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8008});
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8003,
+ .send_port_min = 9040});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8003,
+ .send_port_min = 9040});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8004,
+ .send_port_min = 9060});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8004,
+ .send_port_min = 9060});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8005});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8009});
+
+ fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8006,
+ .send_port_min = 9080});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8006,
+ .send_port_min = 9080});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8007,
+ .send_port_min = 9100});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8007,
+ .send_port_min = 9100});
+
+ /* TCP fastopen is required for the TCP tests */
+ enable_fastopen();
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8008,
+ .send_port_min = 9120});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8009,
+ .send_port_min = 9160});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8010});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8011});
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8012,
+ .send_port_min = 9200});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8013,
+ .send_port_min = 9240});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8014});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8015});
+
+ fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8016,
+ .send_port_min = 9320});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8017,
+ .send_port_min = 9360});
+
+ test_filter_without_bind();
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c
new file mode 100644
index 000000000000..b23d6f54de7b
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c
@@ -0,0 +1,258 @@
+/*
+ * Test functionality of BPF filters with SO_REUSEPORT. This program creates
+ * an SO_REUSEPORT receiver group containing one socket per CPU core. It then
+ * creates a BPF program that will select a socket from this group based
+ * on the core id that receives the packet. The sending code artificially
+ * moves itself to run on different core ids and sends one message from
+ * each core. Since these packets are delivered over loopback, they should
+ * arrive on the same core that sent them. The receiving code then ensures
+ * that the packet was received on the socket for the corresponding core id.
+ * This entire process is done for several different core id permutations
+ * and for each IPv4/IPv6 and TCP/UDP combination.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ size_t i;
+ int opt;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < len; ++i) {
+ rcv_fd[i] = socket(family, proto, 0);
+ if (rcv_fd[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void attach_bpf(int fd)
+{
+ struct sock_filter code[] = {
+ /* A = raw_smp_processor_id() */
+ { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU },
+ /* return A */
+ { BPF_RET | BPF_A, 0, 0, 0 },
+ };
+ struct sock_fprog p = {
+ .len = 2,
+ .filter = code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+static void send_from_cpu(int cpu_id, int family, int proto)
+{
+ struct sockaddr_storage saddr, daddr;
+ struct sockaddr_in *saddr4, *daddr4;
+ struct sockaddr_in6 *saddr6, *daddr6;
+ cpu_set_t cpu_set;
+ int fd;
+
+ switch (family) {
+ case AF_INET:
+ saddr4 = (struct sockaddr_in *)&saddr;
+ saddr4->sin_family = AF_INET;
+ saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4->sin_port = 0;
+
+ daddr4 = (struct sockaddr_in *)&daddr;
+ daddr4->sin_family = AF_INET;
+ daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ saddr6 = (struct sockaddr_in6 *)&saddr;
+ saddr6->sin6_family = AF_INET6;
+ saddr6->sin6_addr = in6addr_any;
+ saddr6->sin6_port = 0;
+
+ daddr6 = (struct sockaddr_in6 *)&daddr;
+ daddr6->sin6_family = AF_INET6;
+ daddr6->sin6_addr = in6addr_loopback;
+ daddr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ memset(&cpu_set, 0, sizeof(cpu_set));
+ CPU_SET(cpu_id, &cpu_set);
+ if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0)
+ error(1, errno, "failed to pin to cpu");
+
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static
+void receive_on_cpu(int *rcv_fd, int len, int epfd, int cpu_id, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ for (i = 0; i < len; ++i)
+ if (ev.data.fd == rcv_fd[i])
+ break;
+ if (i == len)
+ error(1, 0, "failed to find socket");
+ fprintf(stderr, "send cpu %d, receive socket %d\n", cpu_id, i);
+ if (cpu_id != i)
+ error(1, 0, "cpu id/receive socket mismatch");
+}
+
+static void test(int *rcv_fd, int len, int family, int proto)
+{
+ struct epoll_event ev;
+ int epfd, cpu;
+
+ build_rcv_group(rcv_fd, len, family, proto);
+ attach_bpf(rcv_fd[0]);
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (cpu = 0; cpu < len; ++cpu) {
+ ev.events = EPOLLIN;
+ ev.data.fd = rcv_fd[cpu];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[cpu], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ /* Forward iterate */
+ for (cpu = 0; cpu < len; ++cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Reverse iterate */
+ for (cpu = len - 1; cpu >= 0; --cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Even cores */
+ for (cpu = 0; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Odd cores */
+ for (cpu = 1; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ close(epfd);
+ for (cpu = 0; cpu < len; ++cpu)
+ close(rcv_fd[cpu]);
+}
+
+int main(void)
+{
+ int *rcv_fd, cpus;
+
+ cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (cpus <= 0)
+ error(1, errno, "failed counting cpus");
+
+ rcv_fd = calloc(cpus, sizeof(int));
+ if (!rcv_fd)
+ error(1, 0, "failed to allocate array");
+
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_STREAM);
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_STREAM);
+
+ free(rcv_fd);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}