summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorDavide Garberi <dade.garberi@gmail.com>2023-05-28 15:57:41 +0200
committerDavide Garberi <dade.garberi@gmail.com>2023-08-03 02:55:58 +0200
commitce2e1dc133974cf19668249e2f527bb98ddf449f (patch)
tree7c462c49b405757413d42411042274e035debab4 /kernel
parent7ef1b1f38a94bf54b215c4c56ea8d62ec607f73b (diff)
parent1a4b80f8f2017576f530658f48659d9a222f4648 (diff)
Merge lineage-20 of git@github.com:LineageOS/android_kernel_qcom_msm8998.git into lineage-20
1a4b80f8f201 ANDROID: arch:arm64: Increase kernel command line size 7c253f7aa663 of: reserved_mem: increase max number reserved regions df4dbf557503 msm: camera: Fix indentations 2fc4a156d15d msm: camera: Fix code flow when populating CAM_V_CUSTOM1 687bcb61f125 ALSA: control: use counting semaphore as write lock for ELEM_WRITE operation 75cf9e8c1b1c ALSA: control: Fix memory corruption risk in snd_ctl_elem_read 76cf3b5e53df ALSA: control: code refactoring for ELEM_READ/ELEM_WRITE operations e9af212f9685 ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF 95fc4fff573f msm: kgsl: Make sure that pool pages don't have any extra references 59ceabe0d242 msm: kgsl: Use dma_buf_get() to get dma_buf structure d1f19956d6b9 ANDROID: usb: f_accessory: Check buffer size when initialised via composite 2d3ce4f7a366 kbuild: handle libs-y archives separately from built-in.o archives 65dc3fbd1593 kbuild: thin archives use P option to ar 362c7b73bac8 kbuild: thin archives for multi-y targets 43076241b514 kbuild: thin archives final link close --whole-archives option aa04fc78256d kbuild: minor improvement for thin archives build f5896747cda6 Merge tag 'LA.UM.7.2.c25-07700-sdm660.0' of https://git.codelinaro.org/clo/la/platform/vendor/qcom-opensource/wlan/qcacld-3.0 into android13-4.4-msm8998 321ac077ee7e qcacld-3.0: Fix out-of-bounds in tx_stats 42be8e4cbf13 BACKPORT: usb: gadget: rndis: prevent integer overflow in rndis_set_response() b490a85b5945 FROMGIT: arm64: fix oops in concurrently setting insn_emulation sysctls 7ed7084b34a9 FROMLIST: binder: fix UAF of ref->proc caused by race condition e31f087fb864 ANDROID: selinux: modify RTM_GETNEIGH{TBL} 80675d431434 UPSTREAM: usb: gadget: clear related members when goto fail fb6adfb00108 UPSTREAM: usb: gadget: don't release an existing dev->buf e4a8dd12424e UPSTREAM: USB: gadget: validate interface OS descriptor requests 8f0a947317e0 UPSTREAM: usb: gadget: rndis: check size of RNDIS_MSG_SET command 1541758765ff ion: Do not 'put' ION handle until after its final use 03b4b3cd8d30 Merge tag 'LA.UM.7.2.c25-07000-sdm660.0' of https://git.codelinaro.org/clo/la/platform/vendor/qcom-opensource/wlan/qcacld-3.0 into android13-4.4-msm8998 7dbda95466d5 Merge tag 'LA.UM.8.4.c25-06600-8x98.0' of https://git.codelinaro.org/clo/la/kernel/msm-4.4 into android13-4.4-msm8998 369119e5df4e cert host tools: Stop complaining about deprecated OpenSSL functions f8e30a0f9a17 fixup! BACKPORT: treewide: Fix function prototypes for module_param_call() 4fa5045f3dc9 arm64/efi: Mark __efistub_stext_offset as an absolute symbol explicitly bcd9668da77f arm64: kernel: do not need to reset UAO on exception entry c4ddd677f7e3 Kbuild: do not emit debug info for assembly with LLVM_IAS=1 1b880b6e19f8 qcacld-3.0: Add time slice duty cycle in wifi_interface_info fd24be2b22a1 qcacmn: Add time slice duty cycle attribute into QCA vendor command d719c1c825f8 qcacld-3.0: Use field-by-field assignment for FW stats fb5eb3bda2d9 ext4: enable quota enforcement based on mount options cd40d7f301de ext4: adds project ID support 360e2f3d18b8 ext4: add project quota support c31ac2be1594 drivers: qcacld-3.0: Remove in_compat_syscall() redefinition 6735c13a269d arm64: link with -z norelro regardless of CONFIG_RELOCATABLE 99962aab3433 arm64: relocatable: fix inconsistencies in linker script and options 24bd8cc5e6bb arm64: prevent regressions in compressed kernel image size when upgrading to binutils 2.27 93bb4c2392a2 arm64: kernel: force ET_DYN ELF type for CONFIG_RELOCATABLE=y a54bbb725ccb arm64: build with baremetal linker target instead of Linux when available c5805c604a9b arm64: add endianness option to LDFLAGS instead of LD ab6052788f60 arm64: Set UTS_MACHINE in the Makefile c3330429b2c6 kbuild: clear LDFLAGS in the top Makefile f33c1532bd61 kbuild: use HOSTLDFLAGS for single .c executables 38b7db363a96 BACKPORT: arm64: Change .weak to SYM_FUNC_START_WEAK_PI for arch/arm64/lib/mem*.S 716cb63e81d9 BACKPORT: crypto: arm64/aes-ce-cipher - move assembler code to .S file 7dfbaee16432 BACKPORT: arm64: Remove reference to asm/opcodes.h 531ee8624d17 BACKPORT: arm64: kprobe: protect/rename few definitions to be reused by uprobe 08d83c997b0c BACKPORT: arm64: Delete the space separator in __emit_inst e3951152dc2d BACKPORT: arm64: Get rid of asm/opcodes.h 255820c0f301 BACKPORT: arm64: Fix minor issues with the dcache_by_line_op macro 21bb344a664b BACKPORT: crypto: arm64/aes-modes - get rid of literal load of addend vector 26d5a53c6e0d BACKPORT: arm64: vdso: remove commas between macro name and arguments 78bff1f77c9d BACKPORT: kbuild: support LLVM=1 to switch the default tools to Clang/LLVM 6634f9f63efe BACKPORT: kbuild: replace AS=clang with LLVM_IAS=1 b891e8fdc466 BACKPORT: Documentation/llvm: fix the name of llvm-size 75d6fa8368a8 BACKPORT: Documentation/llvm: add documentation on building w/ Clang/LLVM 95b0a5e52f2a BACKPORT: ANDROID: ftrace: fix function type mismatches 7da9c2138ec8 BACKPORT: ANDROID: fs: logfs: fix filler function type d6d5a4b28ad0 BACKPORT: ANDROID: fs: gfs2: fix filler function type 9b194a470db5 BACKPORT: ANDROID: fs: exofs: fix filler function type 7a45ac4bfb49 BACKPORT: ANDROID: fs: afs: fix filler function type 4099e1b281e5 BACKPORT: drivers/perf: arm_pmu: fix function type mismatch af7b738882f7 BACKPORT: dummycon: fix function types 1b0b55a36dbe BACKPORT: fs: nfs: fix filler function type a58a0e30e20a BACKPORT: mm: fix filler function type mismatch 829e9226a8c0 BACKPORT: mm: fix drain_local_pages function type 865ef61b4da8 BACKPORT: vfs: pass type instead of fn to do_{loop,iter}_readv_writev() 08d2f8e7ba8e BACKPORT: module: Do not paper over type mismatches in module_param_call() ea467f6c33e4 BACKPORT: treewide: Fix function prototypes for module_param_call() d131459e6b8b BACKPORT: module: Prepare to convert all module_param_call() prototypes 6f52abadf006 BACKPORT: kbuild: fix --gc-sections bf7540ffce44 BACKPORT: kbuild: record needed exported symbols for modules c49d2545e437 BACKPORT: kbuild: Allow to specify composite modules with modname-m 427d0fc67dc1 BACKPORT: kbuild: add arch specific post-link Makefile 69f8a31838a3 BACKPORT: arm64: add a workaround for GNU gold with ARM64_MODULE_PLTS ba3368756abf BACKPORT: arm64: explicitly pass --no-fix-cortex-a53-843419 to GNU gold 6dacd7e737fb BACKPORT: arm64: errata: Pass --fix-cortex-a53-843419 to ld if workaround enabled d2787c21f2b5 BACKPORT: kbuild: add __ld-ifversion and linker-specific macros 2d471de60bb4 BACKPORT: kbuild: add ld-name macro 06280a90d845 BACKPORT: arm64: keep .altinstructions and .altinstr_replacement eb0ad3ae07f9 BACKPORT: kbuild: add __cc-ifversion and compiler-specific variants 3d01e1eba86b BACKPORT: FROMLIST: kbuild: add clang-version.sh 18dd378ab563 BACKPORT: FROMLIST: kbuild: fix LD_DEAD_CODE_DATA_ELIMINATION aabbc122b1de BACKPORT: kbuild: thin archives make default for all archs 756d47e345fc BACKPORT: kbuild: allow archs to select link dead code/data elimination 723ab99e48a7 BACKPORT: kbuild: allow architectures to use thin archives instead of ld -r 0b77ec583772 drivers/usb/serial/console.c: remove superfluous serial->port condition 6488cb478f04 drivers/firmware/efi/libstub.c: prevent a relocation dba4259216a0 UPSTREAM: pidfd: fix a poll race when setting exit_state baab6e33b07b BACKPORT: arch: wire-up pidfd_open() 5d2e9e4f8630 BACKPORT: pid: add pidfd_open() f8396a127daf UPSTREAM: pidfd: add polling support f4c358582254 UPSTREAM: signal: improve comments 5500316dc8d8 UPSTREAM: fork: do not release lock that wasn't taken fc7d707593e3 BACKPORT: signal: support CLONE_PIDFD with pidfd_send_signal f044fa00d72a BACKPORT: clone: add CLONE_PIDFD f20fc1c548f2 UPSTREAM: Make anon_inodes unconditional de80525cd462 UPSTREAM: signal: use fdget() since we don't allow O_PATH 229e1bdd624e UPSTREAM: signal: don't silently convert SI_USER signals to non-current pidfd ada02e996b52 BACKPORT: signal: add pidfd_send_signal() syscall 828857678c5c compat: add in_compat_syscall to ask whether we're in a compat syscall e7aede4896c0 bpf: Add new cgroup attach type to enable sock modifications 9ed75228b09c ebpf: allow bpf_get_current_uid_gid_proto also for networking c5aa3963b4ae bpf: fix overflow in prog accounting c46a001439fc bpf: Make sure mac_header was set before using it 8aed99185615 bpf: Enlarge offset check value to INT_MAX in bpf_skb_{load,store}_bytes b0a638335ba6 bpf: avoid false sharing of map refcount with max_entries 1f21605e373c net: remove hlist_nulls_add_tail_rcu() 9ce369b09dbb udp: get rid of SLAB_DESTROY_BY_RCU allocations 070f539fb5d7 udp: no longer use SLAB_DESTROY_BY_RCU a32d2ea857c5 inet: refactor inet[6]_lookup functions to take skb fcf3e7bc7203 soreuseport: fix initialization race df03c8cf024a soreuseport: Fix TCP listener hash collision bd8b9f50c9d3 inet: Fix missing return value in inet6_hash bae331196dd0 soreuseport: fast reuseport TCP socket selection 4ada2ed73da0 inet: create IPv6-equivalent inet_hash function 73f609838475 sock: struct proto hash function may error e3b32750621b cgroup: Fix sock_cgroup_data on big-endian. 69dabcedd4b9 selinux: always allow mounting submounts 17d6ddebcc49 userns: Don't fail follow_automount based on s_user_ns cbd08255e6f8 fs: Better permission checking for submounts 3a9ace719251 mnt: Move the FS_USERNS_MOUNT check into sget_userns af53549b43c5 locks: sprinkle some tracepoints around the file locking code 07dbbc84aa34 locks: rename __posix_lock_file to posix_lock_inode 400cbe93d180 autofs: Fix automounts by using current_real_cred()->uid 7903280ee07a fs: Call d_automount with the filesystems creds b87fb50ff1cd UPSTREAM: kernfs: Check KERNFS_HAS_RELEASE before calling kernfs_release_file() c9c596de3e52 UPSTREAM: kernfs: fix locking around kernfs_ops->release() callback 2172eaf5a901 UPSTREAM: cgroup, bpf: remove unnecessary #include dc81f3963dde kernfs: kernfs_sop_show_path: don't return 0 after seq_dentry call ce9a52e20897 cgroup: Make rebind_subsystems() disable v2 controllers all at once ce5e3aa14c39 cgroup: fix sock_cgroup_data initialization on earlier compilers 94a70ef24da9 samples/bpf: fix bpf_perf_event_output prototype c1920272278e net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list d7707635776b sk_buff: allow segmenting based on frag sizes 924bbacea75e ip_tunnel, bpf: ip_tunnel_info_opts_{get, set} depends on CONFIG_INET 0e9008d618f4 bpf: udp: ipv6: Avoid running reuseport's bpf_prog from __udp6_lib_err 01b437940f5e soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF 421fbf04bf2c soreuseport: change consume_skb to kfree_skb in error case 1ab50514c430 ipv6: Fix SO_REUSEPORT UDP socket with implicit sk_ipv6only f3dfd61c502d soreuseport: fix ordering for mixed v4/v6 sockets 245ee3c90795 soreuseport: fix NULL ptr dereference SO_REUSEPORT after bind 113fb209854a bpf: do not blindly change rlimit in reuseport net selftest 985253ef27d2 bpf: fix rlimit in reuseport net selftest ae61334510be soreuseport: Fix reuseport_bpf testcase on 32bit architectures 6efa24da01a5 udp: fix potential infinite loop in SO_REUSEPORT logic 66df70c6605d soreuseport: BPF selection functional test for TCP fe161031b8a8 soreuseport: pass skb to secondary UDP socket lookup 9223919efdf2 soreuseport: BPF selection functional test 2090ed790dbb soreuseport: fix mem leak in reuseport_add_sock() 67887f6ac3f1 Merge "diag: Ensure dci entry is valid before sending the packet" e41c0da23b38 diag: Prevent out of bound write while sending dci pkt to remote e1085d1ef39b diag: Ensure dci entry is valid before sending the packet 16802e80ecb5 Merge "ion: Fix integer overflow in msm_ion_custom_ioctl" 57146f83f388 ion: Fix integer overflow in msm_ion_custom_ioctl 6fc2001969fe diag: Use valid data_source for a valid token 0c6dbf858a98 qcacld-3.0: Avoid OOB read in dot11f_unpack_assoc_response f07caca0c485 qcacld-3.0: Fix array OOB for duplicate rate 5a359aba0364 msm: kgsl: Remove 'fd' dependency to get dma_buf handle da8317596949 msm: kgsl: Fix gpuaddr_in_range() to check upper bound 2ed91a98d8b4 msm: adsprpc: Handle UAF in fastrpc debugfs read 2967159ad303 msm: kgsl: Add a sysfs node to control performance counter reads e392a84f25f5 msm: kgsl: Perform cache flush on the pages obtained using get_user_pages() 28b45f75d2ee soc: qcom: hab: Add sanity check for payload_count 885caec7690f Merge "futex: Fix inode life-time issue" 0f57701d2643 Merge "futex: Handle faults correctly for PI futexes" 7d7eb450c333 Merge "futex: Rework inconsistent rt_mutex/futex_q state" 124ebd87ef2f msm: kgsl: Fix out of bound write in adreno_profile_submit_time 228bbfb25032 futex: Fix inode life-time issue 7075ca6a22b3 futex: Handle faults correctly for PI futexes a436b73e9032 futex: Simplify fixup_pi_state_owner() 11b99dbe3221 futex: Use pi_state_update_owner() in put_pi_state() f34484030550 rtmutex: Remove unused argument from rt_mutex_proxy_unlock() 079d1c90b3c3 futex: Provide and use pi_state_update_owner() 3b51e24eb17b futex: Replace pointless printk in fixup_owner() 0eac5c2583a1 futex: Avoid violating the 10th rule of futex 6d6ed38b7d10 futex: Rework inconsistent rt_mutex/futex_q state 3c8f7dfd59b5 futex: Remove rt_mutex_deadlock_account_*() 9c870a329520 futex,rt_mutex: Provide futex specific rt_mutex API 7504736e8725 msm: adsprpc: Handle UAF in process shell memory 994e5922a0c2 Disable TRACER Check to improve Camera Performance 8fb3f17b3ad1 msm: kgsl: Deregister gpu address on memdesc_sg_virt failure 13aa628efdca Merge "crypto: Fix possible stack out-of-bound error" 92e777451003 Merge "msm: kgsl: Correct the refcount on current process PID." 9ca218394ed4 Merge "msm: kgsl: Compare pid pointer instead of TGID for a new process" 7eed1f2e0f43 Merge "qcom,max-freq-level change for trial" 6afb5eb98e36 crypto: Fix possible stack out-of-bound error 8b5ba278ed4b msm: kgsl: Correct the refcount on current process PID. 4150552fac96 msm: kgsl: Compare pid pointer instead of TGID for a new process c272102c0793 qcom,max-freq-level change for trial 854ef3ce73f5 msm: kgsl: Protect the memdesc->gpuaddr in SVM use cases. 79c8161aeac9 msm: kgsl: Stop using memdesc->usermem. Change-Id: Iea7db1362c3cd18e36f243411e773a9054f6a445
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/cgroup.c33
-rw-r--r--kernel/bpf/core.c24
-rw-r--r--kernel/bpf/syscall.c57
-rw-r--r--kernel/cgroup.c31
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c136
-rw-r--r--kernel/pid.c74
-rw-r--r--kernel/signal.c142
-rw-r--r--kernel/trace/ftrace.c17
-rw-r--r--kernel/trace/trace.c4
10 files changed, 482 insertions, 37 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 357ce8355d57..8210c7dd7532 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -424,3 +424,36 @@ int __cgroup_bpf_run_filter(struct sock *sk,
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter);
+
+/**
+ * __cgroup_bpf_run_filter_sk() - Run a program on a sock
+ * @sk: sock structure to manipulate
+ * @type: The type of program to be exectuted
+ *
+ * socket is passed is expected to be of type INET or INET6.
+ *
+ * The program type passed in via @type must be suitable for sock
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if any if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+ enum bpf_attach_type type)
+{
+ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ struct bpf_prog *prog;
+ int ret = 0;
+
+
+ rcu_read_lock();
+
+ prog = rcu_dereference(cgrp->bpf.effective[type]->progs[0]);
+ if (prog)
+ ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
+
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 95ffe1fac0bf..2b1a925489cf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -60,11 +60,13 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
{
u8 *ptr = NULL;
- if (k >= SKF_NET_OFF)
+ if (k >= SKF_NET_OFF) {
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
- else if (k >= SKF_LL_OFF)
+ } else if (k >= SKF_LL_OFF) {
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ return NULL;
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
+ }
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
@@ -105,19 +107,29 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
gfp_extra_flags;
struct bpf_prog *fp;
+ u32 pages, delta;
+ int ret;
BUG_ON(fp_old == NULL);
size = round_up(size, PAGE_SIZE);
- if (size <= fp_old->pages * PAGE_SIZE)
+ pages = size / PAGE_SIZE;
+ if (pages <= fp_old->pages)
return fp_old;
+ delta = pages - fp_old->pages;
+ ret = __bpf_prog_charge(fp_old->aux->user, delta);
+ if (ret)
+ return NULL;
+
fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
- if (fp != NULL) {
+ if (fp == NULL) {
+ __bpf_prog_uncharge(fp_old->aux->user, delta);
+ } else {
kmemcheck_annotate_bitfield(fp, meta);
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
- fp->pages = size / PAGE_SIZE;
+ fp->pages = pages;
fp->aux->prog = fp;
/* We keep fp->aux from fp_old around in the new
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a73a056ccd88..f65bd2dc07f8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -652,19 +652,39 @@ static void free_used_maps(struct bpf_prog_aux *aux)
kfree(aux->used_maps);
}
+int __bpf_prog_charge(struct user_struct *user, u32 pages)
+{
+ unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ unsigned long user_bufs;
+
+ if (user) {
+ user_bufs = atomic_long_add_return(pages, &user->locked_vm);
+ if (user_bufs > memlock_limit) {
+ atomic_long_sub(pages, &user->locked_vm);
+ return -EPERM;
+ }
+ }
+
+ return 0;
+}
+
+void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
+{
+ if (user)
+ atomic_long_sub(pages, &user->locked_vm);
+}
+
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = get_current_user();
- unsigned long memlock_limit;
-
- memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ int ret;
- atomic_long_add(prog->pages, &user->locked_vm);
- if (atomic_long_read(&user->locked_vm) > memlock_limit) {
- atomic_long_sub(prog->pages, &user->locked_vm);
+ ret = __bpf_prog_charge(user, prog->pages);
+ if (ret) {
free_uid(user);
- return -EPERM;
+ return ret;
}
+
prog->aux->user = user;
return 0;
}
@@ -673,7 +693,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
struct user_struct *user = prog->aux->user;
- atomic_long_sub(prog->pages, &user->locked_vm);
+ __bpf_prog_uncharge(user, prog->pages);
free_uid(user);
}
@@ -933,7 +953,24 @@ static int bpf_prog_attach(const union bpf_attr *attr)
bpf_prog_put(prog);
cgroup_put(cgrp);
break;
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ prog = bpf_prog_get_type(attr->attach_bpf_fd,
+ BPF_PROG_TYPE_CGROUP_SOCK);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp)) {
+ bpf_prog_put(prog);
+ return PTR_ERR(cgrp);
+ }
+ ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+ attr->attach_flags);
+ if (ret)
+ bpf_prog_put(prog);
+ cgroup_put(cgrp);
+ break;
default:
return -EINVAL;
}
@@ -961,7 +998,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_CGROUP_INET_EGRESS:
ptype = BPF_PROG_TYPE_CGROUP_SKB;
break;
-
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCK;
+ break;
default:
return -EINVAL;
}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d36c4f914a1e..c2508ca442b7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1544,6 +1544,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
struct cgroup *dcgrp = &dst_root->cgrp;
struct cgroup_subsys *ss;
int ssid, i, ret;
+ u16 dfl_disable_ss_mask = 0;
lockdep_assert_held(&cgroup_mutex);
@@ -1560,8 +1561,28 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
/* can't move between two non-dummy roots either */
if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
return -EBUSY;
+
+ /*
+ * Collect ssid's that need to be disabled from default
+ * hierarchy.
+ */
+ if (ss->root == &cgrp_dfl_root)
+ dfl_disable_ss_mask |= 1 << ssid;
+
} while_each_subsys_mask();
+ if (dfl_disable_ss_mask) {
+ struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
+
+ /*
+ * Controllers from default hierarchy that need to be rebound
+ * are all disabled together in one go.
+ */
+ cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
+ WARN_ON(cgroup_apply_control(scgrp));
+ cgroup_finalize_control(scgrp, 0);
+ }
+
do_each_subsys_mask(ss, ssid, ss_mask) {
struct cgroup_root *src_root = ss->root;
struct cgroup *scgrp = &src_root->cgrp;
@@ -1570,10 +1591,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
WARN_ON(!css || cgroup_css(dcgrp, ss));
- /* disable from the source */
- src_root->subsys_mask &= ~(1 << ssid);
- WARN_ON(cgroup_apply_control(scgrp));
- cgroup_finalize_control(scgrp, 0);
+ if (src_root != &cgrp_dfl_root) {
+ /* disable from the source */
+ src_root->subsys_mask &= ~(1 << ssid);
+ WARN_ON(cgroup_apply_control(scgrp));
+ cgroup_finalize_control(scgrp, 0);
+ }
/* rebind */
RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
diff --git a/kernel/exit.c b/kernel/exit.c
index babbc3c0a181..052aa52de331 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -616,6 +616,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
if (group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
+ tsk->exit_state = EXIT_ZOMBIE;
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
thread_group_empty(tsk) &&
diff --git a/kernel/fork.c b/kernel/fork.c
index 92a0df862115..a21adc0155b9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -11,6 +11,7 @@
* management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
*/
+#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
@@ -38,6 +39,7 @@
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/seccomp.h>
+#include <linux/seq_file.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
@@ -1294,6 +1296,84 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
task->pids[type].pid = pid;
}
+static int pidfd_release(struct inode *inode, struct file *file)
+{
+ struct pid *pid = file->private_data;
+
+ file->private_data = NULL;
+ put_pid(pid);
+ return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+ struct pid_namespace *ns = file_inode(m->file)->i_sb->s_fs_info;
+ struct pid *pid = f->private_data;
+
+ seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns));
+ seq_putc(m, '\n');
+}
+#endif
+
+/*
+ * Poll support for process exit notification.
+ */
+static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
+{
+ struct task_struct *task;
+ struct pid *pid = file->private_data;
+ int poll_flags = 0;
+
+ poll_wait(file, &pid->wait_pidfd, pts);
+
+ rcu_read_lock();
+ task = pid_task(pid, PIDTYPE_PID);
+ /*
+ * Inform pollers only when the whole thread group exits.
+ * If the thread group leader exits before all other threads in the
+ * group, then poll(2) should block, similar to the wait(2) family.
+ */
+ if (!task || (task->exit_state && thread_group_empty(task)))
+ poll_flags = POLLIN | POLLRDNORM;
+ rcu_read_unlock();
+
+ return poll_flags;
+}
+
+const struct file_operations pidfd_fops = {
+ .release = pidfd_release,
+ .poll = pidfd_poll,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = pidfd_show_fdinfo,
+#endif
+};
+
+/**
+ * pidfd_create() - Create a new pid file descriptor.
+ *
+ * @pid: struct pid that the pidfd will reference
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set.
+ *
+ * Note, that this function can only be called after the fd table has
+ * been unshared to avoid leaking the pidfd to the new process.
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+static int pidfd_create(struct pid *pid)
+{
+ int fd;
+
+ fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
+ O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ put_pid(pid);
+
+ return fd;
+}
+
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
@@ -1305,13 +1385,14 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
+ int __user *parent_tidptr,
int __user *child_tidptr,
struct pid *pid,
int trace,
unsigned long tls,
int node)
{
- int retval;
+ int pidfd = -1, retval;
struct task_struct *p;
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
@@ -1356,6 +1437,31 @@ static struct task_struct *copy_process(unsigned long clone_flags,
return ERR_PTR(-EINVAL);
}
+ if (clone_flags & CLONE_PIDFD) {
+ int reserved;
+
+ /*
+ * - CLONE_PARENT_SETTID is useless for pidfds and also
+ * parent_tidptr is used to return pidfds.
+ * - CLONE_DETACHED is blocked so that we can potentially
+ * reuse it later for CLONE_PIDFD.
+ * - CLONE_THREAD is blocked until someone really needs it.
+ */
+ if (clone_flags &
+ (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Verify that parent_tidptr is sane so we can potentially
+ * reuse it later.
+ */
+ if (get_user(reserved, parent_tidptr))
+ return ERR_PTR(-EFAULT);
+
+ if (reserved != 0)
+ return ERR_PTR(-EINVAL);
+ }
+
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;
@@ -1538,6 +1644,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
}
+ /*
+ * This has to happen after we've potentially unshared the file
+ * descriptor table (so that the pidfd doesn't leak into the child
+ * if the fd table isn't shared).
+ */
+ if (clone_flags & CLONE_PIDFD) {
+ retval = pidfd_create(pid);
+ if (retval < 0)
+ goto bad_fork_free_pid;
+
+ pidfd = retval;
+ retval = put_user(pidfd, parent_tidptr);
+ if (retval)
+ goto bad_fork_put_pidfd;
+ }
+
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
@@ -1587,7 +1709,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
*/
retval = cgroup_can_fork(p);
if (retval)
- goto bad_fork_free_pid;
+ goto bad_fork_cgroup_threadgroup_change_end;
/*
* From this point on we must avoid any synchronous user-space
@@ -1698,8 +1820,12 @@ bad_fork_cancel_cgroup:
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
cgroup_cancel_fork(p);
+bad_fork_cgroup_threadgroup_change_end:
+ cgroup_threadgroup_change_end(current);
+bad_fork_put_pidfd:
+ if (clone_flags & CLONE_PIDFD)
+ sys_close(pidfd);
bad_fork_free_pid:
- threadgroup_change_end(current);
if (pid != &init_struct_pid)
free_pid(pid);
bad_fork_cleanup_io:
@@ -1754,7 +1880,7 @@ static inline void init_idle_pids(struct pid_link *links)
struct task_struct *fork_idle(int cpu)
{
struct task_struct *task;
- task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
+ task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0,
cpu_to_node(cpu));
if (!IS_ERR(task)) {
init_idle_pids(task->pids);
@@ -1799,7 +1925,7 @@ long _do_fork(unsigned long clone_flags,
trace = 0;
}
- p = copy_process(clone_flags, stack_start, stack_size,
+ p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
/*
* Do this prior waking up the new thread - the thread pointer
diff --git a/kernel/pid.c b/kernel/pid.c
index ccfdb56321c6..9bc06f3a2b54 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -38,6 +38,8 @@
#include <linux/syscalls.h>
#include <linux/proc_ns.h>
#include <linux/proc_fs.h>
+#include <linux/anon_inodes.h>
+//#include <linux/sched/signal.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -331,6 +333,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
for (type = 0; type < PIDTYPE_MAX; ++type)
INIT_HLIST_HEAD(&pid->tasks[type]);
+ init_waitqueue_head(&pid->wait_pidfd);
+
upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
@@ -564,6 +568,76 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
return pid;
}
+/**
+ * pidfd_create() - Create a new pid file descriptor.
+ *
+ * @pid: struct pid that the pidfd will reference
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set.
+ *
+ * Note, that this function can only be called after the fd table has
+ * been unshared to avoid leaking the pidfd to the new process.
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+static int pidfd_create(struct pid *pid)
+{
+ int fd;
+
+ fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
+ O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ put_pid(pid);
+
+ return fd;
+}
+
+/**
+ * pidfd_open() - Open new pid file descriptor.
+ *
+ * @pid: pid for which to retrieve a pidfd
+ * @flags: flags to pass
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set for
+ * the process identified by @pid. Currently, the process identified by
+ * @pid must be a thread-group leader. This restriction currently exists
+ * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
+ * be used with CLONE_THREAD) and pidfd polling (only supports thread group
+ * leaders).
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
+{
+ int fd, ret;
+ struct pid *p;
+ struct task_struct *tsk;
+
+ if (flags)
+ return -EINVAL;
+
+ if (pid <= 0)
+ return -EINVAL;
+
+ p = find_get_pid(pid);
+ if (!p)
+ return -ESRCH;
+
+ ret = 0;
+ rcu_read_lock();
+ tsk = pid_task(p, PIDTYPE_PID);
+ /* Check that pid belongs to a group leader task */
+ if (!tsk || !thread_group_leader(tsk))
+ ret = -EINVAL;
+ rcu_read_unlock();
+
+ fd = ret ?: pidfd_create(p);
+ put_pid(p);
+ return fd;
+}
+
/*
* The pid hash table is scaled according to the amount of memory in the
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
diff --git a/kernel/signal.c b/kernel/signal.c
index a699055ebfe8..a9697e189a58 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -14,7 +14,9 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/proc_fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
@@ -1632,6 +1634,14 @@ ret:
return ret;
}
+static void do_notify_pidfd(struct task_struct *task)
+{
+ struct pid *pid;
+
+ pid = task_pid(task);
+ wake_up_all(&pid->wait_pidfd);
+}
+
/*
* Let a parent know about the death of a child.
* For a stopped/continued status change, use do_notify_parent_cldstop instead.
@@ -1655,6 +1665,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
BUG_ON(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
+ /* Wake up all pidfd waiters */
+ do_notify_pidfd(tsk);
+
if (sig != SIGCHLD) {
/*
* This is only possible if parent == real_parent.
@@ -2922,6 +2935,15 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
return ret;
}
+static inline void prepare_kill_siginfo(int sig, struct siginfo *info)
+{
+ info->si_signo = sig;
+ info->si_errno = 0;
+ info->si_code = SI_USER;
+ info->si_pid = task_tgid_vnr(current);
+ info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
+}
+
/**
* sys_kill - send a signal to a process
* @pid: the PID of the process
@@ -2931,15 +2953,125 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct siginfo info;
- info.si_signo = sig;
- info.si_errno = 0;
- info.si_code = SI_USER;
- info.si_pid = task_tgid_vnr(current);
- info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+ prepare_kill_siginfo(sig, &info);
return kill_something_info(sig, &info, pid);
}
+/*
+ * Verify that the signaler and signalee either are in the same pid namespace
+ * or that the signaler's pid namespace is an ancestor of the signalee's pid
+ * namespace.
+ */
+static bool access_pidfd_pidns(struct pid *pid)
+{
+ struct pid_namespace *active = task_active_pid_ns(current);
+ struct pid_namespace *p = ns_of_pid(pid);
+
+ for (;;) {
+ if (!p)
+ return false;
+ if (p == active)
+ break;
+ p = p->parent;
+ }
+
+ return true;
+}
+
+static struct pid *pidfd_to_pid(const struct file *file)
+{
+ if (file->f_op == &pidfd_fops)
+ return file->private_data;
+
+ return tgid_pidfd_to_pid(file);
+}
+
+static int copy_siginfo_from_user_any(siginfo_t *kinfo, siginfo_t __user *info)
+{
+#ifdef CONFIG_COMPAT
+ /*
+ * Avoid hooking up compat syscalls and instead handle necessary
+ * conversions here. Note, this is a stop-gap measure and should not be
+ * considered a generic solution.
+ */
+ if (in_compat_syscall())
+ return copy_siginfo_from_user32(
+ kinfo, (struct compat_siginfo __user *)info);
+#endif
+ return copy_from_user(kinfo, info, sizeof(siginfo_t));
+}
+
+/**
+ * sys_pidfd_send_signal - Signal a process through a pidfd
+ * @pidfd: file descriptor of the process
+ * @sig: signal to send
+ * @info: signal info
+ * @flags: future flags
+ *
+ * The syscall currently only signals via PIDTYPE_PID which covers
+ * kill(<positive-pid>, <signal>. It does not signal threads or process
+ * groups.
+ * In order to extend the syscall to threads and process groups the @flags
+ * argument should be used. In essence, the @flags argument will determine
+ * what is signaled and not the file descriptor itself. Put in other words,
+ * grouping is a property of the flags argument not a property of the file
+ * descriptor.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
+ siginfo_t __user *, info, unsigned int, flags)
+{
+ int ret;
+ struct fd f;
+ struct pid *pid;
+ siginfo_t kinfo;
+
+ /* Enforce flags be set to 0 until we add an extension. */
+ if (flags)
+ return -EINVAL;
+
+ f = fdget(pidfd);
+ if (!f.file)
+ return -EBADF;
+
+ /* Is this a pidfd? */
+ pid = pidfd_to_pid(f.file);
+ if (IS_ERR(pid)) {
+ ret = PTR_ERR(pid);
+ goto err;
+ }
+
+ ret = -EINVAL;
+ if (!access_pidfd_pidns(pid))
+ goto err;
+
+ if (info) {
+ ret = copy_siginfo_from_user_any(&kinfo, info);
+ if (unlikely(ret))
+ goto err;
+
+ ret = -EINVAL;
+ if (unlikely(sig != kinfo.si_signo))
+ goto err;
+
+ /* Only allow sending arbitrary signals to yourself. */
+ ret = -EPERM;
+ if ((task_pid(current) != pid) &&
+ (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
+ goto err;
+ } else {
+ prepare_kill_siginfo(sig, &kinfo);
+ }
+
+ ret = kill_pid_info(sig, &kinfo, pid);
+
+err:
+ fdput(f);
+ return ret;
+}
+
static int
do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
{
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c5484723abda..74fa302d9a68 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -128,8 +128,9 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs);
#else
/* See comment below, where ftrace_ops_list_func is defined */
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
-#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *regs);
+#define ftrace_ops_list_func ftrace_ops_no_ops
#endif
/*
@@ -5229,7 +5230,8 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
}
NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *regs)
{
__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
}
@@ -5677,14 +5679,17 @@ void ftrace_graph_graph_time_control(bool enable)
fgraph_graph_time = enable;
}
+void ftrace_graph_return_stub(struct ftrace_graph_ret *trace)
+{
+}
+
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
{
return 0;
}
/* The callbacks that hook a function */
-trace_func_graph_ret_t ftrace_graph_return =
- (trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ret_t ftrace_graph_return = ftrace_graph_return_stub;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -5912,7 +5917,7 @@ void unregister_ftrace_graph(void)
goto out;
ftrace_graph_active--;
- ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
+ ftrace_graph_return = ftrace_graph_return_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
__ftrace_graph_entry = ftrace_graph_entry_stub;
ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a67f792fb950..17996354c745 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7026,7 +7026,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
}
-static struct vfsmount *trace_automount(void *ingore)
+static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
{
struct vfsmount *mnt;
struct file_system_type *type;
@@ -7039,7 +7039,7 @@ static struct vfsmount *trace_automount(void *ingore)
type = get_fs_type("tracefs");
if (!type)
return NULL;
- mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
+ mnt = vfs_submount(mntpt, type, "tracefs", NULL);
put_filesystem(type);
if (IS_ERR(mnt))
return NULL;