diff options
| author | Greg Kroah-Hartman <gregkh@google.com> | 2021-08-04 12:50:19 +0200 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@google.com> | 2021-08-04 12:50:19 +0200 |
| commit | 449846c3e01f672fdb33412056058564d2cfaf21 (patch) | |
| tree | 6c641c51ec696449cbd6d881c7aa30ae9af431fd /net/unix/af_unix.c | |
| parent | 137e37851b23293102f8c090dffb4488a4170c4a (diff) | |
| parent | 372cffad865ffc79132d858ab0526dd51f97b0c8 (diff) | |
Merge 4.4.278 into android-4.4-p
Changes in 4.4.278
net: split out functions related to registering inflight socket files
af_unix: fix garbage collect vs MSG_PEEK
workqueue: fix UAF in pwq_unbound_release_workfn()
net/802/mrp: fix memleak in mrp_request_join()
net/802/garp: fix memleak in garp_request_join()
sctp: move 198 addresses from unusable to private scope
hfs: add missing clean-up in hfs_fill_super
hfs: fix high memory mapping in hfs_bnode_read
hfs: add lock nesting notation to hfs_find_init
ARM: dts: versatile: Fix up interrupt controller node names
lib/string.c: add multibyte memset functions
ARM: ensure the signal page contains defined contents
ocfs2: fix zero out valid data
ocfs2: issue zeroout to EOF blocks
can: usb_8dev: fix memory leak
can: ems_usb: fix memory leak
can: esd_usb2: fix memory leak
NIU: fix incorrect error return, missed in previous revert
x86/asm: Ensure asm/proto.h can be included stand-alone
cfg80211: Fix possible memory leak in function cfg80211_bss_update
netfilter: nft_nat: allow to specify layer 4 protocol NAT only
tipc: fix sleeping in tipc accept routine
mlx4: Fix missing error code in mlx4_load_one()
net: llc: fix skb_over_panic
tulip: windbond-840: Fix missing pci_disable_device() in probe and remove
sis900: Fix missing pci_disable_device() in probe and remove
Linux 4.4.278
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I51e4e7e9cc9db03de57626e25e3785c400ced81f
Diffstat (limited to 'net/unix/af_unix.c')
| -rw-r--r-- | net/unix/af_unix.c | 115 |
1 files changed, 45 insertions, 70 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 33948cc03ba6..ac95ef644412 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,6 +118,8 @@ #include <linux/security.h> #include <linux/freezer.h> +#include "scm.h" + struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); @@ -1504,78 +1506,51 @@ out: return err; } -static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) -{ - int i; - - scm->fp = UNIXCB(skb).fp; - UNIXCB(skb).fp = NULL; - - for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->user, scm->fp->fp[i]); -} - -static void unix_destruct_scm(struct sk_buff *skb) -{ - struct scm_cookie scm; - memset(&scm, 0, sizeof(scm)); - scm.pid = UNIXCB(skb).pid; - if (UNIXCB(skb).fp) - unix_detach_fds(&scm, skb); - - /* Alas, it calls VFS */ - /* So fscking what? fput() had been SMP-safe since the last Summer */ - scm_destroy(&scm); - sock_wfree(skb); -} - -/* - * The "user->unix_inflight" variable is protected by the garbage - * collection lock, and we just read it locklessly here. If you go - * over the limit, there might be a tiny race in actually noticing - * it across threads. Tough. - */ -static inline bool too_many_unix_fds(struct task_struct *p) -{ - struct user_struct *user = current_user(); - - if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) - return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); - return false; -} - -#define MAX_RECURSION_LEVEL 4 - -static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { - int i; - unsigned char max_level = 0; - - if (too_many_unix_fds(current)) - return -ETOOMANYREFS; - - for (i = scm->fp->count - 1; i >= 0; i--) { - struct sock *sk = unix_get_socket(scm->fp->fp[i]); - - if (sk) - max_level = max(max_level, - unix_sk(sk)->recursion_level); - } - if (unlikely(max_level > MAX_RECURSION_LEVEL)) - return -ETOOMANYREFS; + scm->fp = scm_fp_dup(UNIXCB(skb).fp); /* - * Need to duplicate file references for the sake of garbage - * collection. Otherwise a socket in the fps might become a - * candidate for GC while the skb is not yet queued. + * Garbage collection of unix sockets starts by selecting a set of + * candidate sockets which have reference only from being in flight + * (total_refs == inflight_refs). This condition is checked once during + * the candidate collection phase, and candidates are marked as such, so + * that non-candidates can later be ignored. While inflight_refs is + * protected by unix_gc_lock, total_refs (file count) is not, hence this + * is an instantaneous decision. + * + * Once a candidate, however, the socket must not be reinstalled into a + * file descriptor while the garbage collection is in progress. + * + * If the above conditions are met, then the directed graph of + * candidates (*) does not change while unix_gc_lock is held. + * + * Any operations that changes the file count through file descriptors + * (dup, close, sendmsg) does not change the graph since candidates are + * not installed in fds. + * + * Dequeing a candidate via recvmsg would install it into an fd, but + * that takes unix_gc_lock to decrement the inflight count, so it's + * serialized with garbage collection. + * + * MSG_PEEK is special in that it does not change the inflight count, + * yet does install the socket into an fd. The following lock/unlock + * pair is to ensure serialization with garbage collection. It must be + * done between incrementing the file count and installing the file into + * an fd. + * + * If garbage collection starts after the barrier provided by the + * lock/unlock, then it will see the elevated refcount and not mark this + * as a candidate. If a garbage collection is already in progress + * before the file count was incremented, then the lock/unlock pair will + * ensure that garbage collection is finished before progressing to + * installing the fd. + * + * (*) A -> B where B is on the queue of A or B is on the queue of C + * which is on the queue of listening socket A. */ - UNIXCB(skb).fp = scm_fp_dup(scm->fp); - if (!UNIXCB(skb).fp) - return -ENOMEM; - - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->user, scm->fp->fp[i]); - return max_level; + spin_lock(&unix_gc_lock); + spin_unlock(&unix_gc_lock); } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -2203,7 +2178,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; @@ -2448,7 +2423,7 @@ unlock: /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); |
