summaryrefslogtreecommitdiff
path: root/net/unix/af_unix.c
diff options
context:
space:
mode:
authorMichael Bestas <mkbestas@lineageos.org>2021-08-12 16:02:48 +0300
committerMichael Bestas <mkbestas@lineageos.org>2021-08-12 16:02:48 +0300
commitafefb5a03546b25a00c89366a837de8d5dc12704 (patch)
tree42d4719f9a186a70a5889e797e0819dc890eb719 /net/unix/af_unix.c
parent48fe008ed6afcfa9ce3138b3558cd9c5764fc8c1 (diff)
parent449846c3e01f672fdb33412056058564d2cfaf21 (diff)
Merge branch 'android-4.4-p' of https://android.googlesource.com/kernel/common into lineage-18.1-caf-msm8998
# By Pavel Skripkin (6) and others # Via Greg Kroah-Hartman * android-4.4-p: Linux 4.4.278 sis900: Fix missing pci_disable_device() in probe and remove tulip: windbond-840: Fix missing pci_disable_device() in probe and remove net: llc: fix skb_over_panic mlx4: Fix missing error code in mlx4_load_one() tipc: fix sleeping in tipc accept routine netfilter: nft_nat: allow to specify layer 4 protocol NAT only cfg80211: Fix possible memory leak in function cfg80211_bss_update x86/asm: Ensure asm/proto.h can be included stand-alone NIU: fix incorrect error return, missed in previous revert can: esd_usb2: fix memory leak can: ems_usb: fix memory leak can: usb_8dev: fix memory leak ocfs2: issue zeroout to EOF blocks ocfs2: fix zero out valid data ARM: ensure the signal page contains defined contents lib/string.c: add multibyte memset functions ARM: dts: versatile: Fix up interrupt controller node names hfs: add lock nesting notation to hfs_find_init hfs: fix high memory mapping in hfs_bnode_read hfs: add missing clean-up in hfs_fill_super sctp: move 198 addresses from unusable to private scope net/802/garp: fix memleak in garp_request_join() net/802/mrp: fix memleak in mrp_request_join() workqueue: fix UAF in pwq_unbound_release_workfn() af_unix: fix garbage collect vs MSG_PEEK net: split out functions related to registering inflight socket files Linux 4.4.277 btrfs: compression: don't try to compress if we don't have enough pages iio: accel: bma180: Fix BMA25x bandwidth register values iio: accel: bma180: Use explicit member assignment net: bcmgenet: ensure EXT_ENERGY_DET_MASK is clear media: ngene: Fix out-of-bounds bug in ngene_command_config_free_buf() tracing: Fix bug in rb_per_cpu_empty() that might cause deadloop. USB: serial: cp210x: add ID for CEL EM3588 USB ZigBee stick USB: serial: cp210x: fix comments for GE CS1000 USB: serial: option: add support for u-blox LARA-R6 family usb: renesas_usbhs: Fix superfluous irqs happen after usb_pkt_pop() usb: max-3421: Prevent corruption of freed memory USB: usb-storage: Add LaCie Rugged USB3-FW to IGNORE_UAS usb: hub: Disable USB 3 device initiated lpm if exit latency is too high KVM: PPC: Book3S: Fix H_RTAS rets buffer overflow xhci: Fix lost USB 2 remote wake ALSA: sb: Fix potential ABBA deadlock in CSP driver s390/ftrace: fix ftrace_update_ftrace_func implementation proc: Avoid mixing integer types in mem_rw() Revert "USB: quirks: ignore remote wake-up on Fibocom L850-GL LTE modem" scsi: target: Fix protect handling in WRITE SAME(32) scsi: iscsi: Fix iface sysfs attr detection netrom: Decrease sock refcount when sock timers expire net: decnet: Fix sleeping inside in af_decnet net: fix uninit-value in caif_seqpkt_sendmsg s390/bpf: Perform r1 range checking before accessing jit->seen_reg[r1] perf probe-file: Delete namelist in del_events() on the error path perf test bpf: Free obj_buf igb: Check if num of q_vectors is smaller than max before array access iavf: Fix an error handling path in 'iavf_probe()' ipv6: tcp: drop silly ICMPv6 packet too big messages tcp: annotate data races around tp->mtu_info net: validate lwtstate->data before returning from skb_tunnel_info() net: ti: fix UAF in tlan_remove_one net: moxa: fix UAF in moxart_mac_probe net: bcmgenet: Ensure all TX/RX queues DMAs are disabled net: ipv6: fix return value of ip6_skb_dst_mtu x86/fpu: Make init_fpstate correct with optimized XSAVE Revert "memory: fsl_ifc: fix leak of IO mapping on probe failure" sched/fair: Fix CFS bandwidth hrtimer expiry type scsi: aic7xxx: Fix unintentional sign extension issue on left shift of u8 kbuild: mkcompile_h: consider timestamp if KBUILD_BUILD_TIMESTAMP is set thermal/core: Correct function name thermal_zone_device_unregister() ARM: imx: pm-imx5: Fix references to imx5_cpu_suspend_info ARM: dts: imx6: phyFLEX: Fix UART hardware flow control ARM: dts: BCM63xx: Fix NAND nodes names ARM: brcmstb: dts: fix NAND nodes names Change-Id: Id59b93b8704270f45923f262facbadde4c486a15
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r--net/unix/af_unix.c115
1 files changed, 45 insertions, 70 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a0e8cb24af35..79687d5a95ac 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -118,6 +118,8 @@
#include <linux/security.h>
#include <linux/freezer.h>
+#include "scm.h"
+
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
@@ -1504,78 +1506,51 @@ out:
return err;
}
-static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- scm->fp = UNIXCB(skb).fp;
- UNIXCB(skb).fp = NULL;
-
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-
-static void unix_destruct_scm(struct sk_buff *skb)
-{
- struct scm_cookie scm;
- memset(&scm, 0, sizeof(scm));
- scm.pid = UNIXCB(skb).pid;
- if (UNIXCB(skb).fp)
- unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
- scm_destroy(&scm);
- sock_wfree(skb);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
-{
- struct user_struct *user = current_user();
-
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
- return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
- return false;
-}
-
-#define MAX_RECURSION_LEVEL 4
-
-static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
- int i;
- unsigned char max_level = 0;
-
- if (too_many_unix_fds(current))
- return -ETOOMANYREFS;
-
- for (i = scm->fp->count - 1; i >= 0; i--) {
- struct sock *sk = unix_get_socket(scm->fp->fp[i]);
-
- if (sk)
- max_level = max(max_level,
- unix_sk(sk)->recursion_level);
- }
- if (unlikely(max_level > MAX_RECURSION_LEVEL))
- return -ETOOMANYREFS;
+ scm->fp = scm_fp_dup(UNIXCB(skb).fp);
/*
- * Need to duplicate file references for the sake of garbage
- * collection. Otherwise a socket in the fps might become a
- * candidate for GC while the skb is not yet queued.
+ * Garbage collection of unix sockets starts by selecting a set of
+ * candidate sockets which have reference only from being in flight
+ * (total_refs == inflight_refs). This condition is checked once during
+ * the candidate collection phase, and candidates are marked as such, so
+ * that non-candidates can later be ignored. While inflight_refs is
+ * protected by unix_gc_lock, total_refs (file count) is not, hence this
+ * is an instantaneous decision.
+ *
+ * Once a candidate, however, the socket must not be reinstalled into a
+ * file descriptor while the garbage collection is in progress.
+ *
+ * If the above conditions are met, then the directed graph of
+ * candidates (*) does not change while unix_gc_lock is held.
+ *
+ * Any operations that changes the file count through file descriptors
+ * (dup, close, sendmsg) does not change the graph since candidates are
+ * not installed in fds.
+ *
+ * Dequeing a candidate via recvmsg would install it into an fd, but
+ * that takes unix_gc_lock to decrement the inflight count, so it's
+ * serialized with garbage collection.
+ *
+ * MSG_PEEK is special in that it does not change the inflight count,
+ * yet does install the socket into an fd. The following lock/unlock
+ * pair is to ensure serialization with garbage collection. It must be
+ * done between incrementing the file count and installing the file into
+ * an fd.
+ *
+ * If garbage collection starts after the barrier provided by the
+ * lock/unlock, then it will see the elevated refcount and not mark this
+ * as a candidate. If a garbage collection is already in progress
+ * before the file count was incremented, then the lock/unlock pair will
+ * ensure that garbage collection is finished before progressing to
+ * installing the fd.
+ *
+ * (*) A -> B where B is on the queue of A or B is on the queue of C
+ * which is on the queue of listening socket A.
*/
- UNIXCB(skb).fp = scm_fp_dup(scm->fp);
- if (!UNIXCB(skb).fp)
- return -ENOMEM;
-
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return max_level;
+ spin_lock(&unix_gc_lock);
+ spin_unlock(&unix_gc_lock);
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -2203,7 +2178,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
sk_peek_offset_fwd(sk, size);
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
}
err = (flags & MSG_TRUNC) ? skb->len - skip : size;
@@ -2448,7 +2423,7 @@ unlock:
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
sk_peek_offset_fwd(sk, chunk);