summary | refs | log | tree | commit | diff
path: root/net/unix
diff options
context:
space:
mode:
Diffstat (limited to 'net/unix')
-rw-r--r-- net/unix/af_unix.c 275
-rw-r--r-- net/unix/garbage.c 70
2 files changed, 273 insertions, 72 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5266ea7b922b..03ee4d359f6a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -140,12 +140,17 @@ static struct hlist_head *unix_sockets_unbound(void *addr)
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
- memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
+ UNIXCB(skb).secid = scm->secid;
}
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
- scm->secid = *UNIXSID(skb);
+ scm->secid = UNIXCB(skb).secid;
+}
+
+/* Report whether @skb carries the same secid as the one held in @scm. */
+static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	return scm->secid == UNIXCB(skb).secid;
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
@@ -153,6 +158,11 @@ static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
+
+/* Stub for builds without CONFIG_SECURITY_NETWORK: always reports a match. */
+static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	return true;
+}
#endif /* CONFIG_SECURITY_NETWORK */
/*
@@ -518,6 +528,11 @@ static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
+ size_t size, int flags);
+static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
+ struct pipe_inode_info *, size_t size,
+ unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
@@ -558,7 +573,8 @@ static const struct proto_ops unix_stream_ops = {
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
.mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
+ .sendpage = unix_stream_sendpage,
+ .splice_read = unix_stream_splice_read,
.set_peek_off = unix_set_peek_off,
};
@@ -620,7 +636,7 @@ static struct proto unix_proto = {
*/
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
-static struct sock *unix_create1(struct net *net, struct socket *sock)
+static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
struct sock *sk = NULL;
struct unix_sock *u;
@@ -629,7 +645,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
- sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
+ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
if (!sk)
goto out;
@@ -688,7 +704,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- return unix_create1(net, sock) ? 0 : -ENOMEM;
+ return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
static int unix_release(struct socket *sock)
@@ -1088,7 +1104,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = -ENOMEM;
/* create new sock for complete connection */
- newsk = unix_create1(sock_net(sk), NULL);
+ newsk = unix_create1(sock_net(sk), NULL, 0);
if (newsk == NULL)
goto out;
@@ -1408,6 +1424,7 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
UNIXCB(skb).uid = scm->creds.uid;
UNIXCB(skb).gid = scm->creds.gid;
UNIXCB(skb).fp = NULL;
+ unix_get_secdata(scm, skb);
if (scm->fp && send_fds)
err = unix_attach_fds(scm, skb);
@@ -1503,7 +1520,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out_free;
max_level = err + 1;
- unix_get_secdata(&scm, skb);
skb_put(skb, len - data_len);
skb->data_len = data_len;
@@ -1720,6 +1736,101 @@ out_err:
return sent ? : err;
}
+/*
+ * unix_stream_sendpage - append one page fragment to the peer's receive queue
+ * @socket: sending stream socket
+ * @page: page holding the data
+ * @offset: offset of the data within @page
+ * @size: number of bytes to send
+ * @flags: MSG_* flags (MSG_OOB is rejected; MSG_DONTWAIT and MSG_NOSIGNAL
+ *	   are honoured)
+ *
+ * The fragment is attached to the skb at the tail of the peer's receive
+ * queue while that skb still accepts page fragments.  Otherwise a fresh,
+ * empty skb is allocated with both locks dropped, and the queue tail is
+ * re-examined once the locks have been retaken.
+ *
+ * Returns @size on success or a negative errno.  SIGPIPE is raised for the
+ * -EPIPE cases unless MSG_NOSIGNAL is set.
+ */
+static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
+				    int offset, size_t size, int flags)
+{
+	int err = 0;
+	bool send_sigpipe = true;
+	struct sock *other, *sk = socket->sk;
+	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
+
+	if (flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	other = unix_peer(sk);
+	if (!other || sk->sk_state != TCP_ESTABLISHED)
+		return -ENOTCONN;
+
+	/* Only ever entered through "goto alloc_skb" with both locks held. */
+	if (false) {
+alloc_skb:
+		unix_state_unlock(other);
+		mutex_unlock(&unix_sk(other)->readlock);
+		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
+					      &err, 0);
+		if (!newskb)
+			return err;
+	}
+
+	/* we must acquire readlock as we modify already present
+	 * skbs in the sk_receive_queue and mess with skb->len
+	 */
+	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
+	if (err) {
+		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
+		send_sigpipe = false;
+		goto err;
+	}
+
+	if (sk->sk_shutdown & SEND_SHUTDOWN) {
+		err = -EPIPE;
+		goto err_unlock;
+	}
+
+	unix_state_lock(other);
+
+	if (sock_flag(other, SOCK_DEAD) ||
+	    other->sk_shutdown & RCV_SHUTDOWN) {
+		err = -EPIPE;
+		goto err_state_unlock;
+	}
+
+	skb = skb_peek_tail(&other->sk_receive_queue);
+	if (tail && tail == skb) {
+		/* The tail is still the skb that refused the fragment. */
+		skb = newskb;
+	} else if (!skb) {
+		if (newskb)
+			skb = newskb;
+		else
+			goto alloc_skb;
+	} else if (newskb) {
+		/* The queue tail changed while the locks were dropped, so
+		 * the spare skb is not needed.  Clear the pointer after
+		 * releasing it: leaving it set would make the code below
+		 * queue an skb that has already been released.
+		 */
+		consume_skb(newskb);
+		newskb = NULL;
+	}
+
+	if (skb_append_pagefrags(skb, page, offset, size)) {
+		tail = skb;
+		goto alloc_skb;
+	}
+
+	skb->len += size;
+	skb->data_len += size;
+	skb->truesize += size;
+	atomic_add(size, &sk->sk_wmem_alloc);
+
+	if (newskb) {
+		/* Other walkers of this queue (e.g. the in-flight fd
+		 * scanner) take sk_receive_queue.lock, so linking a new
+		 * skb must happen under that lock as well.
+		 */
+		spin_lock(&other->sk_receive_queue.lock);
+		__skb_queue_tail(&other->sk_receive_queue, newskb);
+		spin_unlock(&other->sk_receive_queue.lock);
+	}
+
+	unix_state_unlock(other);
+	mutex_unlock(&unix_sk(other)->readlock);
+
+	other->sk_data_ready(other);
+
+	return size;
+
+err_state_unlock:
+	unix_state_unlock(other);
+err_unlock:
+	mutex_unlock(&unix_sk(other)->readlock);
+err:
+	kfree_skb(newskb);
+	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
+	return err;
+}
+
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
@@ -1860,8 +1971,9 @@ out:
* Sleep until more data has arrived. But check for races..
*/
static long unix_stream_data_wait(struct sock *sk, long timeo,
- struct sk_buff *last)
+ struct sk_buff *last, unsigned int last_len)
{
+ struct sk_buff *tail;
DEFINE_WAIT(wait);
unix_state_lock(sk);
@@ -1869,7 +1981,9 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (skb_peek_tail(&sk->sk_receive_queue) != last ||
+ tail = skb_peek_tail(&sk->sk_receive_queue);
+ if (tail != last ||
+ (tail && tail->len != last_len) ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1880,6 +1994,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
unix_state_unlock(sk);
timeo = freezable_schedule_timeout(timeo);
unix_state_lock(sk);
+
+ if (sock_flag(sk, SOCK_DEAD))
+ break;
+
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
}
@@ -1893,38 +2011,50 @@ static unsigned int unix_skb_len(const struct sk_buff *skb)
return skb->len - UNIXCB(skb).consumed;
}
-static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
+struct unix_stream_read_state { /* per-call arguments handed to unix_stream_read_generic() */
+ int (*recv_actor)(struct sk_buff *, int, int,
+ struct unix_stream_read_state *); /* called as (skb, skip, chunk, state); a negative return stops the read */
+ struct socket *socket; /* socket being read from */
+ struct msghdr *msg; /* recvmsg() destination; not set by the splice path */
+ struct pipe_inode_info *pipe; /* splice destination; not set by the recvmsg path */
+ size_t size; /* upper bound on the number of bytes to read */
+ int flags; /* MSG_* receive flags */
+ unsigned int splice_flags; /* forwarded to skb_splice_bits() by the splice actor */
+};
+
+static int unix_stream_read_generic(struct unix_stream_read_state *state)
{
struct scm_cookie scm;
+ struct socket *sock = state->socket;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
- DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
int copied = 0;
+ int flags = state->flags;
int noblock = flags & MSG_DONTWAIT;
- int check_creds = 0;
+ bool check_creds = false;
int target;
int err = 0;
long timeo;
int skip;
+ size_t size = state->size;
+ unsigned int last_len;
err = -EINVAL;
if (sk->sk_state != TCP_ESTABLISHED)
goto out;
err = -EOPNOTSUPP;
- if (flags&MSG_OOB)
+ if (flags & MSG_OOB)
goto out;
- target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
+ target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
timeo = sock_rcvtimeo(sk, noblock);
+ memset(&scm, 0, sizeof(scm));
+
/* Lock the socket to prevent queue disordering
* while sleeps in memcpy_tomsg
*/
-
- memset(&scm, 0, sizeof(scm));
-
err = mutex_lock_interruptible(&u->readlock);
if (unlikely(err)) {
/* recvmsg() in non blocking mode is supposed to return -EAGAIN
@@ -1939,7 +2069,12 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
struct sk_buff *skb, *last;
unix_state_lock(sk);
+ if (sock_flag(sk, SOCK_DEAD)) {
+ err = -ECONNRESET;
+ goto unlock;
+ }
last = skb = skb_peek(&sk->sk_receive_queue);
+ last_len = last ? last->len : 0;
again:
if (skb == NULL) {
unix_sk(sk)->recursion_level = 0;
@@ -1962,16 +2097,17 @@ again:
break;
mutex_unlock(&u->readlock);
- timeo = unix_stream_data_wait(sk, timeo, last);
+ timeo = unix_stream_data_wait(sk, timeo, last,
+ last_len);
- if (signal_pending(current)
- || mutex_lock_interruptible(&u->readlock)) {
+ if (signal_pending(current) ||
+ mutex_lock_interruptible(&u->readlock)) {
err = sock_intr_errno(timeo);
goto out;
}
continue;
- unlock:
+unlock:
unix_state_unlock(sk);
break;
}
@@ -1980,6 +2116,7 @@ again:
while (skip >= unix_skb_len(skb)) {
skip -= unix_skb_len(skb);
last = skb;
+ last_len = skb->len;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (!skb)
goto again;
@@ -1991,23 +2128,27 @@ again:
/* Never glue messages from different writers */
if ((UNIXCB(skb).pid != scm.pid) ||
!uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
- !gid_eq(UNIXCB(skb).gid, scm.creds.gid))
+ !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
+ !unix_secdata_eq(&scm, skb))
break;
} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
/* Copy credentials */
scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
- check_creds = 1;
+ unix_set_secdata(&scm, skb);
+ check_creds = true;
}
/* Copy address just once */
- if (sunaddr) {
- unix_copy_addr(msg, skb->sk);
+ if (state->msg && state->msg->msg_name) {
+ DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
+ state->msg->msg_name);
+ unix_copy_addr(state->msg, skb->sk);
sunaddr = NULL;
}
chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
- if (skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
- msg, chunk)) {
+ chunk = state->recv_actor(skb, skip, chunk, state);
+ if (chunk < 0) {
if (copied == 0)
copied = -EFAULT;
break;
@@ -2045,11 +2186,85 @@ again:
} while (size);
mutex_unlock(&u->readlock);
- scm_recv(sock, msg, &scm, flags);
+ if (state->msg)
+ scm_recv(sock, state->msg, &scm, flags);
+ else
+ scm_destroy(&scm);
out:
return copied ? : err;
}
+/* recv_actor for recvmsg(): copy @chunk bytes of @skb, starting @skip bytes
+ * past the already consumed part, into state->msg.  Returns @chunk when the
+ * copy succeeds, otherwise the non-zero result of skb_copy_datagram_msg().
+ */
+static int unix_stream_read_actor(struct sk_buff *skb,
+				  int skip, int chunk,
+				  struct unix_stream_read_state *state)
+{
+	int err = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
+					state->msg, chunk);
+
+	if (err)
+		return err;
+
+	return chunk;
+}
+
+/* recvmsg() entry point for stream sockets: run the generic stream reader
+ * with the copy-to-msghdr actor.
+ */
+static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
+			       size_t size, int flags)
+{
+	struct unix_stream_read_state state;
+
+	/* Fields not assigned below (pipe, splice_flags) stay zero. */
+	memset(&state, 0, sizeof(state));
+	state.socket = sock;
+	state.msg = msg;
+	state.size = size;
+	state.flags = flags;
+	state.recv_actor = unix_stream_read_actor;
+
+	return unix_stream_read_generic(&state);
+}
+
+/* Splice callback handed to skb_splice_bits(): drop the socket's readlock
+ * around splice_to_pipe() and retake it before returning its result.
+ */
+static ssize_t skb_unix_socket_splice(struct sock *sk,
+				      struct pipe_inode_info *pipe,
+				      struct splice_pipe_desc *spd)
+{
+	struct unix_sock *usk = unix_sk(sk);
+	int spliced;
+
+	mutex_unlock(&usk->readlock);
+	spliced = splice_to_pipe(pipe, spd);
+	mutex_lock(&usk->readlock);
+
+	return spliced;
+}
+
+/* recv_actor for splice(): hand @chunk bytes of @skb, starting @skip bytes
+ * past the already consumed part, to skb_splice_bits() for state->pipe.
+ */
+static int unix_stream_splice_actor(struct sk_buff *skb,
+				    int skip, int chunk,
+				    struct unix_stream_read_state *state)
+{
+	struct sock *sk = state->socket->sk;
+
+	return skb_splice_bits(skb, sk, UNIXCB(skb).consumed + skip,
+			       state->pipe, chunk, state->splice_flags,
+			       skb_unix_socket_splice);
+}
+
+/* splice_read() entry point for stream sockets: run the generic stream
+ * reader with the splice actor.  A non-zero *ppos is rejected with -ESPIPE.
+ */
+static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
+				       struct pipe_inode_info *pipe,
+				       size_t size, unsigned int flags)
+{
+	struct unix_stream_read_state state = {
+		.socket = sock,
+		.pipe = pipe,
+		.size = size,
+		.splice_flags = flags,
+		.recv_actor = unix_stream_splice_actor,
+	};
+	bool nonblock;
+
+	if (unlikely(*ppos))
+		return -ESPIPE;
+
+	/* Either the file or the splice call itself may ask for non-blocking. */
+	nonblock = (sock->file->f_flags & O_NONBLOCK) ||
+		   (flags & SPLICE_F_NONBLOCK);
+	if (nonblock)
+		state.flags = MSG_DONTWAIT;
+
+	return unix_stream_read_generic(&state);
+}
+
static int unix_shutdown(struct socket *sock, int mode)
{
struct sock *sk = sock->sk;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 99f7012b23b9..a73a226f2d33 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -95,39 +95,36 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
unsigned int unix_tot_inflight;
-
struct sock *unix_get_socket(struct file *filp)
{
struct sock *u_sock = NULL;
struct inode *inode = file_inode(filp);
- /*
- * Socket ?
- */
+ /* Socket ? */
if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
struct sock *s = sock->sk;
- /*
- * PF_UNIX ?
- */
+ /* PF_UNIX ? */
if (s && sock->ops && sock->ops->family == PF_UNIX)
u_sock = s;
}
return u_sock;
}
-/*
- * Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
*/
void unix_inflight(struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+
if (s) {
struct unix_sock *u = unix_sk(s);
+
spin_lock(&unix_gc_lock);
+
if (atomic_long_inc_return(&u->inflight) == 1) {
BUG_ON(!list_empty(&u->link));
list_add_tail(&u->link, &gc_inflight_list);
@@ -142,10 +139,13 @@ void unix_inflight(struct file *fp)
void unix_notinflight(struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+
if (s) {
struct unix_sock *u = unix_sk(s);
+
spin_lock(&unix_gc_lock);
BUG_ON(list_empty(&u->link));
+
if (atomic_long_dec_and_test(&u->inflight))
list_del_init(&u->link);
unix_tot_inflight--;
@@ -161,32 +161,27 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
spin_lock(&x->sk_receive_queue.lock);
skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
- /*
- * Do we have file descriptors ?
- */
+ /* Do we have file descriptors ? */
if (UNIXCB(skb).fp) {
bool hit = false;
- /*
- * Process the descriptors of this socket
- */
+ /* Process the descriptors of this socket */
int nfd = UNIXCB(skb).fp->count;
struct file **fp = UNIXCB(skb).fp->fp;
+
while (nfd--) {
- /*
- * Get the socket the fd matches
- * if it indeed does so
- */
+ /* Get the socket the fd matches if it indeed does so */
struct sock *sk = unix_get_socket(*fp++);
+
if (sk) {
struct unix_sock *u = unix_sk(sk);
- /*
- * Ignore non-candidates, they could
+ /* Ignore non-candidates, they could
* have been added to the queues after
* starting the garbage collection
*/
if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
hit = true;
+
func(u);
}
}
@@ -203,24 +198,22 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
struct sk_buff_head *hitlist)
{
- if (x->sk_state != TCP_LISTEN)
+ if (x->sk_state != TCP_LISTEN) {
scan_inflight(x, func, hitlist);
- else {
+ } else {
struct sk_buff *skb;
struct sk_buff *next;
struct unix_sock *u;
LIST_HEAD(embryos);
- /*
- * For a listening socket collect the queued embryos
+ /* For a listening socket collect the queued embryos
* and perform a scan on them as well.
*/
spin_lock(&x->sk_receive_queue.lock);
skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
u = unix_sk(skb->sk);
- /*
- * An embryo cannot be in-flight, so it's safe
+ /* An embryo cannot be in-flight, so it's safe
* to use the list link.
*/
BUG_ON(!list_empty(&u->link));
@@ -249,8 +242,7 @@ static void inc_inflight(struct unix_sock *usk)
static void inc_inflight_move_tail(struct unix_sock *u)
{
atomic_long_inc(&u->inflight);
- /*
- * If this still might be part of a cycle, move it to the end
+ /* If this still might be part of a cycle, move it to the end
* of the list, so that it's checked even if it was already
* passed over
*/
@@ -263,8 +255,7 @@ static bool gc_in_progress;
void wait_for_unix_gc(void)
{
- /*
- * If number of inflight sockets is insane,
+ /* If number of inflight sockets is insane,
* force a garbage collect right now.
*/
if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
@@ -288,8 +279,7 @@ void unix_gc(void)
goto out;
gc_in_progress = true;
- /*
- * First, select candidates for garbage collection. Only
+ /* First, select candidates for garbage collection. Only
* in-flight sockets are considered, and from those only ones
* which don't have any external reference.
*
@@ -320,15 +310,13 @@ void unix_gc(void)
}
}
- /*
- * Now remove all internal in-flight reference to children of
+ /* Now remove all internal in-flight reference to children of
* the candidates.
*/
list_for_each_entry(u, &gc_candidates, link)
scan_children(&u->sk, dec_inflight, NULL);
- /*
- * Restore the references for children of all candidates,
+ /* Restore the references for children of all candidates,
* which have remaining references. Do this recursively, so
* only those remain, which form cyclic references.
*
@@ -350,8 +338,7 @@ void unix_gc(void)
}
list_del(&cursor);
- /*
- * not_cycle_list contains those sockets which do not make up a
+ /* not_cycle_list contains those sockets which do not make up a
* cycle. Restore these to the inflight list.
*/
while (!list_empty(&not_cycle_list)) {
@@ -360,8 +347,7 @@ void unix_gc(void)
list_move_tail(&u->link, &gc_inflight_list);
}
- /*
- * Now gc_candidates contains only garbage. Restore original
+ /* Now gc_candidates contains only garbage. Restore original
* inflight counters for these as well, and remove the skbuffs
* which are creating the cycle(s).
*/