diff options
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- | include/net/sock.h | 80 |
1 files changed, 52 insertions, 28 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 8eaba44fdc09..4c417486a1b8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -58,6 +58,7 @@ #include <linux/memcontrol.h> #include <linux/static_key.h> #include <linux/sched.h> +#include <linux/cgroup-defs.h> #include <linux/filter.h> #include <linux/rculist_nulls.h> @@ -192,7 +193,7 @@ struct sock_common { int skc_bound_dev_if; union { struct hlist_node skc_bind_node; - struct hlist_nulls_node skc_portaddr_node; + struct hlist_node skc_portaddr_node; }; struct proto *skc_prot; possible_net_t skc_net; @@ -287,7 +288,6 @@ struct cg_proto; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_priority: %SO_PRIORITY setting - * @sk_cgrp_prioidx: socket group's priority map index * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family * @sk_peer_pid: &struct pid for this socket's peer @@ -309,7 +309,7 @@ struct cg_proto; * @sk_send_head: front of stuff to transmit * @sk_security: used by security modules * @sk_mark: generic packet mark - * @sk_classid: this socket's cgroup classid + * @sk_cgrp_data: cgroup data for this cgroup * @sk_cgrp: this socket's cgroup-specific proto data * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock @@ -318,6 +318,7 @@ struct cg_proto; * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) * @sk_backlog_rcv: callback to process the backlog * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 + * @sk_reuseport_cb: reuseport group container */ struct sock { /* @@ -426,11 +427,11 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; __u32 sk_priority; -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) - __u32 sk_cgrp_prioidx; -#endif + spinlock_t sk_peer_lock; + __u32 sk_mark; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; + long sk_rcvtimeo; long sk_sndtimeo; struct timer_list sk_timer; @@ -449,11 +450,8 @@ struct sock { #ifdef CONFIG_SECURITY void *sk_security; #endif - __u32 sk_mark; kuid_t sk_uid; -#ifdef CONFIG_CGROUP_NET_CLASSID - u32 sk_classid; -#endif + struct sock_cgroup_data sk_cgrp_data; struct cg_proto *sk_cgrp; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); @@ -463,6 +461,7 @@ struct sock { struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); struct rcu_head sk_rcu; + struct sock_reuseport __rcu *sk_reuseport_cb; }; #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) @@ -700,18 +699,18 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_for_each_entry(__sk, list, sk_bind_node) /** - * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset + * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset * @tpos: the type * to use as a loop cursor. * @pos: the &struct hlist_node to use as a loop cursor. * @head: the head for your list. * @offset: offset of hlist_node within the struct. * */ -#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \ - for (pos = (head)->first; \ - (!is_a_nulls(pos)) && \ +#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \ + for (pos = rcu_dereference((head)->first); \ + pos != NULL && \ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) static inline struct user_namespace *sk_user_ns(struct sock *sk) { @@ -782,6 +781,8 @@ static inline int sk_memalloc_socks(void) { return static_key_false(&memalloc_socks); } + +void __receive_sock(struct file *file); #else static inline int sk_memalloc_socks(void) @@ -789,6 +790,8 @@ static inline int sk_memalloc_socks(void) return 0; } +static inline void __receive_sock(struct file *file) +{ } #endif static inline gfp_t sk_gfp_atomic(const struct sock *sk, gfp_t gfp_mask) @@ -1016,7 +1019,7 @@ struct proto { void (*release_cb)(struct sock *sk); /* Keeping track of sk's, looking them up, and port selection methods. */ - void (*hash)(struct sock *sk); + int (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); @@ -1143,6 +1146,16 @@ static inline bool sk_stream_is_writeable(const struct sock *sk) sk_stream_memory_free(sk); } +static inline int sk_under_cgroup_hierarchy(struct sock *sk, + struct cgroup *ancestor) +{ +#ifdef CONFIG_SOCK_CGROUP_DATA + return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), + ancestor); +#else + return -ENOTSUPP; +#endif +} static inline bool sk_has_memory_pressure(const struct sock *sk) { @@ -1208,11 +1221,13 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot, unsigned long amt, int *parent_status) { - page_counter_charge(&prot->memory_allocated, amt); + struct page_counter *counter; + + if (page_counter_try_charge(&prot->memory_allocated, amt, &counter)) + return; - if (page_counter_read(&prot->memory_allocated) > - prot->memory_allocated.limit) - *parent_status = OVER_LIMIT; + page_counter_charge(&prot->memory_allocated, amt); + *parent_status = OVER_LIMIT; } static inline void memcg_memory_allocated_sub(struct cg_proto *prot, @@ -1286,7 +1301,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) percpu_counter_inc(prot->sockets_allocated); } -static inline int +static inline u64 sk_sockets_allocated_read_positive(struct sock *sk) { struct proto *prot = sk->sk_prot; @@ -1333,10 +1348,10 @@ static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, /* With per-bucket locks this operation is not-atomic, so that * this version is not worse. */ -static inline void __sk_prot_rehash(struct sock *sk) +static inline int __sk_prot_rehash(struct sock *sk) { sk->sk_prot->unhash(sk); - sk->sk_prot->hash(sk); + return sk->sk_prot->hash(sk); } void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); @@ -1655,7 +1670,13 @@ static inline void sock_put(struct sock *sk) */ void sock_gen_put(struct sock *sk); -int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); +int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, + unsigned int trim_cap); +static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb, + const int nested) +{ + return __sk_receive_skb(sk, skb, nested, 1); +} static inline void sk_tx_queue_set(struct sock *sk, int tx_queue) { @@ -1674,7 +1695,6 @@ static inline int sk_tx_queue_get(const struct sock *sk) static inline void sk_set_socket(struct sock *sk, struct socket *sock) { - sk_tx_queue_clear(sk); sk->sk_socket = sock; } @@ -1727,7 +1747,8 @@ static inline u32 net_tx_rndhash(void) static inline void sk_set_txhash(struct sock *sk) { - sk->sk_txhash = net_tx_rndhash(); + /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ + WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); } static inline void sk_rethink_txhash(struct sock *sk) @@ -1979,9 +2000,12 @@ static inline void sock_poll_wait(struct file *filp, static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) { - if (sk->sk_txhash) { + /* This pairs with WRITE_ONCE() in sk_set_txhash() */ + u32 txhash = READ_ONCE(sk->sk_txhash); + + if (txhash) { skb->l4_hash = 1; - skb->hash = sk->sk_txhash; + skb->hash = txhash; } } |