From e56c57d0d3fdbbdf583d3af96bfb803b8dfa713e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Nov 2011 17:07:07 -0500 Subject: net: rename sk_clone to sk_clone_lock Make clear that sk_clone() and inet_csk_clone() return a locked socket. Add _lock suffix and kerneldoc. Suggested-by: Linus Torvalds Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index abb6e0f0c3c3..67cd4581b6da 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1089,8 +1089,8 @@ extern struct sock *sk_alloc(struct net *net, int family, struct proto *prot); extern void sk_free(struct sock *sk); extern void sk_release_kernel(struct sock *sk); -extern struct sock *sk_clone(const struct sock *sk, - const gfp_t priority); +extern struct sock *sk_clone_lock(const struct sock *sk, + const gfp_t priority); extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, -- cgit v1.2.3 From 6e3e939f3b1bf8534b32ad09ff199d88800835a0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 9 Nov 2011 10:15:42 +0100 Subject: net: add wireless TX status socket option The 802.1X EAPOL handshake that hostapd performs requires knowing whether the frame was ack'ed by the peer. Currently, we fudge this pretty badly by not even transmitting the frame as a normal data frame but injecting it with radiotap and getting the status out of radiotap monitor as well. This is rather complex, confuses users (mon.wlan0 presence) and doesn't work with all hardware. To get rid of that hack, introduce a real wifi TX status option for data frame transmissions. This works similarly to the existing TX timestamping in that it reflects the SKB back to the socket's error queue with a SCM_WIFI_STATUS cmsg that has an int indicating ACK status (0/1). 
Since it is possible that at some point we will want to have TX timestamping and wifi status in a single errqueue SKB (there's little point in not doing that), redefine SO_EE_ORIGIN_TIMESTAMPING to SO_EE_ORIGIN_TXSTATUS which can collect more than just the timestamp; keep the old constant as an alias of course. Currently the internal APIs don't make that possible, but it wouldn't be hard to split them up in a way that makes it possible. Thanks to Neil Horman for helping me figure out the functions that add the control messages. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/sock.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 5ac682f73d63..fa6f5381c5d6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -564,6 +564,7 @@ enum sock_flags { SOCK_FASYNC, /* fasync() active */ SOCK_RXQ_OVFL, SOCK_ZEROCOPY, /* buffers from userspace */ + SOCK_WIFI_STATUS, /* push wifi status to userspace */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) @@ -1714,6 +1715,8 @@ static inline int sock_intr_errno(long timeo) extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb); +extern void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb); static __inline__ void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) @@ -1741,6 +1744,9 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) __sock_recv_timestamp(msg, sk, skb); else sk->sk_stamp = kt; + + if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) + __sock_recv_wifi_status(msg, sk, skb); } extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, -- cgit v1.2.3 From c8f44affb7244f2ac3e703cab13d55ede27621bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Nov 2011 15:29:55 +0000 Subject: net: introduce 
and use netdev_features_t for device features sets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: add couple missing conversions in drivers split unexporting netdev_fix_features() implemented %pNF convert sock::sk_route_(no?)caps Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 67cd4581b6da..1331008ad885 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -306,8 +306,8 @@ struct sock { kmemcheck_bitfield_end(flags); int sk_wmem_queued; gfp_t sk_allocation; - int sk_route_caps; - int sk_route_nocaps; + netdev_features_t sk_route_caps; + netdev_features_t sk_route_nocaps; int sk_gso_type; unsigned int sk_gso_max_size; int sk_rcvlowat; @@ -1393,7 +1393,7 @@ static inline int sk_can_gso(const struct sock *sk) extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst); -static inline void sk_nocaps_add(struct sock *sk, int flags) +static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) { sk->sk_route_nocaps |= flags; sk->sk_route_caps &= ~flags; -- cgit v1.2.3 From 5bc1421e34ecfe0bd4b26dc3232b7d5e25179144 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 22 Nov 2011 05:10:51 +0000 Subject: net: add network priority cgroup infrastructure (v4) This patch adds in the infrastructure code to create the network priority cgroup. The cgroup, in addition to the standard processes file creates two control files: 1) prioidx - This is a read-only file that exports the index of this cgroup. This is a value that is both arbitrary and unique to a cgroup in this subsystem, and is used to index the per-device priority map 2) priomap - This is a writeable file. 
On read it reports a table of 2-tuples (name, priority) where name is the name of a network interface and priority indicates the priority assigned to frames egressing on the named interface and originating from a pid in this cgroup This cgroup allows for skb priority to be set prior to a root qdisc getting selected. This is beneficial for DCB enabled systems, in that it allows for any application to use dcb configured priorities without application modification Signed-off-by: Neil Horman Signed-off-by: John Fastabend CC: Robert Love CC: "David S. Miller" Signed-off-by: David S. Miller --- include/net/sock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 1c28f394d8ec..8ac338cb39ce 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -320,6 +320,9 @@ struct sock { unsigned short sk_ack_backlog; unsigned short sk_max_ack_backlog; __u32 sk_priority; +#ifdef CONFIG_CGROUPS + __u32 sk_cgrp_prioidx; +#endif struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; -- cgit v1.2.3 From 180d8cd942ce336b2c869d324855c40c5db478ad Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Sun, 11 Dec 2011 21:47:02 +0000 Subject: foundations of per-cgroup memory pressure controlling. This patch replaces all uses of struct sock fields' memory_pressure, memory_allocated, sockets_allocated, and sysctl_mem with accessor macros. Those macros can either receive a socket argument, or a mem_cgroup argument, depending on the context they live in. Since we're only doing a macro wrapping here, no performance impact at all is expected in the case where we don't have cgroups disabled. Signed-off-by: Glauber Costa Reviewed-by: Hiroyouki Kamezawa CC: David S. Miller CC: Eric W. Biederman CC: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 8ac338cb39ce..ed0dbf034539 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -867,6 +868,99 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ +static inline bool sk_has_memory_pressure(const struct sock *sk) +{ + return sk->sk_prot->memory_pressure != NULL; +} + +static inline bool sk_under_memory_pressure(const struct sock *sk) +{ + if (!sk->sk_prot->memory_pressure) + return false; + return !!*sk->sk_prot->memory_pressure; +} + +static inline void sk_leave_memory_pressure(struct sock *sk) +{ + int *memory_pressure = sk->sk_prot->memory_pressure; + + if (memory_pressure && *memory_pressure) + *memory_pressure = 0; +} + +static inline void sk_enter_memory_pressure(struct sock *sk) +{ + if (sk->sk_prot->enter_memory_pressure) + sk->sk_prot->enter_memory_pressure(sk); +} + +static inline long sk_prot_mem_limits(const struct sock *sk, int index) +{ + long *prot = sk->sk_prot->sysctl_mem; + return prot[index]; +} + +static inline long +sk_memory_allocated(const struct sock *sk) +{ + struct proto *prot = sk->sk_prot; + return atomic_long_read(prot->memory_allocated); +} + +static inline long +sk_memory_allocated_add(struct sock *sk, int amt) +{ + struct proto *prot = sk->sk_prot; + return atomic_long_add_return(amt, prot->memory_allocated); +} + +static inline void +sk_memory_allocated_sub(struct sock *sk, int amt) +{ + struct proto *prot = sk->sk_prot; + atomic_long_sub(amt, prot->memory_allocated); +} + +static inline void sk_sockets_allocated_dec(struct sock *sk) +{ + struct proto *prot = sk->sk_prot; + percpu_counter_dec(prot->sockets_allocated); +} + 
+static inline void sk_sockets_allocated_inc(struct sock *sk) +{ + struct proto *prot = sk->sk_prot; + percpu_counter_inc(prot->sockets_allocated); +} + +static inline int +sk_sockets_allocated_read_positive(struct sock *sk) +{ + struct proto *prot = sk->sk_prot; + + return percpu_counter_sum_positive(prot->sockets_allocated); +} + +static inline int +proto_sockets_allocated_sum_positive(struct proto *prot) +{ + return percpu_counter_sum_positive(prot->sockets_allocated); +} + +static inline long +proto_memory_allocated(struct proto *prot) +{ + return atomic_long_read(prot->memory_allocated); +} + +static inline bool +proto_memory_pressure(struct proto *prot) +{ + if (!prot->memory_pressure) + return false; + return !!*prot->memory_pressure; +} + #ifdef CONFIG_PROC_FS /* Called with local bh disabled */ @@ -1674,7 +1768,7 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk) page = alloc_pages(sk->sk_allocation, 0); if (!page) { - sk->sk_prot->enter_memory_pressure(sk); + sk_enter_memory_pressure(sk); sk_stream_moderate_sndbuf(sk); } return page; -- cgit v1.2.3 From e1aab161e0135aafcd439be20b4f35e4b0922d95 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Sun, 11 Dec 2011 21:47:03 +0000 Subject: socket: initial cgroup code. The goal of this work is to move the memory pressure tcp controls to a cgroup, instead of just relying on global conditions. To avoid excessive overhead in the network fast paths, the code that accounts allocated memory to a cgroup is hidden inside a static_branch(). This branch is patched out until the first non-root cgroup is created. So when nobody is using cgroups, even if it is mounted, no significant performance penalty should be seen. This patch handles the generic part of the code, and has nothing tcp-specific. Signed-off-by: Glauber Costa Reviewed-by: KAMEZAWA Hiroyuki CC: Kirill A. Shutemov CC: David S. Miller CC: Eric W. Biederman CC: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 150 insertions(+), 6 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index ed0dbf034539..d5eab256167c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -168,6 +169,7 @@ struct sock_common { /* public: */ }; +struct cg_proto; /** * struct sock - network layer representation of sockets * @__sk_common: shared layout with inet_timewait_sock @@ -228,6 +230,7 @@ struct sock_common { * @sk_security: used by security modules * @sk_mark: generic packet mark * @sk_classid: this socket's cgroup classid + * @sk_cgrp: this socket's cgroup-specific proto data * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed @@ -342,6 +345,7 @@ struct sock { #endif __u32 sk_mark; u32 sk_classid; + struct cg_proto *sk_cgrp; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk, int bytes); void (*sk_write_space)(struct sock *sk); @@ -838,6 +842,37 @@ struct proto { #ifdef SOCK_REFCNT_DEBUG atomic_t socks; #endif +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM + /* + * cgroup specific init/deinit functions. Called once for all + * protocols that implement it, from cgroups populate function. + * This function has to setup any files the protocol want to + * appear in the kmem cgroup filesystem. + */ + int (*init_cgroup)(struct cgroup *cgrp, + struct cgroup_subsys *ss); + void (*destroy_cgroup)(struct cgroup *cgrp, + struct cgroup_subsys *ss); + struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); +#endif +}; + +struct cg_proto { + void (*enter_memory_pressure)(struct sock *sk); + struct res_counter *memory_allocated; /* Current allocated memory. 
*/ + struct percpu_counter *sockets_allocated; /* Current number of sockets. */ + int *memory_pressure; + long *sysctl_mem; + /* + * memcg field is used to find which memcg we belong directly + * Each memcg struct can hold more than one cg_proto, so container_of + * won't really cut. + * + * The elegant solution would be having an inverse function to + * proto_cgroup in struct proto, but that means polluting the structure + * for everybody, instead of just for memcg users. + */ + struct mem_cgroup *memcg; }; extern int proto_register(struct proto *prot, int alloc_slab); @@ -856,7 +891,7 @@ static inline void sk_refcnt_debug_dec(struct sock *sk) sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); } -static inline void sk_refcnt_debug_release(const struct sock *sk) +inline void sk_refcnt_debug_release(const struct sock *sk) { if (atomic_read(&sk->sk_refcnt) != 1) printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", @@ -868,6 +903,24 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ +#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +extern struct jump_label_key memcg_socket_limit_enabled; +static inline struct cg_proto *parent_cg_proto(struct proto *proto, + struct cg_proto *cg_proto) +{ + return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); +} +#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled) +#else +#define mem_cgroup_sockets_enabled 0 +static inline struct cg_proto *parent_cg_proto(struct proto *proto, + struct cg_proto *cg_proto) +{ + return NULL; +} +#endif + + static inline bool sk_has_memory_pressure(const struct sock *sk) { return sk->sk_prot->memory_pressure != NULL; @@ -877,6 +930,10 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) { if (!sk->sk_prot->memory_pressure) return false; + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + return !!*sk->sk_cgrp->memory_pressure; + 
return !!*sk->sk_prot->memory_pressure; } @@ -884,52 +941,136 @@ static inline void sk_leave_memory_pressure(struct sock *sk) { int *memory_pressure = sk->sk_prot->memory_pressure; - if (memory_pressure && *memory_pressure) + if (!memory_pressure) + return; + + if (*memory_pressure) *memory_pressure = 0; + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { + struct cg_proto *cg_proto = sk->sk_cgrp; + struct proto *prot = sk->sk_prot; + + for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) + if (*cg_proto->memory_pressure) + *cg_proto->memory_pressure = 0; + } + } static inline void sk_enter_memory_pressure(struct sock *sk) { - if (sk->sk_prot->enter_memory_pressure) - sk->sk_prot->enter_memory_pressure(sk); + if (!sk->sk_prot->enter_memory_pressure) + return; + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { + struct cg_proto *cg_proto = sk->sk_cgrp; + struct proto *prot = sk->sk_prot; + + for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) + cg_proto->enter_memory_pressure(sk); + } + + sk->sk_prot->enter_memory_pressure(sk); } static inline long sk_prot_mem_limits(const struct sock *sk, int index) { long *prot = sk->sk_prot->sysctl_mem; + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + prot = sk->sk_cgrp->sysctl_mem; return prot[index]; } +static inline void memcg_memory_allocated_add(struct cg_proto *prot, + unsigned long amt, + int *parent_status) +{ + struct res_counter *fail; + int ret; + + ret = res_counter_charge(prot->memory_allocated, + amt << PAGE_SHIFT, &fail); + + if (ret < 0) + *parent_status = OVER_LIMIT; +} + +static inline void memcg_memory_allocated_sub(struct cg_proto *prot, + unsigned long amt) +{ + res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT); +} + +static inline u64 memcg_memory_allocated_read(struct cg_proto *prot) +{ + u64 ret; + ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE); + return ret >> PAGE_SHIFT; +} + static inline long sk_memory_allocated(const struct sock *sk) { struct proto 
*prot = sk->sk_prot; + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + return memcg_memory_allocated_read(sk->sk_cgrp); + return atomic_long_read(prot->memory_allocated); } static inline long -sk_memory_allocated_add(struct sock *sk, int amt) +sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status) { struct proto *prot = sk->sk_prot; + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { + memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status); + /* update the root cgroup regardless */ + atomic_long_add_return(amt, prot->memory_allocated); + return memcg_memory_allocated_read(sk->sk_cgrp); + } + return atomic_long_add_return(amt, prot->memory_allocated); } static inline void -sk_memory_allocated_sub(struct sock *sk, int amt) +sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status) { struct proto *prot = sk->sk_prot; + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp && + parent_status != OVER_LIMIT) /* Otherwise was uncharged already */ + memcg_memory_allocated_sub(sk->sk_cgrp, amt); + atomic_long_sub(amt, prot->memory_allocated); } static inline void sk_sockets_allocated_dec(struct sock *sk) { struct proto *prot = sk->sk_prot; + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { + struct cg_proto *cg_proto = sk->sk_cgrp; + + for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) + percpu_counter_dec(cg_proto->sockets_allocated); + } + percpu_counter_dec(prot->sockets_allocated); } static inline void sk_sockets_allocated_inc(struct sock *sk) { struct proto *prot = sk->sk_prot; + + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { + struct cg_proto *cg_proto = sk->sk_cgrp; + + for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) + percpu_counter_inc(cg_proto->sockets_allocated); + } + percpu_counter_inc(prot->sockets_allocated); } @@ -938,6 +1079,9 @@ sk_sockets_allocated_read_positive(struct sock *sk) { struct proto *prot = sk->sk_prot; + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + return 
percpu_counter_sum_positive(sk->sk_cgrp->sockets_allocated); + return percpu_counter_sum_positive(prot->sockets_allocated); } -- cgit v1.2.3 From d1a4c0b37c296e600ffe08edb0db2dc1b8f550d7 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Sun, 11 Dec 2011 21:47:04 +0000 Subject: tcp memory pressure controls This patch introduces memory pressure controls for the tcp protocol. It uses the generic socket memory pressure code introduced in earlier patches, and fills in the necessary data in cg_proto struct. Signed-off-by: Glauber Costa Reviewed-by: KAMEZAWA Hiroyuki CC: Eric W. Biederman Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index d5eab256167c..18ecc9919d29 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -64,6 +64,8 @@ #include #include +int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss); +void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss); /* * This structure really needs to be cleaned up. * Most of it is for TCP, and not used by any of -- cgit v1.2.3 From 9f048bfba15a22d1d1ce0c1f44567fa16bed4d25 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Dec 2011 03:59:08 +0000 Subject: net: fix build error if CONFIG_CGROUPS=n Reported-by: Christoph Paasch Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 18ecc9919d29..6fe0dae81451 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -64,6 +64,8 @@ #include #include +struct cgroup; +struct cgroup_subsys; int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss); void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss); /* -- cgit v1.2.3 From c607b2ed84929e143d9fb5653c4b5d0109147cde Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 16 Dec 2011 00:52:00 +0000 Subject: net: fix compilation with !CONFIG_NET Reported-by: Randy Dunlap Signed-off-by: Glauber Costa CC: Hiroyouki Kamezawa CC: David S. Miller CC: Eric Dumazet CC: Stephen Rothwell Signed-off-by: David S. Miller --- include/net/sock.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 6fe0dae81451..3144c7950649 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -66,8 +66,20 @@ struct cgroup; struct cgroup_subsys; +#ifdef CONFIG_NET int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss); void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss); +#else +static inline +int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) +{ + return 0; +} +static inline +void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) +{ +} +#endif /* * This structure really needs to be cleaned up. * Most of it is for TCP, and not used by any of -- cgit v1.2.3 From 0fd7bac6b6157eed6cf0cb86a1e88ba29e57c033 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Dec 2011 07:11:44 +0000 Subject: net: relax rcvbuf limits skb->truesize might be big even for a small packet. Its even bigger after commit 87fb4b7b533 (net: more accurate skb truesize) and big MTU. 
We should allow queueing at least one packet per receiver, even with a low RCVBUF setting. Reported-by: Michal Simek Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index abb6e0f0c3c3..32e39371fba6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -637,12 +637,14 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) /* * Take into account size of receive queue and backlog queue + * Do not take into account this skb truesize, + * to allow even a single big packet to come. */ static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb) { unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc); - return qsize + skb->truesize > sk->sk_rcvbuf; + return qsize > sk->sk_rcvbuf; } /* The per-socket spinlock must be held here. */ -- cgit v1.2.3 From 1a3bc369ba547c11ca8b3ed079d7584f27499e70 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 21 Jan 2012 09:03:10 +0000 Subject: kernel-doc: fix new warning in net/sock.h Fix new kernel-doc warning: Warning(include/net/sock.h:372): No description found for parameter 'sk_cgrp_prioidx' Signed-off-by: Randy Dunlap Signed-off-by: David S. 
Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index bb972d254dff..97fc0ad47da0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -226,6 +226,7 @@ struct cg_proto; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_priority: %SO_PRIORITY setting + * @sk_cgrp_prioidx: socket group's priority map index * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family * @sk_peer_pid: &struct pid for this socket's peer -- cgit v1.2.3 From 376be5ff8a6a36efadd131860cf26841f366d44c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 20 Jan 2012 04:57:14 +0000 Subject: net: fix socket memcg build with !CONFIG_NET There is still a build bug with the sock memcg code, that triggers with !CONFIG_NET, that survived my series of randconfig builds. Signed-off-by: Glauber Costa Reported-by: Randy Dunlap CC: Hiroyouki Kamezawa Signed-off-by: David S. 
Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 97fc0ad47da0..0e7a9b05f92b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -922,7 +922,7 @@ inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) extern struct jump_label_key memcg_socket_limit_enabled; static inline struct cg_proto *parent_cg_proto(struct proto *proto, struct cg_proto *cg_proto) -- cgit v1.2.3 From 0e90b31f4ba77027a7c21cbfc66404df0851ca21 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 20 Jan 2012 04:57:16 +0000 Subject: net: introduce res_counter_charge_nofail() for socket allocations There is a case in __sk_mem_schedule(), where an allocation is beyond the maximum, but yet we are allowed to proceed. It happens under the following condition: sk->sk_wmem_queued + size >= sk->sk_sndbuf The network code won't revert the allocation in this case, meaning that at some point later it'll try to revert it. Since this is never communicated to the underlying res_counter code, there is an imbalance in the res_counter uncharge operation. I see two ways of fixing this: 1) storing the information about those allocations somewhere in memcg, and then deducting from that first, before we start draining the res_counter, 2) providing a slightly different allocation function for the res_counter, that matches the original behavior of the network code more closely. I decided to go for #2 here, believing it to be more elegant, since #1 would require us to do basically that, but in a more obscure way. Signed-off-by: Glauber Costa Cc: KAMEZAWA Hiroyuki Cc: Johannes Weiner Cc: Michal Hocko CC: Tejun Heo CC: Li Zefan CC: Laurent Chavey Acked-by: Tejun Heo Signed-off-by: David S. 
Miller --- include/net/sock.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 0e7a9b05f92b..4c69ac165e6b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1008,9 +1008,8 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot, struct res_counter *fail; int ret; - ret = res_counter_charge(prot->memory_allocated, - amt << PAGE_SHIFT, &fail); - + ret = res_counter_charge_nofail(prot->memory_allocated, + amt << PAGE_SHIFT, &fail); if (ret < 0) *parent_status = OVER_LIMIT; } @@ -1054,12 +1053,11 @@ sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status) } static inline void -sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status) +sk_memory_allocated_sub(struct sock *sk, int amt) { struct proto *prot = sk->sk_prot; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp && - parent_status != OVER_LIMIT) /* Otherwise was uncharged already */ + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) memcg_memory_allocated_sub(sk->sk_cgrp, amt); atomic_long_sub(amt, prot->memory_allocated); -- cgit v1.2.3 From 9018e93948c6f8f95fbcc9fa05f6c403d6adb406 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 26 Jan 2012 12:09:28 +0000 Subject: net: explicitly add jump_label.h header to sock.h Commit 36a1211970193ce215de50ed1e4e1272bc814df1 removed linux/module.h include statement from one of the headers that end up in net/sock.h. It was providing us with static_branch() definition implicitly, so after its removal the build got broken. To fix this, and avoid having this happening in the future, let me do the right thing and include linux/jump_label.h explicitly in sock.h. Signed-off-by: Glauber Costa Reported-by: Randy Dunlap CC: David S. Miller Signed-off-by: David S. 
Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 4c69ac165e6b..91c1c8baf020 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -55,6 +55,7 @@ #include #include #include +#include <linux/jump_label.h> #include #include -- cgit v1.2.3