From 08c0cad69f32ad1e881fa3fb7f5e0a25db5b07ce Mon Sep 17 00:00:00 2001 From: Valentina Giusti Date: Fri, 20 Dec 2013 17:28:53 +0100 Subject: netfilter: nfnetlink_queue: enable UID/GID socket info retrieval Thanks to commits 41063e9 (ipv4: Early TCP socket demux) and 421b388 (udp: ipv4: Add udp early demux) it is now possible to parse UID and GID socket info also for incoming TCP and UDP connections. Having this info available, it is convenient to let NFQUEUE parse it in order to improve and refine the traffic analysis in userspace. Signed-off-by: Valentina Giusti Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index 0132bad79de7..8dd819e2b5fe 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -47,6 +47,8 @@ enum nfqnl_attr_type { NFQA_CAP_LEN, /* __u32 length of captured packet */ NFQA_SKB_INFO, /* __u32 skb meta information */ NFQA_EXP, /* nf_conntrack_netlink.h */ + NFQA_UID, /* __u32 sk uid */ + NFQA_GID, /* __u32 sk gid */ __NFQA_MAX }; @@ -99,7 +101,8 @@ enum nfqnl_attr_config { #define NFQA_CFG_F_FAIL_OPEN (1 << 0) #define NFQA_CFG_F_CONNTRACK (1 << 1) #define NFQA_CFG_F_GSO (1 << 2) -#define NFQA_CFG_F_MAX (1 << 3) +#define NFQA_CFG_F_UID_GID (1 << 3) +#define NFQA_CFG_F_MAX (1 << 4) /* flags for NFQA_SKB_INFO */ /* packet appears to have wrong checksums, but they are ok */ -- cgit v1.2.3 From 6a649f339802f104549e1fb211e381036661e244 Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Wed, 18 Dec 2013 11:27:02 +0800 Subject: netfilter: add IPv4/6 IPComp extension match support With this plugin, user could specify IPComp tagged with certain CPI that host not interested will be DROPped or any other action. For example: iptables -A INPUT -p 108 -m ipcomp --ipcompspi 0x87 -j DROP ip6tables -A INPUT -p 108 -m ipcomp --ipcompspi 0x87 -j DROP Then input IPComp packet with CPI equates 0x87 will not reach upper layer anymore. Signed-off-by: Fan Du Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/xt_ipcomp.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 include/uapi/linux/netfilter/xt_ipcomp.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 17c3af2c4bb9..91be8ce623f0 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -54,6 +54,7 @@ header-y += xt_ecn.h header-y += xt_esp.h header-y += xt_hashlimit.h header-y += xt_helper.h +header-y += xt_ipcomp.h header-y += xt_iprange.h header-y += xt_ipvs.h header-y += xt_length.h diff --git a/include/uapi/linux/netfilter/xt_ipcomp.h b/include/uapi/linux/netfilter/xt_ipcomp.h new file mode 100644 index 000000000000..45c7e40eb8e1 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_ipcomp.h @@ -0,0 +1,16 @@ +#ifndef _XT_IPCOMP_H +#define _XT_IPCOMP_H + +#include + +struct xt_ipcomp { + __u32 spis[2]; /* Security Parameter Index */ + __u8 invflags; /* Inverse flags */ + __u8 hdrres; /* Test of the Reserved Filed */ +}; + +/* Values for "invflags" field in struct xt_ipcomp. */ +#define XT_IPCOMP_INV_SPI 0x01 /* Invert the sense of spi. */ +#define XT_IPCOMP_INV_MASK 0x01 /* All possible flags. */ + +#endif /*_XT_IPCOMP_H*/ -- cgit v1.2.3 From 34ce324019e76f6d93768d68343a0e78f464d754 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Dec 2013 22:40:29 +0100 Subject: netfilter: nf_nat: add full port randomization support We currently use prandom_u32() for allocation of ports in tcp bind(0) and udp code. In case of plain SNAT we try to keep the ports as is or increment on collision. SNAT --random mode does use per-destination incrementing port allocation. As a recent paper pointed out in [1] that this mode of port allocation makes it possible to an attacker to find the randomly allocated ports through a timing side-channel in a socket overloading attack conducted through an off-path attacker. So, NF_NAT_RANGE_PROTO_RANDOM actually weakens the port randomization in regard to the attack described in this paper. As we need to keep compatibility, add another flag called NF_NAT_RANGE_PROTO_RANDOM_FULLY that would replace the NF_NAT_RANGE_PROTO_RANDOM hash-based port selection algorithm with a simple prandom_u32() in order to mitigate this attack vector. Note that the lfsr113's internal state is periodically reseeded by the kernel through a local secure entropy source. More details can be found in [1], the basic idea is to send bursts of packets to a socket to overflow its receive queue and measure the latency to detect a possible retransmit when the port is found. Because of increasing ports to given destination and port, further allocations can be predicted. This information could then be used by an attacker for e.g. for cache-poisoning, NS pinning, and degradation of service attacks against DNS servers [1]: The best defense against the poisoning attacks is to properly deploy and validate DNSSEC; DNSSEC provides security not only against off-path attacker but even against MitM attacker. We hope that our results will help motivate administrators to adopt DNSSEC. However, full DNSSEC deployment make take significant time, and until that happens, we recommend short-term, non-cryptographic defenses. We recommend to support full port randomisation, according to practices recommended in [2], and to avoid per-destination sequential port allocation, which we show may be vulnerable to derandomisation attacks. Joint work between Hannes Frederic Sowa and Daniel Borkmann. [1] https://sites.google.com/site/hayashulman/files/NIC-derandomisation.pdf [2] http://arxiv.org/pdf/1205.5190v1.pdf Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_nat.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h index bf0cc373ffb6..1ad3659102b6 100644 --- a/include/uapi/linux/netfilter/nf_nat.h +++ b/include/uapi/linux/netfilter/nf_nat.h @@ -4,10 +4,14 @@ #include #include -#define NF_NAT_RANGE_MAP_IPS 1 -#define NF_NAT_RANGE_PROTO_SPECIFIED 2 -#define NF_NAT_RANGE_PROTO_RANDOM 4 -#define NF_NAT_RANGE_PERSISTENT 8 +#define NF_NAT_RANGE_MAP_IPS (1 << 0) +#define NF_NAT_RANGE_PROTO_SPECIFIED (1 << 1) +#define NF_NAT_RANGE_PROTO_RANDOM (1 << 2) +#define NF_NAT_RANGE_PERSISTENT (1 << 3) +#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4) + +#define NF_NAT_RANGE_PROTO_RANDOM_ALL \ + (NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY) struct nf_nat_ipv4_range { unsigned int flags; -- cgit v1.2.3 From 82a37132f300ea53bdcd812917af5a6329ec80c3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sun, 29 Dec 2013 18:27:12 +0100 Subject: netfilter: x_tables: lightweight process control group matching It would be useful e.g. in a server or desktop environment to have a facility in the notion of fine-grained "per application" or "per application group" firewall policies. Probably, users in the mobile, embedded area (e.g. Android based) with different security policy requirements for application groups could have great benefit from that as well. For example, with a little bit of configuration effort, an admin could whitelist well-known applications, and thus block otherwise unwanted "hard-to-track" applications like [1] from a user's machine. Blocking is just one example, but it is not limited to that, meaning we can have much different scenarios/policies that netfilter allows us than just blocking, e.g. fine grained settings where applications are allowed to connect/send traffic to, application traffic marking/conntracking, application-specific packet mangling, and so on. Implementation of PID-based matching would not be appropriate as they frequently change, and child tracking would make that even more complex and ugly. Cgroups would be a perfect candidate for accomplishing that as they associate a set of tasks with a set of parameters for one or more subsystems, in our case the netfilter subsystem, which, of course, can be combined with other cgroup subsystems into something more complex if needed. As mentioned, to overcome this constraint, such processes could be placed into one or multiple cgroups where different fine-grained rules can be defined depending on the application scenario, while e.g. everything else that is not part of that could be dropped (or vice versa), thus making life harder for unwanted processes to communicate to the outside world. So, we make use of cgroups here to track jobs and limit their resources in terms of iptables policies; in other words, limiting, tracking, etc what they are allowed to communicate. In our case we're working on outgoing traffic based on which local socket that originated from. Also, one doesn't even need to have an a-prio knowledge of the application internals regarding their particular use of ports or protocols. Matching is *extremly* lightweight as we just test for the sk_classid marker of sockets, originating from net_cls. net_cls and netfilter do not contradict each other; in fact, each construct can live as standalone or they can be used in combination with each other, which is perfectly fine, plus it serves Tejun's requirement to not introduce a new cgroups subsystem. Through this, we result in a very minimal and efficient module, and don't add anything except netfilter code. One possible, minimal usage example (many other iptables options can be applied obviously): 1) Configuring cgroups if not already done, e.g.: mkdir /sys/fs/cgroup/net_cls mount -t cgroup -o net_cls net_cls /sys/fs/cgroup/net_cls mkdir /sys/fs/cgroup/net_cls/0 echo 1 > /sys/fs/cgroup/net_cls/0/net_cls.classid (resp. a real flow handle id for tc) 2) Configuring netfilter (iptables-nftables), e.g.: iptables -A OUTPUT -m cgroup ! --cgroup 1 -j DROP 3) Running applications, e.g.: ping 208.67.222.222 echo 1799 > /sys/fs/cgroup/net_cls/0/tasks 64 bytes from 208.67.222.222: icmp_seq=44 ttl=49 time=11.9 ms [...] ping 208.67.220.220 ping: sendmsg: Operation not permitted [...] echo 1804 > /sys/fs/cgroup/net_cls/0/tasks 64 bytes from 208.67.220.220: icmp_seq=89 ttl=56 time=19.0 ms [...] Of course, real-world deployments would make use of cgroups user space toolsuite, or own custom policy daemons dynamically moving applications from/to various cgroups. [1] http://www.blackhat.com/presentations/bh-europe-06/bh-eu-06-biondi/bh-eu-06-biondi-up.pdf Signed-off-by: Daniel Borkmann Cc: Tejun Heo Cc: cgroups@vger.kernel.org Acked-by: Li Zefan Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + include/uapi/linux/netfilter/xt_cgroup.h | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 include/uapi/linux/netfilter/xt_cgroup.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 91be8ce623f0..2344f5a319fc 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -39,6 +39,7 @@ header-y += xt_TEE.h header-y += xt_TPROXY.h header-y += xt_addrtype.h header-y += xt_bpf.h +header-y += xt_cgroup.h header-y += xt_cluster.h header-y += xt_comment.h header-y += xt_connbytes.h diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h new file mode 100644 index 000000000000..43acb7e175f6 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_cgroup.h @@ -0,0 +1,11 @@ +#ifndef _UAPI_XT_CGROUP_H +#define _UAPI_XT_CGROUP_H + +#include + +struct xt_cgroup_info { + __u32 id; + __u32 invert; +}; + +#endif /* _UAPI_XT_CGROUP_H */ -- cgit v1.2.3