From f01ec1c017dead42092997a2b8684fcab4cbf126 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 24 Apr 2014 10:02:49 +0200 Subject: vxlan: add x-netns support This patch allows to switch the netns when packet is encapsulated or decapsulated. The vxlan socket is openned into the i/o netns, ie into the netns where encapsulated packets are received. The socket lookup is done into this netns to find the corresponding vxlan tunnel. After decapsulation, the packet is injecting into the corresponding interface which may stand to another netns. When one of the two netns is removed, the tunnel is destroyed. Configuration example: ip netns add netns1 ip netns exec netns1 ip link set lo up ip link add vxlan10 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0 ip link set vxlan10 netns netns1 ip netns exec netns1 ip addr add 192.168.0.249/24 broadcast 192.168.0.255 dev vxlan10 ip netns exec netns1 ip link set vxlan10 up Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/openvswitch/vport-vxlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index e797a50ac2be..21cceb3bdf78 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -180,7 +180,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8)); + htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8), + false); if (err < 0) ip_rt_put(rt); error: -- cgit v1.2.3 From 60ff746739bf805a912484643c720b6124826140 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 4 May 2014 16:39:18 -0700 Subject: net: rename local_df to ignore_df MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As suggested by several people, rename local_df to ignore_df, since it means "ignore df bit if it is set". Cc: Maciej Żenczykowski Cc: Florian Westphal Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/openvswitch/vport-gre.c | 2 +- net/openvswitch/vport-vxlan.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index ebb6e2442554..0856f014d7a9 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -172,7 +172,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; return iptunnel_xmit(skb->sk, rt, skb, fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 21cceb3bdf78..a93efa3f64c3 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -170,7 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - skb->local_df = 1; + skb->ignore_df = 1; inet_get_local_port_range(net, &port_min, &port_max); src_port = vxlan_src_port(port_min, port_max, skb); -- cgit v1.2.3 From 7ad24ea4bf620a32631d7b3069c3e30c078b0c3e Mon Sep 17 00:00:00 2001 From: Wilfried Klaebe Date: Sun, 11 May 2014 00:12:32 +0000 Subject: net: get rid of SET_ETHTOOL_OPS net: get rid of SET_ETHTOOL_OPS Dave Miller mentioned he'd like to see SET_ETHTOOL_OPS gone. This does that. Mostly done via coccinelle script: @@ struct ethtool_ops *ops; struct net_device *dev; @@ - SET_ETHTOOL_OPS(dev, ops); + dev->ethtool_ops = ops; Compile tested only, but I'd seriously wonder if this broke anything. Suggested-by: Dave Miller Signed-off-by: Wilfried Klaebe Acked-by: Felipe Balbi Signed-off-by: David S. Miller --- net/openvswitch/vport-internal_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 729c68763fe7..789af9280e77 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -130,7 +130,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; - SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); + netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | -- cgit v1.2.3 From 7085130bab2f9c5b8d61bff73b01dc8195d0f974 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Thu, 23 Jan 2014 10:56:49 -0800 Subject: openvswitch: use const in some local vars and casts In few functions, const formal parameters are assigned or cast to non-const. These changes suppress warnings if compiled with -Wcast-qual. Signed-off-by: Daniele Di Proietto Signed-off-by: Jesse Gross --- net/openvswitch/flow_netlink.c | 6 ++++-- net/openvswitch/flow_table.c | 16 +++++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4d000acaed0d..5517bd62e39b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -628,8 +628,10 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple if (is_mask && exact_5tuple && *exact_5tuple) { if (ipv6_key->ipv6_proto != 0xff || - !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) || - !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst))) + !is_all_set((const u8 *)ipv6_key->ipv6_src, + sizeof(match->key->ipv6.addr.src)) || + !is_all_set((const u8 *)ipv6_key->ipv6_dst, + sizeof(match->key->ipv6.addr.dst))) *exact_5tuple = false; } } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 3c268b3d71c3..1ba1e0b8ade5 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -57,8 +57,10 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range) void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask) { - const long *m = (long *)((u8 *)&mask->key + mask->range.start); - const long *s = (long *)((u8 *)src + mask->range.start); + const long *m = (const long *)((const u8 *)&mask->key + + mask->range.start); + const long *s = (const long *)((const u8 *)src + + mask->range.start); long *d = (long *)((u8 *)dst + mask->range.start); int i; @@ -375,7 +377,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) static u32 flow_hash(const struct sw_flow_key *key, int key_start, int key_end) { - u32 *hash_key = (u32 *)((u8 *)key + key_start); + const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); int hash_u32s = (key_end - key_start) >> 2; /* Make sure number of hash bytes are multiple of u32. */ @@ -397,8 +399,8 @@ static bool cmp_key(const struct sw_flow_key *key1, const struct sw_flow_key *key2, int key_start, int key_end) { - const long *cp1 = (long *)((u8 *)key1 + key_start); - const long *cp2 = (long *)((u8 *)key2 + key_start); + const long *cp1 = (const long *)((const u8 *)key1 + key_start); + const long *cp2 = (const long *)((const u8 *)key2 + key_start); long diffs = 0; int i; @@ -513,8 +515,8 @@ static struct sw_flow_mask *mask_alloc(void) static bool mask_equal(const struct sw_flow_mask *a, const struct sw_flow_mask *b) { - u8 *a_ = (u8 *)&a->key + a->range.start; - u8 *b_ = (u8 *)&b->key + b->range.start; + const u8 *a_ = (const u8 *)&a->key + a->range.start; + const u8 *b_ = (const u8 *)&b->key + b->range.start; return (a->range.end == b->range.end) && (a->range.start == b->range.start) -- cgit v1.2.3 From d0b4da137508db3d38998eae7f62c0f9699ee08c Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 3 Feb 2014 14:07:43 -0800 Subject: openvswitch: avoid warnings in vport_from_priv This change, firstly, avoids declaring the formal parameter const, since it is treated as non const. (to avoid -Wcast-qual) Secondly, it cast the pointer from void* to u8*, since it is used in arithmetic (to avoid -Wpointer-arith) Signed-off-by: Daniele Di Proietto Signed-off-by: Jesse Gross --- net/openvswitch/vport.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index d7e50a17396c..3e12940985a2 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -185,9 +185,9 @@ static inline void *vport_priv(const struct vport *vport) * the result of a hash table lookup. @priv must point to the start of the * private data area. */ -static inline struct vport *vport_from_priv(const void *priv) +static inline struct vport *vport_from_priv(void *priv) { - return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); + return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); } void ovs_vport_receive(struct vport *, struct sk_buff *, -- cgit v1.2.3 From 07dc0602c5976cfc36ccffe5c6d73234f204d585 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 3 Feb 2014 14:08:29 -0800 Subject: openvswitch: avoid cast-qual warning in vport_priv This function must cast a const value to a non const value. By adding an uintptr_t cast the warning is suppressed. To avoid the cast (proper solution) several function signatures must be changed. Signed-off-by: Daniele Di Proietto Signed-off-by: Jesse Gross --- net/openvswitch/vport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 3e12940985a2..8d721e62f388 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -172,7 +172,7 @@ void ovs_vport_deferred_free(struct vport *vport); */ static inline void *vport_priv(const struct vport *vport) { - return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); + return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); } /** -- cgit v1.2.3 From cc23ebf3bb4348fb022c0d25494307459bb2e539 Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 3 Feb 2014 14:09:01 -0800 Subject: openvswitch: Added (unsigned long long) cast in printf This is necessary, since u64 is not unsigned long long in all architectures: u64 could be also uint64_t. Signed-off-by: Daniele Di Proietto Signed-off-by: Jesse Gross --- net/openvswitch/flow_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 5517bd62e39b..1b22ad2de0c2 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -216,14 +216,14 @@ static bool match_validate(const struct sw_flow_match *match, if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", - key_attrs, key_expected); + (unsigned long long)key_attrs, (unsigned long long)key_expected); return false; } if ((mask_attrs & mask_allowed) != mask_attrs) { /* Mask attributes check failed. */ OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", - mask_attrs, mask_allowed); + (unsigned long long)mask_attrs, (unsigned long long)mask_allowed); return false; } -- cgit v1.2.3 From 1815a8831fb04c60d73627816cb0b596266e9bee Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 3 Feb 2014 17:06:46 -0800 Subject: openvswitch: Use net_ratelimit in OVS_NLERR Each use of pr__once has a per-site flag. Some of the OVS_NLERR messages look as if seeing them multiple times could be useful, so use net_ratelimit() instead of pr_info_once. Signed-off-by: Joe Perches Signed-off-by: Jesse Gross --- net/openvswitch/datapath.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 05317380fc03..7ede507500d7 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -194,7 +194,9 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); void ovs_dp_notify_wq(struct work_struct *work); -#define OVS_NLERR(fmt, ...) \ - pr_info_once("netlink: " fmt, ##__VA_ARGS__) - +#define OVS_NLERR(fmt, ...) \ +do { \ + if (net_ratelimit()) \ + pr_info("netlink: " fmt, ##__VA_ARGS__); \ +} while (0) #endif /* datapath.h */ -- cgit v1.2.3 From 2235ad1c3ac545bd8fc2c026be5be16d98b9a891 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 3 Feb 2014 17:18:21 -0800 Subject: openvswitch: flow_netlink: Use pr_fmt to OVS_NLERR output Add "openvswitch: " prefix to OVS_NLERR output to match the other OVS_NLERR output of datapath.c Signed-off-by: Joe Perches Signed-off-by: Jesse Gross --- net/openvswitch/flow_netlink.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 1b22ad2de0c2..7bd09b75ebaa 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -16,6 +16,8 @@ * 02110-1301, USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include "flow.h" #include "datapath.h" #include -- cgit v1.2.3 From 8c63ff09bddf944ab0033fea97aacfadfffa76de Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 18 Feb 2014 11:15:45 -0800 Subject: openvswitch: Use ether_addr_copy It's slightly smaller/faster for some architectures. Signed-off-by: Joe Perches Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 4 ++-- net/openvswitch/flow.c | 16 ++++++++-------- net/openvswitch/flow_netlink.c | 12 ++++++------ 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 2c77e7b1a913..c36856a457ca 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -134,8 +134,8 @@ static int set_eth_addr(struct sk_buff *skb, skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); - memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2998989e76db..332aa01492f1 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -372,14 +372,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll))) goto invalid; - memcpy(key->ipv6.nd.sll, - &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + ether_addr_copy(key->ipv6.nd.sll, + &nd->opt[offset+sizeof(*nd_opt)]); } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll))) goto invalid; - memcpy(key->ipv6.nd.tll, - &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + ether_addr_copy(key->ipv6.nd.tll, + &nd->opt[offset+sizeof(*nd_opt)]); } icmp_len -= opt_len; @@ -439,8 +439,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) * header in the linear data area. */ eth = eth_hdr(skb); - memcpy(key->eth.src, eth->h_source, ETH_ALEN); - memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); + ether_addr_copy(key->eth.src, eth->h_source); + ether_addr_copy(key->eth.dst, eth->h_dest); __skb_pull(skb, 2 * ETH_ALEN); /* We are going to push all headers that we pull, so no need to @@ -538,8 +538,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) key->ip.proto = ntohs(arp->ar_op); memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); - memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); - memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); + ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha); + ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha); } } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 7bd09b75ebaa..5511ad18be51 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -986,8 +986,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; eth_key = nla_data(nla); - memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); - memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); + ether_addr_copy(eth_key->eth_src, output->eth.src); + ether_addr_copy(eth_key->eth_dst, output->eth.dst); if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { __be16 eth_type; @@ -1059,8 +1059,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, arp_key->arp_sip = output->ipv4.addr.src; arp_key->arp_tip = output->ipv4.addr.dst; arp_key->arp_op = htons(output->ip.proto); - memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); - memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); + ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); + ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); } if ((swkey->eth.type == htons(ETH_P_IP) || @@ -1147,8 +1147,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, nd_key = nla_data(nla); memcpy(nd_key->nd_target, &output->ipv6.nd.target, sizeof(nd_key->nd_target)); - memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); - memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); + ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); + ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); } } } -- cgit v1.2.3 From 23dabf88abb48a866fdb19ee08ebcf1ddd9b1840 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 27 Mar 2014 12:35:23 -0700 Subject: openvswitch: Remove 5-tuple optimization. The 5-tuple optimization becomes unnecessary with a later per-NUMA node stats patch. Remove it first to make the changes easier to grasp. Signed-off-by: Jarno Rajahalme Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 11 ++++---- net/openvswitch/flow.c | 32 +++++++++-------------- net/openvswitch/flow.h | 10 +------- net/openvswitch/flow_netlink.c | 58 +++--------------------------------------- net/openvswitch/flow_netlink.h | 1 - net/openvswitch/flow_table.c | 31 +++++++--------------- net/openvswitch/flow_table.h | 2 +- 7 files changed, 32 insertions(+), 113 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a3276e3c4feb..8867d7e2d65b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -524,7 +524,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->protocol = htons(ETH_P_802_2); /* Build an sw_flow for sending this packet. */ - flow = ovs_flow_alloc(false); + flow = ovs_flow_alloc(); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; @@ -782,7 +782,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; - bool exact_5tuple; int error; /* Extract key. */ @@ -791,7 +790,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; ovs_match_init(&match, &key, &mask); - error = ovs_nla_get_match(&match, &exact_5tuple, + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; @@ -830,7 +829,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; /* Allocate flow. */ - flow = ovs_flow_alloc(!exact_5tuple); + flow = ovs_flow_alloc(); if (IS_ERR(flow)) { error = PTR_ERR(flow); goto err_unlock_ovs; @@ -914,7 +913,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; @@ -968,7 +967,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) goto unlock; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 332aa01492f1..aad7a8da70b1 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -66,10 +66,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) struct flow_stats *stats; __be16 tcp_flags = 0; - if (!flow->stats.is_percpu) - stats = flow->stats.stat; - else - stats = this_cpu_ptr(flow->stats.cpu_stats); + stats = this_cpu_ptr(flow->stats); if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && @@ -110,16 +107,14 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, memset(ovs_stats, 0, sizeof(*ovs_stats)); local_bh_disable(); - if (!flow->stats.is_percpu) { - stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); - } else { - for_each_possible_cpu(cpu) { - struct flow_stats *stats; - - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); - stats_read(stats, ovs_stats, used, tcp_flags); - } + + for_each_possible_cpu(cpu) { + struct flow_stats *stats; + + stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); + stats_read(stats, ovs_stats, used, tcp_flags); } + local_bh_enable(); } @@ -138,13 +133,10 @@ void ovs_flow_stats_clear(struct sw_flow *flow) int cpu; local_bh_disable(); - if (!flow->stats.is_percpu) { - stats_reset(flow->stats.stat); - } else { - for_each_possible_cpu(cpu) { - stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); - } - } + + for_each_possible_cpu(cpu) + stats_reset(per_cpu_ptr(flow->stats, cpu)); + local_bh_enable(); } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 2d770e28a3a3..9c0dd8aa3117 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -155,14 +155,6 @@ struct flow_stats { __be16 tcp_flags; /* Union of seen TCP flags. */ }; -struct sw_flow_stats { - bool is_percpu; - union { - struct flow_stats *stat; - struct flow_stats __percpu *cpu_stats; - }; -}; - struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; @@ -172,7 +164,7 @@ struct sw_flow { struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - struct sw_flow_stats stats; + struct flow_stats __percpu *stats; }; struct arp_eth_header { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 5511ad18be51..84caa99b3769 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -268,20 +268,6 @@ static bool is_all_zero(const u8 *fp, size_t size) return true; } -static bool is_all_set(const u8 *fp, size_t size) -{ - int i; - - if (!fp) - return false; - - for (i = 0; i < size; i++) - if (fp[i] != 0xff) - return false; - - return true; -} - static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], u64 *attrsp, bool nz) @@ -503,9 +489,8 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return 0; } -static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple, - u64 attrs, const struct nlattr **a, - bool is_mask) +static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, + const struct nlattr **a, bool is_mask) { int err; u64 orig_attrs = attrs; @@ -562,11 +547,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); } - if (is_mask && exact_5tuple) { - if (match->mask->key.eth.type != htons(0xffff)) - *exact_5tuple = false; - } - if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { const struct ovs_key_ipv4 *ipv4_key; @@ -589,13 +569,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple SW_FLOW_KEY_PUT(match, ipv4.addr.dst, ipv4_key->ipv4_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV4); - - if (is_mask && exact_5tuple && *exact_5tuple) { - if (ipv4_key->ipv4_proto != 0xff || - ipv4_key->ipv4_src != htonl(0xffffffff) || - ipv4_key->ipv4_dst != htonl(0xffffffff)) - *exact_5tuple = false; - } } if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { @@ -627,15 +600,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV6); - - if (is_mask && exact_5tuple && *exact_5tuple) { - if (ipv6_key->ipv6_proto != 0xff || - !is_all_set((const u8 *)ipv6_key->ipv6_src, - sizeof(match->key->ipv6.addr.src)) || - !is_all_set((const u8 *)ipv6_key->ipv6_dst, - sizeof(match->key->ipv6.addr.dst))) - *exact_5tuple = false; - } } if (attrs & (1 << OVS_KEY_ATTR_ARP)) { @@ -678,11 +642,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple tcp_key->tcp_dst, is_mask); } attrs &= ~(1 << OVS_KEY_ATTR_TCP); - - if (is_mask && exact_5tuple && *exact_5tuple && - (tcp_key->tcp_src != htons(0xffff) || - tcp_key->tcp_dst != htons(0xffff))) - *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { @@ -714,11 +673,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple udp_key->udp_dst, is_mask); } attrs &= ~(1 << OVS_KEY_ATTR_UDP); - - if (is_mask && exact_5tuple && *exact_5tuple && - (udp_key->udp_src != htons(0xffff) || - udp_key->udp_dst != htons(0xffff))) - *exact_5tuple = false; } if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { @@ -804,7 +758,6 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask, * attribute specifies the mask field of the wildcarded flow. */ int ovs_nla_get_match(struct sw_flow_match *match, - bool *exact_5tuple, const struct nlattr *key, const struct nlattr *mask) { @@ -852,13 +805,10 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false); + err = ovs_key_from_nlattrs(match, key_attrs, a, false); if (err) return err; - if (exact_5tuple) - *exact_5tuple = true; - if (mask) { err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); if (err) @@ -896,7 +846,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, } } - err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true); + err = ovs_key_from_nlattrs(match, mask_attrs, a, true); if (err) return err; } else { diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index b31fbe28bc7a..440151045d39 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -45,7 +45,6 @@ int ovs_nla_put_flow(const struct sw_flow_key *, int ovs_nla_get_flow_metadata(struct sw_flow *flow, const struct nlattr *attr); int ovs_nla_get_match(struct sw_flow_match *match, - bool *exact_5tuple, const struct nlattr *, const struct nlattr *); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 1ba1e0b8ade5..aa92da23053d 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -72,7 +72,7 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, *d++ = *s++ & *m++; } -struct sw_flow *ovs_flow_alloc(bool percpu_stats) +struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; int cpu; @@ -84,25 +84,15 @@ struct sw_flow *ovs_flow_alloc(bool percpu_stats) flow->sf_acts = NULL; flow->mask = NULL; - flow->stats.is_percpu = percpu_stats; + flow->stats = alloc_percpu(struct flow_stats); + if (!flow->stats) + goto err; - if (!percpu_stats) { - flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL); - if (!flow->stats.stat) - goto err; + for_each_possible_cpu(cpu) { + struct flow_stats *cpu_stats; - spin_lock_init(&flow->stats.stat->lock); - } else { - flow->stats.cpu_stats = alloc_percpu(struct flow_stats); - if (!flow->stats.cpu_stats) - goto err; - - for_each_possible_cpu(cpu) { - struct flow_stats *cpu_stats; - - cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); - spin_lock_init(&cpu_stats->lock); - } + cpu_stats = per_cpu_ptr(flow->stats, cpu); + spin_lock_init(&cpu_stats->lock); } return flow; err: @@ -141,10 +131,7 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { kfree((struct sf_flow_acts __force *)flow->sf_acts); - if (flow->stats.is_percpu) - free_percpu(flow->stats.cpu_stats); - else - kfree(flow->stats.stat); + free_percpu(flow->stats); kmem_cache_free(flow_cache, flow); } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index baaeb101924d..c26c59a7ab57 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -55,7 +55,7 @@ struct flow_table { int ovs_flow_init(void); void ovs_flow_exit(void); -struct sw_flow *ovs_flow_alloc(bool percpu_stats); +struct sw_flow *ovs_flow_alloc(void); void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); -- cgit v1.2.3 From 63e7959c4b9bd6f791061c460a22d9ee32ae2240 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 27 Mar 2014 12:42:54 -0700 Subject: openvswitch: Per NUMA node flow stats. Keep kernel flow stats for each NUMA node rather than each (logical) CPU. This avoids using the per-CPU allocator and removes most of the kernel-side OVS locking overhead otherwise on the top of perf reports and allows OVS to scale better with higher number of threads. With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup rate doubles on a server with two hyper-threaded physical CPUs (16 logical cores each) compared to the current OVS master. Tested with non-trivial flow table with a TCP port match rule forcing all new connections with unique port numbers to OVS userspace. The IP addresses are still wildcarded, so the kernel flows are not considered as exact match 5-tuple flows. This type of flows can be expected to appear in large numbers as the result of more effective wildcarding made possible by improvements in OVS userspace flow classifier. Perf results for this test (master): Events: 305K cycles + 8.43% ovs-vswitchd [kernel.kallsyms] [k] mutex_spin_on_owner + 5.64% ovs-vswitchd [kernel.kallsyms] [k] __ticket_spin_lock + 4.75% ovs-vswitchd ovs-vswitchd [.] find_match_wc + 3.32% ovs-vswitchd libpthread-2.15.so [.] pthread_mutex_lock + 2.61% ovs-vswitchd [kernel.kallsyms] [k] pcpu_alloc_area + 2.19% ovs-vswitchd ovs-vswitchd [.] flow_hash_in_minimask_range + 2.03% swapper [kernel.kallsyms] [k] intel_idle + 1.84% ovs-vswitchd libpthread-2.15.so [.] pthread_mutex_unlock + 1.64% ovs-vswitchd ovs-vswitchd [.] classifier_lookup + 1.58% ovs-vswitchd libc-2.15.so [.] 0x7f4e6 + 1.07% ovs-vswitchd [kernel.kallsyms] [k] memset + 1.03% netperf [kernel.kallsyms] [k] __ticket_spin_lock + 0.92% swapper [kernel.kallsyms] [k] __ticket_spin_lock ... And after this patch: Events: 356K cycles + 6.85% ovs-vswitchd ovs-vswitchd [.] find_match_wc + 4.63% ovs-vswitchd libpthread-2.15.so [.] pthread_mutex_lock + 3.06% ovs-vswitchd [kernel.kallsyms] [k] __ticket_spin_lock + 2.81% ovs-vswitchd ovs-vswitchd [.] flow_hash_in_minimask_range + 2.51% ovs-vswitchd libpthread-2.15.so [.] pthread_mutex_unlock + 2.27% ovs-vswitchd ovs-vswitchd [.] classifier_lookup + 1.84% ovs-vswitchd libc-2.15.so [.] 0x15d30f + 1.74% ovs-vswitchd [kernel.kallsyms] [k] mutex_spin_on_owner + 1.47% swapper [kernel.kallsyms] [k] intel_idle + 1.34% ovs-vswitchd ovs-vswitchd [.] flow_hash_in_minimask + 1.33% ovs-vswitchd ovs-vswitchd [.] rule_actions_unref + 1.16% ovs-vswitchd ovs-vswitchd [.] hindex_node_with_hash + 1.16% ovs-vswitchd ovs-vswitchd [.] do_xlate_actions + 1.09% ovs-vswitchd ovs-vswitchd [.] ofproto_rule_ref + 1.01% netperf [kernel.kallsyms] [k] __ticket_spin_lock ... There is a small increase in kernel spinlock overhead due to the same spinlock being shared between multiple cores of the same physical CPU, but that is barely visible in the netperf TCP_CRR test performance (maybe ~1% performance drop, hard to tell exactly due to variance in the test results), when testing for kernel module throughput (with no userspace activity, handful of kernel flows). On flow setup, a single stats instance is allocated (for the NUMA node 0). As CPUs from multiple NUMA nodes start updating stats, new NUMA-node specific stats instances are allocated. This allocation on the packet processing code path is made to never block or look for emergency memory pools, minimizing the allocation latency. If the allocation fails, the existing preallocated stats instance is used. Also, if only CPUs from one NUMA-node are updating the preallocated stats instance, no additional stats instances are allocated. This eliminates the need to pre-allocate stats instances that will not be used, also relieving the stats reader from the burden of reading stats that are never used. Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 119 ++++++++++++++++++++++++++++--------------- net/openvswitch/flow.h | 10 +++- net/openvswitch/flow_table.c | 46 +++++++++++++---- net/openvswitch/flow_table.h | 2 + 4 files changed, 122 insertions(+), 55 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index aad7a8da70b1..432f04d5c896 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -65,8 +65,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) { struct flow_stats *stats; __be16 tcp_flags = 0; + int node = numa_node_id(); - stats = this_cpu_ptr(flow->stats); + stats = rcu_dereference(flow->stats[node]); if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && @@ -76,68 +77,102 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); } - spin_lock(&stats->lock); + /* Check if already have node-specific stats. */ + if (likely(stats)) { + spin_lock(&stats->lock); + /* Mark if we write on the pre-allocated stats. */ + if (node == 0 && unlikely(flow->stats_last_writer != node)) + flow->stats_last_writer = node; + } else { + stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */ + spin_lock(&stats->lock); + + /* If the current NUMA-node is the only writer on the + * pre-allocated stats keep using them. + */ + if (unlikely(flow->stats_last_writer != node)) { + /* A previous locker may have already allocated the + * stats, so we need to check again. If node-specific + * stats were already allocated, we update the pre- + * allocated stats as we have already locked them. + */ + if (likely(flow->stats_last_writer != NUMA_NO_NODE) + && likely(!rcu_dereference(flow->stats[node]))) { + /* Try to allocate node-specific stats. */ + struct flow_stats *new_stats; + + new_stats = + kmem_cache_alloc_node(flow_stats_cache, + GFP_THISNODE | + __GFP_NOMEMALLOC, + node); + if (likely(new_stats)) { + new_stats->used = jiffies; + new_stats->packet_count = 1; + new_stats->byte_count = skb->len; + new_stats->tcp_flags = tcp_flags; + spin_lock_init(&new_stats->lock); + + rcu_assign_pointer(flow->stats[node], + new_stats); + goto unlock; + } + } + flow->stats_last_writer = node; + } + } + stats->used = jiffies; stats->packet_count++; stats->byte_count += skb->len; stats->tcp_flags |= tcp_flags; - spin_unlock(&stats->lock); -} - -static void stats_read(struct flow_stats *stats, - struct ovs_flow_stats *ovs_stats, - unsigned long *used, __be16 *tcp_flags) -{ - spin_lock(&stats->lock); - if (!*used || time_after(stats->used, *used)) - *used = stats->used; - *tcp_flags |= stats->tcp_flags; - ovs_stats->n_packets += stats->packet_count; - ovs_stats->n_bytes += stats->byte_count; +unlock: spin_unlock(&stats->lock); } void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu; + int node; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - local_bh_disable(); - - for_each_possible_cpu(cpu) { - struct flow_stats *stats; + for_each_node(node) { + struct flow_stats *stats = rcu_dereference(flow->stats[node]); - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); - stats_read(stats, ovs_stats, used, tcp_flags); + if (stats) { + /* Local CPU may write on non-local stats, so we must + * block bottom-halves here. + */ + spin_lock_bh(&stats->lock); + if (!*used || time_after(stats->used, *used)) + *used = stats->used; + *tcp_flags |= stats->tcp_flags; + ovs_stats->n_packets += stats->packet_count; + ovs_stats->n_bytes += stats->byte_count; + spin_unlock_bh(&stats->lock); + } } - - local_bh_enable(); -} - -static void stats_reset(struct flow_stats *stats) -{ - spin_lock(&stats->lock); - stats->used = 0; - stats->packet_count = 0; - stats->byte_count = 0; - stats->tcp_flags = 0; - spin_unlock(&stats->lock); } void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu; - - local_bh_disable(); - - for_each_possible_cpu(cpu) - stats_reset(per_cpu_ptr(flow->stats, cpu)); - - local_bh_enable(); + int node; + + for_each_node(node) { + struct flow_stats *stats = rcu_dereference(flow->stats[node]); + + if (stats) { + spin_lock_bh(&stats->lock); + stats->used = 0; + stats->packet_count = 0; + stats->byte_count = 0; + stats->tcp_flags = 0; + spin_unlock_bh(&stats->lock); + } + } } static int check_header(struct sk_buff *skb, int len) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 9c0dd8aa3117..ddcebc53224f 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -159,12 +159,18 @@ struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; u32 hash; - + int stats_last_writer; /* NUMA-node id of the last writer on + * 'stats[0]'. + */ struct sw_flow_key key; struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - struct flow_stats __percpu *stats; + struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one + * is allocated at flow creation time, + * the rest are allocated on demand + * while holding the 'stats[0].lock'. + */ }; struct arp_eth_header { diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index aa92da23053d..d8ef37b937bd 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -48,6 +48,7 @@ #define REHASH_INTERVAL (10 * 60 * HZ) static struct kmem_cache *flow_cache; +struct kmem_cache *flow_stats_cache __read_mostly; static u16 range_n_bytes(const struct sw_flow_key_range *range) { @@ -75,7 +76,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; - int cpu; + struct flow_stats *stats; + int node; flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); if (!flow) @@ -83,17 +85,22 @@ struct sw_flow *ovs_flow_alloc(void) flow->sf_acts = NULL; flow->mask = NULL; + flow->stats_last_writer = NUMA_NO_NODE; - flow->stats = alloc_percpu(struct flow_stats); - if (!flow->stats) + /* Initialize the default stat node. */ + stats = kmem_cache_alloc_node(flow_stats_cache, + GFP_KERNEL | __GFP_ZERO, 0); + if (!stats) goto err; - for_each_possible_cpu(cpu) { - struct flow_stats *cpu_stats; + spin_lock_init(&stats->lock); + + RCU_INIT_POINTER(flow->stats[0], stats); + + for_each_node(node) + if (node != 0) + RCU_INIT_POINTER(flow->stats[node], NULL); - cpu_stats = per_cpu_ptr(flow->stats, cpu); - spin_lock_init(&cpu_stats->lock); - } return flow; err: kmem_cache_free(flow_cache, flow); @@ -130,8 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { + int node; + kfree((struct sf_flow_acts __force *)flow->sf_acts); - free_percpu(flow->stats); + for_each_node(node) + if (flow->stats[node]) + kmem_cache_free(flow_stats_cache, + (struct flow_stats __force *)flow->stats[node]); kmem_cache_free(flow_cache, flow); } @@ -586,16 +598,28 @@ int ovs_flow_init(void) BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); - flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, - 0, NULL); + flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) + + (num_possible_nodes() + * sizeof(struct flow_stats *)), + 0, 0, NULL); if (flow_cache == NULL) return -ENOMEM; + flow_stats_cache + = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (flow_stats_cache == NULL) { + kmem_cache_destroy(flow_cache); + flow_cache = NULL; + return -ENOMEM; + } + return 0; } /* Uninitializes the flow module. */ void ovs_flow_exit(void) { + kmem_cache_destroy(flow_stats_cache); kmem_cache_destroy(flow_cache); } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index c26c59a7ab57..ca8a5820f615 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -52,6 +52,8 @@ struct flow_table { unsigned int count; }; +extern struct kmem_cache *flow_stats_cache; + int ovs_flow_init(void); void ovs_flow_exit(void); -- cgit v1.2.3 From d92ab13558599cf73bbc269ce257fe16575d327a Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 27 Mar 2014 12:47:11 -0700 Subject: openvswitch: Fix output of SCTP mask. The 'output' argument of the ovs_nla_put_flow() is the one from which the bits are written to the netlink attributes. For SCTP we accidentally used the bits from the 'swkey' instead. This caused the mask attributes to include the bits from the actual flow key instead of the mask. Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/flow_netlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 84caa99b3769..32a725cfeb0e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1059,11 +1059,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, goto nla_put_failure; sctp_key = nla_data(nla); if (swkey->eth.type == htons(ETH_P_IP)) { - sctp_key->sctp_src = swkey->ipv4.tp.src; - sctp_key->sctp_dst = swkey->ipv4.tp.dst; + sctp_key->sctp_src = output->ipv4.tp.src; + sctp_key->sctp_dst = output->ipv4.tp.dst; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - sctp_key->sctp_src = swkey->ipv6.tp.src; - sctp_key->sctp_dst = swkey->ipv6.tp.dst; + sctp_key->sctp_src = output->ipv6.tp.src; + sctp_key->sctp_dst = output->ipv6.tp.dst; } } else if (swkey->eth.type == htons(ETH_P_IP) && swkey->ip.proto == IPPROTO_ICMP) { -- cgit v1.2.3 From 88d73f6c411ac2f057829b93b3cf202ee551f6cb Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 27 Mar 2014 12:51:49 -0700 Subject: openvswitch: Use TCP flags in the flow key for stats. We already extract the TCP flags for the key, might as well use that for stats. Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Signed-off-by: Jesse Gross --- net/openvswitch/flow.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 432f04d5c896..e0fc12bbeeb1 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -69,14 +69,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) stats = rcu_dereference(flow->stats[node]); - if ((flow->key.eth.type == htons(ETH_P_IP) || - flow->key.eth.type == htons(ETH_P_IPV6)) && - flow->key.ip.frag != OVS_FRAG_TYPE_LATER && - flow->key.ip.proto == IPPROTO_TCP && - likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { - tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); + if (likely(flow->key.ip.proto == IPPROTO_TCP)) { + if (likely(flow->key.eth.type == htons(ETH_P_IP))) + tcp_flags = flow->key.ipv4.tp.flags; + else if (likely(flow->key.eth.type == htons(ETH_P_IPV6))) + tcp_flags = flow->key.ipv6.tp.flags; } - /* Check if already have node-specific stats. */ if (likely(stats)) { spin_lock(&stats->lock); -- cgit v1.2.3 From 944df8ae84d88f5e8eb027990dad2cfa4fbe4be5 Mon Sep 17 00:00:00 2001 From: Monam Agarwal Date: Mon, 24 Mar 2014 00:52:43 +0530 Subject: net/openvswitch: Use with RCU_INIT_POINTER(x, NULL) in vport-gre.c This patch replaces rcu_assign_pointer(x, NULL) with RCU_INIT_POINTER(x, NULL) The rcu_assign_pointer() ensures that the initialization of a structure is carried out before storing a pointer to that structure. And in the case of the NULL pointer, there is no structure to initialize. So, rcu_assign_pointer(p, NULL) can be safely converted to RCU_INIT_POINTER(p, NULL) Signed-off-by: Monam Agarwal Signed-off-by: Jesse Gross --- net/openvswitch/vport-gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 0856f014d7a9..35ec4fed09e2 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -256,7 +256,7 @@ static void gre_tnl_destroy(struct vport *vport) ovs_net = net_generic(net, ovs_net_id); - rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); + RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL); ovs_vport_deferred_free(vport); gre_exit(); } -- cgit v1.2.3 From 1139e241ec436b9e9610c7a33ac5c6657f87fda1 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 09:54:49 -0700 Subject: openvswitch: Compact sw_flow_key. Minimize padding in sw_flow_key and move 'tp' top the main struct. These changes simplify code when accessing the transport port numbers and the tcp flags, and makes the sw_flow_key 8 bytes smaller on 64-bit systems (128->120 bytes). These changes also make the keys for IPv4 packets to fit in one cache line. There is a valid concern for safety of packing the struct ovs_key_ipv4_tunnel, as it would be possible to take the address of the tun_id member as a __be64 * which could result in unaligned access in some systems. However: - sw_flow_key itself is 64-bit aligned, so the tun_id within is always 64-bit aligned. - We never make arrays of ovs_key_ipv4_tunnel (which would force every second tun_key to be misaligned). - We never take the address of the tun_id in to a __be64 *. - Whereever we use struct ovs_key_ipv4_tunnel outside the sw_flow_key, it is in stack (on tunnel input functions), where compiler has full control of the alignment. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/flow.c | 44 +++++++--------- net/openvswitch/flow.h | 29 ++++------- net/openvswitch/flow_netlink.c | 112 ++++++++++++----------------------------- 3 files changed, 62 insertions(+), 123 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index e0fc12bbeeb1..6d8d2da0a8ec 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -64,17 +64,11 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies) void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) { struct flow_stats *stats; - __be16 tcp_flags = 0; + __be16 tcp_flags = flow->key.tp.flags; int node = numa_node_id(); stats = rcu_dereference(flow->stats[node]); - if (likely(flow->key.ip.proto == IPPROTO_TCP)) { - if (likely(flow->key.eth.type == htons(ETH_P_IP))) - tcp_flags = flow->key.ipv4.tp.flags; - else if (likely(flow->key.eth.type == htons(ETH_P_IPV6))) - tcp_flags = flow->key.ipv6.tp.flags; - } /* Check if already have node-specific stats. */ if (likely(stats)) { spin_lock(&stats->lock); @@ -357,8 +351,8 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, /* The ICMPv6 type and code fields use the 16-bit transport port * fields, so we need to store them in 16-bit network byte order. */ - key->ipv6.tp.src = htons(icmp->icmp6_type); - key->ipv6.tp.dst = htons(icmp->icmp6_code); + key->tp.src = htons(icmp->icmp6_type); + key->tp.dst = htons(icmp->icmp6_code); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -520,21 +514,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) if (key->ip.proto == IPPROTO_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv4.tp.src = tcp->source; - key->ipv4.tp.dst = tcp->dest; - key->ipv4.tp.flags = TCP_FLAGS_BE16(tcp); + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == IPPROTO_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv4.tp.src = udp->source; - key->ipv4.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; } } else if (key->ip.proto == IPPROTO_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); - key->ipv4.tp.src = sctp->source; - key->ipv4.tp.dst = sctp->dest; + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; } } else if (key->ip.proto == IPPROTO_ICMP) { if (icmphdr_ok(skb)) { @@ -542,8 +536,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) /* The ICMP type and code fields use the 16-bit * transport port fields, so we need to store * them in 16-bit network byte order. */ - key->ipv4.tp.src = htons(icmp->type); - key->ipv4.tp.dst = htons(icmp->code); + key->tp.src = htons(icmp->type); + key->tp.dst = htons(icmp->code); } } @@ -589,21 +583,21 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); - key->ipv6.tp.src = tcp->source; - key->ipv6.tp.dst = tcp->dest; - key->ipv6.tp.flags = TCP_FLAGS_BE16(tcp); + key->tp.src = tcp->source; + key->tp.dst = tcp->dest; + key->tp.flags = TCP_FLAGS_BE16(tcp); } } else if (key->ip.proto == NEXTHDR_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); - key->ipv6.tp.src = udp->source; - key->ipv6.tp.dst = udp->dest; + key->tp.src = udp->source; + key->tp.dst = udp->dest; } } else if (key->ip.proto == NEXTHDR_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); - key->ipv6.tp.src = sctp->source; - key->ipv6.tp.dst = sctp->dest; + key->tp.src = sctp->source; + key->tp.dst = sctp->dest; } } else if (key->ip.proto == NEXTHDR_ICMP) { if (icmp6hdr_ok(skb)) { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index ddcebc53224f..a292bf8ad75c 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -47,7 +47,7 @@ struct ovs_key_ipv4_tunnel { __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; -}; +} __packed __aligned(4); /* Minimize padding. */ static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, const struct iphdr *iph, __be64 tun_id, @@ -71,7 +71,7 @@ struct sw_flow_key { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ - } phy; + } __packed phy; /* Safe when right after 'tun_key'. */ struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ @@ -84,23 +84,21 @@ struct sw_flow_key { u8 ttl; /* IP TTL/hop limit. */ u8 frag; /* One of OVS_FRAG_TYPE_*. */ } ip; + struct { + __be16 src; /* TCP/UDP/SCTP source port. */ + __be16 dst; /* TCP/UDP/SCTP destination port. */ + __be16 flags; /* TCP flags. */ + } tp; union { struct { struct { __be32 src; /* IP source address. */ __be32 dst; /* IP destination address. */ } addr; - union { - struct { - __be16 src; /* TCP/UDP/SCTP source port. */ - __be16 dst; /* TCP/UDP/SCTP destination port. */ - __be16 flags; /* TCP flags. */ - } tp; - struct { - u8 sha[ETH_ALEN]; /* ARP source hardware address. */ - u8 tha[ETH_ALEN]; /* ARP target hardware address. */ - } arp; - }; + struct { + u8 sha[ETH_ALEN]; /* ARP source hardware address. */ + u8 tha[ETH_ALEN]; /* ARP target hardware address. */ + } arp; } ipv4; struct { struct { @@ -108,11 +106,6 @@ struct sw_flow_key { struct in6_addr dst; /* IPv6 destination address. */ } addr; __be32 label; /* IPv6 flow label. */ - struct { - __be16 src; /* TCP/UDP/SCTP source port. */ - __be16 dst; /* TCP/UDP/SCTP destination port. */ - __be16 flags; /* TCP flags. */ - } tp; struct { struct in6_addr target; /* ND target address. */ u8 sll[ETH_ALEN]; /* ND source link layer address. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 32a725cfeb0e..d757848da89c 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -204,11 +204,11 @@ static bool match_validate(const struct sw_flow_match *match, if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; - if (match->key->ipv6.tp.src == + if (match->key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || - match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { + match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { key_expected |= 1 << OVS_KEY_ATTR_ND; - if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) + if (match->mask && (match->mask->key.tp.src == htons(0xffff))) mask_allowed |= 1 << OVS_KEY_ATTR_ND; } } @@ -630,27 +630,18 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_tcp *tcp_key; tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - tcp_key->tcp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - tcp_key->tcp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - tcp_key->tcp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_TCP); } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.flags, + SW_FLOW_KEY_PUT(match, tp.flags, nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), is_mask); } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.flags, + SW_FLOW_KEY_PUT(match, tp.flags, nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), is_mask); } @@ -661,17 +652,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_udp *udp_key; udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - udp_key->udp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - udp_key->udp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - udp_key->udp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_UDP); } @@ -679,17 +661,8 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_sctp *sctp_key; sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, ipv4.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, - sctp_key->sctp_dst, is_mask); - } else { - SW_FLOW_KEY_PUT(match, ipv6.tp.src, - sctp_key->sctp_src, is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, - sctp_key->sctp_dst, is_mask); - } + SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); + SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_SCTP); } @@ -697,9 +670,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_icmp *icmp_key; icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); - SW_FLOW_KEY_PUT(match, ipv4.tp.src, + SW_FLOW_KEY_PUT(match, tp.src, htons(icmp_key->icmp_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv4.tp.dst, + SW_FLOW_KEY_PUT(match, tp.dst, htons(icmp_key->icmp_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMP); } @@ -708,9 +681,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct ovs_key_icmpv6 *icmpv6_key; icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); - SW_FLOW_KEY_PUT(match, ipv6.tp.src, + SW_FLOW_KEY_PUT(match, tp.src, htons(icmpv6_key->icmpv6_type), is_mask); - SW_FLOW_KEY_PUT(match, ipv6.tp.dst, + SW_FLOW_KEY_PUT(match, tp.dst, htons(icmpv6_key->icmpv6_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); } @@ -1024,19 +997,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; tcp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - tcp_key->tcp_src = output->ipv4.tp.src; - tcp_key->tcp_dst = output->ipv4.tp.dst; - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, - output->ipv4.tp.flags)) - goto nla_put_failure; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - tcp_key->tcp_src = output->ipv6.tp.src; - tcp_key->tcp_dst = output->ipv6.tp.dst; - if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, - output->ipv6.tp.flags)) - goto nla_put_failure; - } + tcp_key->tcp_src = output->tp.src; + tcp_key->tcp_dst = output->tp.dst; + if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, + output->tp.flags)) + goto nla_put_failure; } else if (swkey->ip.proto == IPPROTO_UDP) { struct ovs_key_udp *udp_key; @@ -1044,13 +1009,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; udp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - udp_key->udp_src = output->ipv4.tp.src; - udp_key->udp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - udp_key->udp_src = output->ipv6.tp.src; - udp_key->udp_dst = output->ipv6.tp.dst; - } + udp_key->udp_src = output->tp.src; + udp_key->udp_dst = output->tp.dst; } else if (swkey->ip.proto == IPPROTO_SCTP) { struct ovs_key_sctp *sctp_key; @@ -1058,13 +1018,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; sctp_key = nla_data(nla); - if (swkey->eth.type == htons(ETH_P_IP)) { - sctp_key->sctp_src = output->ipv4.tp.src; - sctp_key->sctp_dst = output->ipv4.tp.dst; - } else if (swkey->eth.type == htons(ETH_P_IPV6)) { - sctp_key->sctp_src = output->ipv6.tp.src; - sctp_key->sctp_dst = output->ipv6.tp.dst; - } + sctp_key->sctp_src = output->tp.src; + sctp_key->sctp_dst = output->tp.dst; } else if (swkey->eth.type == htons(ETH_P_IP) && swkey->ip.proto == IPPROTO_ICMP) { struct ovs_key_icmp *icmp_key; @@ -1073,8 +1028,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; icmp_key = nla_data(nla); - icmp_key->icmp_type = ntohs(output->ipv4.tp.src); - icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); + icmp_key->icmp_type = ntohs(output->tp.src); + icmp_key->icmp_code = ntohs(output->tp.dst); } else if (swkey->eth.type == htons(ETH_P_IPV6) && swkey->ip.proto == IPPROTO_ICMPV6) { struct ovs_key_icmpv6 *icmpv6_key; @@ -1084,8 +1039,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (!nla) goto nla_put_failure; icmpv6_key = nla_data(nla); - icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); - icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); + icmpv6_key->icmpv6_type = ntohs(output->tp.src); + icmpv6_key->icmpv6_code = ntohs(output->tp.dst); if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { @@ -1263,13 +1218,10 @@ static int validate_and_copy_sample(const struct nlattr *attr, static int validate_tp_port(const struct sw_flow_key *flow_key) { - if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) - return 0; - } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) - return 0; - } + if ((flow_key->eth.type == htons(ETH_P_IP) || + flow_key->eth.type == htons(ETH_P_IPV6)) && + (flow_key->tp.src || flow_key->tp.dst)) + return 0; return -EINVAL; } -- cgit v1.2.3 From be52c9e96a6657d117bb0ec6e11438fb246af5c7 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 09:59:40 -0700 Subject: openvswitch: Avoid assigning a NULL pointer to flow actions. Flow SET can accept an empty set of actions, with the intended semantics of leaving existing actions unmodified. This seems to have been brokin after OVS 1.7, as we have assigned the flow's actions pointer to NULL in this case, but we never check for the NULL pointer later on. This patch restores the intended behavior and documents it in the include/linux/openvswitch.h. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8867d7e2d65b..90a1e5e66287 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -810,6 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_kfree; } } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { + /* OVS_FLOW_CMD_NEW must have actions. */ error = -EINVAL; goto error; } @@ -849,8 +850,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); } else { /* We found a matching flow. */ - struct sw_flow_actions *old_acts; - /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL * because Generic Netlink treats the latter as a dump @@ -866,11 +865,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!ovs_flow_cmp_unmasked_key(flow, &match)) goto err_unlock_ovs; - /* Update actions. */ - old_acts = ovsl_dereference(flow->sf_acts); - rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); + /* Update actions, if present. */ + if (acts) { + struct sw_flow_actions *old_acts; + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); + ovs_nla_free_flow_actions(old_acts); + } reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); /* Clear stats. */ -- cgit v1.2.3 From bb6f9a708d4067713afae2e9eb2637f6b4c01ecb Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 11:32:17 -0700 Subject: openvswitch: Clarify locking. Remove unnecessary locking from functions that are always called with appropriate locking. Signed-off-by: Jarno Rajahalme Signed-off-by: Thomas Graf Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 13 +++++++------ net/openvswitch/flow.c | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 90a1e5e66287..138ea0c9e1b3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -173,6 +173,7 @@ static struct hlist_head *vport_hash_bucket(const struct datapath *dp, return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; } +/* Called with ovs_mutex or RCU read lock. */ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; @@ -652,7 +653,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ } -/* Called with ovs_mutex. */ +/* Called with ovs_mutex or RCU read lock. */ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) @@ -743,6 +744,7 @@ error: return err; } +/* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, struct genl_info *info) { @@ -753,6 +755,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, return genlmsg_new_unicast(len, info, GFP_KERNEL); } +/* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, struct genl_info *info, @@ -1094,6 +1097,7 @@ static size_t ovs_dp_cmd_msg_size(void) return msgsize; } +/* Called with ovs_mutex or RCU read lock. */ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1109,9 +1113,7 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, ovs_header->dp_ifindex = get_dpifindex(dp); - rcu_read_lock(); err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp)); - rcu_read_unlock(); if (err) goto nla_put_failure; @@ -1136,6 +1138,7 @@ error: return -EMSGSIZE; } +/* Must be called with ovs_mutex. */ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, struct genl_info *info, u8 cmd) { @@ -1154,7 +1157,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, return skb; } -/* Called with ovs_mutex. */ +/* Called with rcu_read_lock or ovs_mutex. */ static struct datapath *lookup_datapath(struct net *net, struct ovs_header *ovs_header, struct nlattr *a[OVS_DP_ATTR_MAX + 1]) @@ -1166,10 +1169,8 @@ static struct datapath *lookup_datapath(struct net *net, else { struct vport *vport; - rcu_read_lock(); vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME])); dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL; - rcu_read_unlock(); } return dp ? dp : ERR_PTR(-ENODEV); } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 6d8d2da0a8ec..1019fc1db06e 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -122,6 +122,7 @@ unlock: spin_unlock(&stats->lock); } +/* Called with ovs_mutex. */ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { @@ -132,7 +133,7 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, memset(ovs_stats, 0, sizeof(*ovs_stats)); for_each_node(node) { - struct flow_stats *stats = rcu_dereference(flow->stats[node]); + struct flow_stats *stats = ovsl_dereference(flow->stats[node]); if (stats) { /* Local CPU may write on non-local stats, so we must -- cgit v1.2.3 From fb5d1e9e127ad1542e5db20cd8620a1509baef69 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 13:13:14 -0700 Subject: openvswitch: Build flow cmd netlink reply only if needed. Use netlink_has_listeners() and NLM_F_ECHO flag to determine if a reply is needed or not for OVS_FLOW_CMD_NEW, OVS_FLOW_CMD_SET, or OVS_FLOW_CMD_DEL. Currently, OVS userspace does not request a reply for OVS_FLOW_CMD_NEW, but usually does for OVS_FLOW_CMD_DEL, as stats may have changed. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 70 +++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 22 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 138ea0c9e1b3..cb5d64a5c759 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -62,6 +62,15 @@ int ovs_net_id __read_mostly; +/* Check if need to build a reply message. + * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ +static bool ovs_must_notify(struct genl_info *info, + const struct genl_multicast_group *grp) +{ + return info->nlhdr->nlmsg_flags & NLM_F_ECHO || + netlink_has_listeners(genl_info_net(info)->genl_sock, 0); +} + static void ovs_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info) { @@ -746,27 +755,36 @@ error: /* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, - struct genl_info *info) + struct genl_info *info, + bool always) { + struct sk_buff *skb; size_t len; + if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) + return NULL; + len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); - return genlmsg_new_unicast(len, info, GFP_KERNEL); + skb = genlmsg_new_unicast(len, info, GFP_KERNEL); + if (!skb) + return ERR_PTR(-ENOMEM); + + return skb; } /* Must be called with ovs_mutex. */ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, struct genl_info *info, - u8 cmd) + u8 cmd, bool always) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow, info); - if (!skb) - return ERR_PTR(-ENOMEM); + skb = ovs_flow_cmd_alloc_info(flow, info, always); + if (!skb || IS_ERR(skb)) + return skb; retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, info->snd_seq, 0, cmd); @@ -850,7 +868,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, + OVS_FLOW_CMD_NEW, false); } else { /* We found a matching flow. */ /* Bail out if we're not allowed to modify an existing flow. @@ -876,7 +895,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); ovs_nla_free_flow_actions(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, + OVS_FLOW_CMD_NEW, false); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) @@ -884,11 +904,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } ovs_unlock(); - if (!IS_ERR(reply)) - ovs_notify(&dp_flow_genl_family, reply, info); - else - genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, - 0, PTR_ERR(reply)); + if (reply) { + if (!IS_ERR(reply)) + ovs_notify(&dp_flow_genl_family, reply, info); + else + genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, + 0, PTR_ERR(reply)); + } + return 0; err_flow_free: @@ -935,7 +958,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW); + reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW, true); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -982,22 +1005,25 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow, info); - if (!reply) { - err = -ENOMEM; + reply = ovs_flow_cmd_alloc_info(flow, info, false); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); goto unlock; } ovs_flow_tbl_remove(&dp->table, flow); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, - info->snd_seq, 0, OVS_FLOW_CMD_DEL); - BUG_ON(err < 0); - + if (reply) { + err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_DEL); + BUG_ON(err < 0); + } ovs_flow_free(flow, true); ovs_unlock(); - ovs_notify(&dp_flow_genl_family, reply, info); + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); return 0; unlock: ovs_unlock(); -- cgit v1.2.3 From 56c19868e115fcf8d62d843e1b9616bb9837d0db Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 13:24:53 -0700 Subject: openvswitch: Make flow mask removal symmetric. Masks are inserted when flows are inserted to the table, so it is logical to correspondingly remove masks when flows are removed from the table, in ovs_flow_table_remove(). This allows ovs_flow_free() to be called without locking, which will be used by later patches. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/flow_table.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d8ef37b937bd..c80df6f42fee 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -159,25 +159,6 @@ void ovs_flow_free(struct sw_flow *flow, bool deferred) if (!flow) return; - if (flow->mask) { - struct sw_flow_mask *mask = flow->mask; - - /* ovs-lock is required to protect mask-refcount and - * mask list. - */ - ASSERT_OVSL(); - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) { - list_del_rcu(&mask->list); - if (deferred) - kfree_rcu(mask, rcu); - else - kfree(mask); - } - } - if (deferred) call_rcu(&flow->rcu, rcu_free_flow_callback); else @@ -491,6 +472,25 @@ static struct table_instance *table_instance_expand(struct table_instance *ti) return table_instance_rehash(ti, ti->n_buckets * 2); } +/* Remove 'mask' from the mask list, if it is not needed any more. */ +static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) +{ + if (mask) { + /* ovs-lock is required to protect mask-refcount and + * mask list. + */ + ASSERT_OVSL(); + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) { + list_del_rcu(&mask->list); + kfree_rcu(mask, rcu); + } + } +} + +/* Must be called with OVS mutex held. */ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); @@ -498,6 +498,11 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[ti->node_ver]); table->count--; + + /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be + * accessible as long as the RCU read lock is held. + */ + flow_mask_remove(table, flow->mask); } static struct sw_flow_mask *mask_alloc(void) @@ -560,6 +565,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, return 0; } +/* Must be called with OVS mutex held. */ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, struct sw_flow_mask *mask) { -- cgit v1.2.3 From 6093ae9abac18871afd0bbc5cf093dff53112fcb Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:13:32 -0700 Subject: openvswitch: Minimize dp and vport critical sections. Move most memory allocations away from the ovs_mutex critical sections. vport allocations still happen while the lock is taken, as changing that would require major refactoring. Also, vports are created very rarely so it should not matter. Change ovs_dp_cmd_get() now only takes the rcu_read_lock(), rather than ovs_lock(), as nothing need to be changed. This was done by ovs_vport_cmd_get() already. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 218 +++++++++++++++++++++++---------------------- 1 file changed, 110 insertions(+), 108 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index cb5d64a5c759..f3bcdac80bda 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1164,23 +1164,9 @@ error: return -EMSGSIZE; } -/* Must be called with ovs_mutex. */ -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, - struct genl_info *info, u8 cmd) +static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info) { - struct sk_buff *skb; - int retval; - - skb = genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); - if (!skb) - return ERR_PTR(-ENOMEM); - - retval = ovs_dp_cmd_fill_info(dp, skb, info->snd_portid, info->snd_seq, 0, cmd); - if (retval < 0) { - kfree_skb(skb); - return ERR_PTR(retval); - } - return skb; + return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL); } /* Called with rcu_read_lock or ovs_mutex. */ @@ -1233,12 +1219,14 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) goto err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_unlock_ovs; + goto err_free_reply; ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); @@ -1273,6 +1261,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_change(dp, a); + /* So far only local changes have been made, now need the lock. */ + ovs_lock(); + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); @@ -1291,10 +1282,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto err_destroy_local_port; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); list_add_tail_rcu(&dp->list_node, &ovs_net->dps); @@ -1304,9 +1294,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_local_port: - ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); err_destroy_ports_array: + ovs_unlock(); kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); @@ -1315,8 +1304,8 @@ err_destroy_table: err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); -err_unlock_ovs: - ovs_unlock(); +err_free_reply: + kfree_skb(reply); err: return err; } @@ -1354,16 +1343,19 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto unlock; + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_DEL); + BUG_ON(err < 0); __dp_destroy(dp); ovs_unlock(); @@ -1371,8 +1363,10 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1382,29 +1376,30 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); err = PTR_ERR(dp); if (IS_ERR(dp)) - goto unlock; + goto err_unlock_free; ovs_dp_change(dp, info->attrs); - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - genl_set_err(&dp_datapath_genl_family, sock_net(skb->sk), 0, - 0, err); - err = 0; - goto unlock; - } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); ovs_unlock(); ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -unlock: + +err_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1414,24 +1409,26 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; int err; - ovs_lock(); + reply = ovs_dp_cmd_alloc_info(info); + if (!reply) + return -ENOMEM; + + rcu_read_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) { err = PTR_ERR(dp); - goto unlock; - } - - reply = ovs_dp_cmd_build_info(dp, info, OVS_DP_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - goto unlock; + goto err_unlock_free; } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, + info->snd_seq, 0, OVS_DP_CMD_NEW); + BUG_ON(err < 0); + rcu_read_unlock(); - ovs_unlock(); return genlmsg_reply(reply, info); -unlock: - ovs_unlock(); +err_unlock_free: + rcu_read_unlock(); + kfree_skb(reply); return err; } @@ -1544,7 +1541,12 @@ error: return err; } -/* Called with ovs_mutex or RCU read lock. */ +static struct sk_buff *ovs_vport_cmd_alloc_info(void) +{ + return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +} + +/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { @@ -1606,33 +1608,35 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) u32 port_no; int err; - err = -EINVAL; if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) - goto exit; + return -EINVAL; + + port_no = a[OVS_VPORT_ATTR_PORT_NO] + ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; + if (port_no >= DP_MAX_PORTS) + return -EFBIG; + + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) - goto exit_unlock; - - if (a[OVS_VPORT_ATTR_PORT_NO]) { - port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]); - - err = -EFBIG; - if (port_no >= DP_MAX_PORTS) - goto exit_unlock; + goto exit_unlock_free; + if (port_no) { vport = ovs_vport_ovsl(dp, port_no); err = -EBUSY; if (vport) - goto exit_unlock; + goto exit_unlock_free; } else { for (port_no = 1; ; port_no++) { if (port_no >= DP_MAX_PORTS) { err = -EFBIG; - goto exit_unlock; + goto exit_unlock_free; } vport = ovs_vport_ovsl(dp, port_no); if (!vport) @@ -1650,22 +1654,19 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; - err = 0; - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - ovs_dp_detach_port(vport); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); -exit: + kfree_skb(reply); return err; } @@ -1676,28 +1677,26 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (a[OVS_VPORT_ATTR_TYPE] && nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; - goto exit_unlock; - } - - reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!reply) { - err = -ENOMEM; - goto exit_unlock; + goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); if (err) - goto exit_free; + goto exit_unlock_free; } if (a[OVS_VPORT_ATTR_UPCALL_PID]) @@ -1711,10 +1710,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_vport_genl_family, reply, info); return 0; -exit_free: - kfree_skb(reply); -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1725,30 +1723,33 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + ovs_lock(); vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; + goto exit_unlock_free; if (vport->port_no == OVSP_LOCAL) { err = -EINVAL; - goto exit_unlock; + goto exit_unlock_free; } - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_DEL); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - - err = 0; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_DEL); + BUG_ON(err < 0); ovs_dp_detach_port(vport); + ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); + return 0; -exit_unlock: +exit_unlock_free: ovs_unlock(); + kfree_skb(reply); return err; } @@ -1760,24 +1761,25 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) struct vport *vport; int err; + reply = ovs_vport_cmd_alloc_info(); + if (!reply) + return -ENOMEM; + rcu_read_lock(); vport = lookup_vport(sock_net(skb->sk), ovs_header, a); err = PTR_ERR(vport); if (IS_ERR(vport)) - goto exit_unlock; - - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, - info->snd_seq, OVS_VPORT_CMD_NEW); - err = PTR_ERR(reply); - if (IS_ERR(reply)) - goto exit_unlock; - + goto exit_unlock_free; + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); rcu_read_unlock(); return genlmsg_reply(reply, info); -exit_unlock: +exit_unlock_free: rcu_read_unlock(); + kfree_skb(reply); return err; } -- cgit v1.2.3 From eb07265904d6ee95497aba0f3cbd2ae6d9c39a97 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:15:18 -0700 Subject: openvswitch: Fix typo. Incorrect struct name was confusing, even though otherwise inconsequental. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/flow_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index c80df6f42fee..574c3abc9b30 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -139,7 +139,7 @@ static void flow_free(struct sw_flow *flow) { int node; - kfree((struct sf_flow_acts __force *)flow->sf_acts); + kfree((struct sw_flow_actions __force *)flow->sf_acts); for_each_node(node) if (flow->stats[node]) kmem_cache_free(flow_stats_cache, -- cgit v1.2.3 From 86ec8dbae27e5fa2b5d54f10f77286d9ef55732a Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:17:28 -0700 Subject: openvswitch: Fix ovs_flow_stats_get/clear RCU dereference. For ovs_flow_stats_get() using ovsl_dereference() was wrong, since flow dumps call this with RCU read lock. ovs_flow_stats_clear() is always called with ovs_mutex, so can use ovsl_dereference(). Also, make the ovs_flow_stats_get() 'flow' argument const to make later patches cleaner. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/flow.c | 10 ++++++---- net/openvswitch/flow.h | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 1019fc1db06e..334751cb1528 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -122,8 +122,9 @@ unlock: spin_unlock(&stats->lock); } -/* Called with ovs_mutex. */ -void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, +/* Must be called with rcu_read_lock or ovs_mutex. */ +void ovs_flow_stats_get(const struct sw_flow *flow, + struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { int node; @@ -133,7 +134,7 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, memset(ovs_stats, 0, sizeof(*ovs_stats)); for_each_node(node) { - struct flow_stats *stats = ovsl_dereference(flow->stats[node]); + struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); if (stats) { /* Local CPU may write on non-local stats, so we must @@ -150,12 +151,13 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, } } +/* Called with ovs_mutex. */ void ovs_flow_stats_clear(struct sw_flow *flow) { int node; for_each_node(node) { - struct flow_stats *stats = rcu_dereference(flow->stats[node]); + struct flow_stats *stats = ovsl_dereference(flow->stats[node]); if (stats) { spin_lock_bh(&stats->lock); diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a292bf8ad75c..ac395d2cd821 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -180,10 +180,10 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; -void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb); -void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats, +void ovs_flow_stats_update(struct sw_flow *, struct sk_buff *); +void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, unsigned long *used, __be16 *tcp_flags); -void ovs_flow_stats_clear(struct sw_flow *flow); +void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); -- cgit v1.2.3 From 0e9796b4af9ef490e203158cb738a5a4986eb75c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:28:07 -0700 Subject: openvswitch: Reduce locking requirements. Reduce and clarify locking requirements for ovs_flow_cmd_alloc_info(), ovs_flow_cmd_fill_info() and ovs_flow_cmd_build_info(). A datapath pointer is available only when holding a lock. Change ovs_flow_cmd_fill_info() and ovs_flow_cmd_build_info() to take a dp_ifindex directly, rather than a datapath pointer that is then (only) used to get the dp_ifindex. This is useful, since the dp_ifindex is available even when the datapath pointer is not, both before and after taking a lock, which makes further critical section reduction possible. Make ovs_flow_cmd_alloc_info() take an 'acts' argument instead a 'flow' pointer. This allows some future patches to do the allocation before acquiring the flow pointer. The locking requirements after this patch are: ovs_flow_cmd_alloc_info(): May be called without locking, must not be called while holding the RCU read lock (due to memory allocation). If 'acts' belong to a flow in the flow table, however, then the caller must hold ovs_mutex. ovs_flow_cmd_fill_info(): Either ovs_mutex or RCU read lock must be held. ovs_flow_cmd_build_info(): This calls both of the above, so the caller must hold ovs_mutex. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 54 +++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 25 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f3bcdac80bda..949fc7fadaf0 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -663,7 +663,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) } /* Called with ovs_mutex or RCU read lock. */ -static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, +static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -680,7 +680,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, if (!ovs_header) return -EMSGSIZE; - ovs_header->dp_ifindex = get_dpifindex(dp); + ovs_header->dp_ifindex = dp_ifindex; /* Fill flow key. */ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); @@ -703,6 +703,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, nla_nest_end(skb, nla); ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); + if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) goto nla_put_failure; @@ -730,9 +731,9 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, const struct sw_flow_actions *sf_acts; sf_acts = rcu_dereference_ovsl(flow->sf_acts); - err = ovs_nla_put_actions(sf_acts->actions, sf_acts->actions_len, skb); + if (!err) nla_nest_end(skb, start); else { @@ -753,41 +754,40 @@ error: return err; } -/* Must be called with ovs_mutex. */ -static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow, +/* May not be called with RCU read lock. */ +static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, struct genl_info *info, bool always) { struct sk_buff *skb; - size_t len; if (!always && !ovs_must_notify(info, &ovs_dp_flow_multicast_group)) return NULL; - len = ovs_flow_cmd_msg_size(ovsl_dereference(flow->sf_acts)); - - skb = genlmsg_new_unicast(len, info, GFP_KERNEL); + skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); return skb; } -/* Must be called with ovs_mutex. */ -static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, - struct datapath *dp, - struct genl_info *info, - u8 cmd, bool always) +/* Called with ovs_mutex. */ +static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, + int dp_ifindex, + struct genl_info *info, u8 cmd, + bool always) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(flow, info, always); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, + always); if (!skb || IS_ERR(skb)) return skb; - retval = ovs_flow_cmd_fill_info(flow, dp, skb, info->snd_portid, - info->snd_seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, + info->snd_portid, info->snd_seq, 0, + cmd); BUG_ON(retval < 0); return skb; } @@ -868,8 +868,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; } - reply = ovs_flow_cmd_build_info(flow, dp, info, - OVS_FLOW_CMD_NEW, false); + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); } else { /* We found a matching flow. */ /* Bail out if we're not allowed to modify an existing flow. @@ -895,8 +895,9 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); ovs_nla_free_flow_actions(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info, - OVS_FLOW_CMD_NEW, false); + + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) @@ -958,7 +959,8 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW, true); + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, + OVS_FLOW_CMD_NEW, true); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1005,7 +1007,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - reply = ovs_flow_cmd_alloc_info(flow, info, false); + reply = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, + false); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1014,7 +1017,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_flow_tbl_remove(&dp->table, flow); if (reply) { - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, + err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, + reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); BUG_ON(err < 0); @@ -1054,7 +1058,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!flow) break; - if (ovs_flow_cmd_fill_info(flow, dp, skb, + if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) -- cgit v1.2.3 From aed067783e505bf66dcafa8647d08619eb5b1c55 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:40:13 -0700 Subject: openvswitch: Minimize ovs_flow_cmd_del critical section. ovs_flow_cmd_del() now allocates reply (if needed) after the flow has already been removed from the flow table. If the reply allocation fails, a netlink error is signaled with netlink_set_err(), as is already done in ovs_flow_cmd_new_or_set() in the similar situation. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 53 ++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 25 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 949fc7fadaf0..f28beda8426f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -984,50 +984,53 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct sw_flow_match match; int err; + if (likely(a[OVS_FLOW_ATTR_KEY])) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); + if (unlikely(err)) + return err; + } + ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) { + if (unlikely(!dp)) { err = -ENODEV; goto unlock; } - if (!a[OVS_FLOW_ATTR_KEY]) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL); - if (err) - goto unlock; - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow || !ovs_flow_cmp_unmasked_key(flow, &match)) { + if (unlikely(!flow || !ovs_flow_cmp_unmasked_key(flow, &match))) { err = -ENOENT; goto unlock; } - reply = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, - false); - if (IS_ERR(reply)) { - err = PTR_ERR(reply); - goto unlock; - } - ovs_flow_tbl_remove(&dp->table, flow); + ovs_unlock(); - if (reply) { - err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, - reply, info->snd_portid, - info->snd_seq, 0, - OVS_FLOW_CMD_DEL); - BUG_ON(err < 0); + reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts, + info, false); + if (likely(reply)) { + if (likely(!IS_ERR(reply))) { + rcu_read_lock(); /*To keep RCU checker happy. */ + err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_DEL); + rcu_read_unlock(); + BUG_ON(err < 0); + + ovs_notify(&dp_flow_genl_family, reply, info); + } else { + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, PTR_ERR(reply)); + } } - ovs_flow_free(flow, true); - ovs_unlock(); - if (reply) - ovs_notify(&dp_flow_genl_family, reply, info); + ovs_flow_free(flow, true); return 0; unlock: ovs_unlock(); -- cgit v1.2.3 From 37bdc87ba00dadd0156db77ba48224d042202435 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 14:53:51 -0700 Subject: openvswitch: Split ovs_flow_cmd_new_or_set(). Following patch will be easier to reason about with separate ovs_flow_cmd_new() and ovs_flow_cmd_set() functions. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 160 ++++++++++++++++++++++++++++++++------------- 1 file changed, 116 insertions(+), 44 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f28beda8426f..22665a6144fc 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -792,16 +792,16 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, return skb; } -static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) +static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key, masked_key; - struct sw_flow *flow = NULL; + struct sw_flow *flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct sw_flow_actions *acts = NULL; + struct sw_flow_actions *acts; struct sw_flow_match match; int error; @@ -817,23 +817,21 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) goto error; /* Validate actions. */ - if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); - error = PTR_ERR(acts); - if (IS_ERR(acts)) - goto error; + error = -EINVAL; + if (!a[OVS_FLOW_ATTR_ACTIONS]) + goto error; - ovs_flow_mask_key(&masked_key, &key, &mask); - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); - if (error) { - OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; - } - } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { - /* OVS_FLOW_CMD_NEW must have actions. */ - error = -EINVAL; + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) goto error; + + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + goto err_kfree; } ovs_lock(); @@ -845,11 +843,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Check if this is a duplicate flow */ flow = ovs_flow_tbl_lookup(&dp->table, &key); if (!flow) { - /* Bail out if we're not allowed to create a new flow. */ - error = -ENOENT; - if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) - goto err_unlock_ovs; - /* Allocate flow. */ flow = ovs_flow_alloc(); if (IS_ERR(flow)) { @@ -867,11 +860,9 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) acts = NULL; goto err_flow_free; } - - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); } else { - /* We found a matching flow. */ + struct sw_flow_actions *old_acts; + /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL * because Generic Netlink treats the latter as a dump @@ -879,30 +870,113 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) * gets fixed. */ error = -EEXIST; - if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && - info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + if (info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) goto err_unlock_ovs; /* The unmasked key has to be the same for flow updates. */ if (!ovs_flow_cmp_unmasked_key(flow, &match)) goto err_unlock_ovs; - /* Update actions, if present. */ - if (acts) { - struct sw_flow_actions *old_acts; + /* Update actions. */ + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); + ovs_nla_free_flow_actions(old_acts); + } + + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + ovs_unlock(); + + if (reply) { + if (!IS_ERR(reply)) + ovs_notify(&dp_flow_genl_family, reply, info); + else + netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, + PTR_ERR(reply)); + } + return 0; + +err_flow_free: + ovs_flow_free(flow, false); +err_unlock_ovs: + ovs_unlock(); +err_kfree: + kfree(acts); +error: + return error; +} - old_acts = ovsl_dereference(flow->sf_acts); - rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); +static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct ovs_header *ovs_header = info->userhdr; + struct sw_flow_key key, masked_key; + struct sw_flow *flow; + struct sw_flow_mask mask; + struct sk_buff *reply = NULL; + struct datapath *dp; + struct sw_flow_actions *acts = NULL; + struct sw_flow_match match; + int error; + + /* Extract key. */ + error = -EINVAL; + if (!a[OVS_FLOW_ATTR_KEY]) + goto error; + + ovs_match_init(&match, &key, &mask); + error = ovs_nla_get_match(&match, + a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); + if (error) + goto error; + + /* Validate actions. */ + if (a[OVS_FLOW_ATTR_ACTIONS]) { + acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) + goto error; + + ovs_flow_mask_key(&masked_key, &key, &mask); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], + &masked_key, 0, &acts); + if (error) { + OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); + goto err_kfree; } + } - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); + ovs_lock(); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + error = -ENODEV; + if (!dp) + goto err_unlock_ovs; + + /* Check that the flow exists. */ + flow = ovs_flow_tbl_lookup(&dp->table, &key); + error = -ENOENT; + if (!flow) + goto err_unlock_ovs; + + /* The unmasked key has to be the same for flow updates. */ + error = -EEXIST; + if (!ovs_flow_cmp_unmasked_key(flow, &match)) + goto err_unlock_ovs; - /* Clear stats. */ - if (a[OVS_FLOW_ATTR_CLEAR]) - ovs_flow_stats_clear(flow); + /* Update actions, if present. */ + if (acts) { + struct sw_flow_actions *old_acts; + + old_acts = ovsl_dereference(flow->sf_acts); + rcu_assign_pointer(flow->sf_acts, acts); + ovs_nla_free_flow_actions(old_acts); } + + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + /* Clear stats. */ + if (a[OVS_FLOW_ATTR_CLEAR]) + ovs_flow_stats_clear(flow); ovs_unlock(); if (reply) { @@ -915,8 +989,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) return 0; -err_flow_free: - ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); err_kfree: @@ -1078,7 +1150,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set + .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ @@ -1094,7 +1166,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_SET, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, - .doit = ovs_flow_cmd_new_or_set, + .doit = ovs_flow_cmd_set, }, }; -- cgit v1.2.3 From 893f139b9a6c00c097b9082a90f3041cfb3a0d20 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Mon, 5 May 2014 15:22:25 -0700 Subject: openvswitch: Minimize ovs_flow_cmd_new|set critical sections. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 192 +++++++++++++++++++++++++++------------------ 1 file changed, 116 insertions(+), 76 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 22665a6144fc..6bc9d94c8df2 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -796,8 +796,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow_key key, masked_key; - struct sw_flow *flow; + struct sw_flow *flow, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; @@ -805,61 +804,77 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_match match; int error; - /* Extract key. */ + /* Must have key and actions. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) goto error; + if (!a[OVS_FLOW_ATTR_ACTIONS]) + goto error; - ovs_match_init(&match, &key, &mask); + /* Most of the time we need to allocate a new flow, do it before + * locking. + */ + new_flow = ovs_flow_alloc(); + if (IS_ERR(new_flow)) { + error = PTR_ERR(new_flow); + goto error; + } + + /* Extract key. */ + ovs_match_init(&match, &new_flow->unmasked_key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) - goto error; + goto err_kfree_flow; - /* Validate actions. */ - error = -EINVAL; - if (!a[OVS_FLOW_ATTR_ACTIONS]) - goto error; + ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + /* Validate actions. */ acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); error = PTR_ERR(acts); if (IS_ERR(acts)) - goto error; + goto err_kfree_flow; - ovs_flow_mask_key(&masked_key, &key, &mask); - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], - &masked_key, 0, &acts); + error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, + 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; + goto err_kfree_acts; + } + + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - error = -ENODEV; - if (!dp) + if (unlikely(!dp)) { + error = -ENODEV; goto err_unlock_ovs; - + } /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &key); - if (!flow) { - /* Allocate flow. */ - flow = ovs_flow_alloc(); - if (IS_ERR(flow)) { - error = PTR_ERR(flow); - goto err_unlock_ovs; - } - - flow->key = masked_key; - flow->unmasked_key = key; - rcu_assign_pointer(flow->sf_acts, acts); + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (likely(!flow)) { + rcu_assign_pointer(new_flow->sf_acts, acts); /* Put flow in bucket. */ - error = ovs_flow_tbl_insert(&dp->table, flow, &mask); - if (error) { + error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask); + if (unlikely(error)) { acts = NULL; - goto err_flow_free; + goto err_unlock_ovs; } + + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(new_flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + ovs_unlock(); } else { struct sw_flow_actions *old_acts; @@ -869,39 +884,45 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) * request. We also accept NLM_F_EXCL in case that bug ever * gets fixed. */ - error = -EEXIST; - if (info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) + if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE + | NLM_F_EXCL))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* The unmasked key has to be the same for flow updates. */ - if (!ovs_flow_cmp_unmasked_key(flow, &match)) + if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); - } - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); - ovs_unlock(); + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + ovs_unlock(); - if (reply) { - if (!IS_ERR(reply)) - ovs_notify(&dp_flow_genl_family, reply, info); - else - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0, - PTR_ERR(reply)); + ovs_nla_free_flow_actions(old_acts); + ovs_flow_free(new_flow, false); } + + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); return 0; -err_flow_free: - ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); -err_kfree: + kfree_skb(reply); +err_kfree_acts: kfree(acts); +err_kfree_flow: + ovs_flow_free(new_flow, false); error: return error; } @@ -915,7 +936,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply = NULL; struct datapath *dp; - struct sw_flow_actions *acts = NULL; + struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; int error; @@ -942,56 +963,75 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) &masked_key, 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree; + goto err_kfree_acts; + } + } + + /* Can allocate before locking if have acts. */ + if (acts) { + reply = ovs_flow_cmd_alloc_info(acts, info, false); + if (IS_ERR(reply)) { + error = PTR_ERR(reply); + goto err_kfree_acts; } } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - error = -ENODEV; - if (!dp) + if (unlikely(!dp)) { + error = -ENODEV; goto err_unlock_ovs; - + } /* Check that the flow exists. */ flow = ovs_flow_tbl_lookup(&dp->table, &key); - error = -ENOENT; - if (!flow) + if (unlikely(!flow)) { + error = -ENOENT; goto err_unlock_ovs; - + } /* The unmasked key has to be the same for flow updates. */ - error = -EEXIST; - if (!ovs_flow_cmp_unmasked_key(flow, &match)) + if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { + error = -EEXIST; goto err_unlock_ovs; - + } /* Update actions, if present. */ - if (acts) { - struct sw_flow_actions *old_acts; - + if (likely(acts)) { old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); - ovs_nla_free_flow_actions(old_acts); + + if (unlikely(reply)) { + error = ovs_flow_cmd_fill_info(flow, + ovs_header->dp_ifindex, + reply, info->snd_portid, + info->snd_seq, 0, + OVS_FLOW_CMD_NEW); + BUG_ON(error < 0); + } + } else { + /* Could not alloc without acts before locking. */ + reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, + info, OVS_FLOW_CMD_NEW, false); + if (unlikely(IS_ERR(reply))) { + error = PTR_ERR(reply); + goto err_unlock_ovs; + } } - reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) ovs_flow_stats_clear(flow); ovs_unlock(); - if (reply) { - if (!IS_ERR(reply)) - ovs_notify(&dp_flow_genl_family, reply, info); - else - genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, - 0, PTR_ERR(reply)); - } + if (reply) + ovs_notify(&dp_flow_genl_family, reply, info); + if (old_acts) + ovs_nla_free_flow_actions(old_acts); return 0; err_unlock_ovs: ovs_unlock(); -err_kfree: + kfree_skb(reply); +err_kfree_acts: kfree(acts); error: return error; -- cgit v1.2.3 From 0c200ef94c9492205e18a18c25650cf27939889c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 6 May 2014 16:44:50 -0700 Subject: openvswitch: Simplify genetlink code. Following patch get rid of struct genl_family_and_ops which is redundant due to changes to struct genl_family. Signed-off-by: Kyle Mestery Acked-by: Kyle Mestery Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 185 ++++++++++++++++++++++----------------------- 1 file changed, 90 insertions(+), 95 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6bc9d94c8df2..0d407bca81e3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -44,11 +44,11 @@ #include #include #include -#include #include #include #include -#include +#include +#include #include #include #include @@ -62,6 +62,22 @@ int ovs_net_id __read_mostly; +static struct genl_family dp_packet_genl_family; +static struct genl_family dp_flow_genl_family; +static struct genl_family dp_datapath_genl_family; + +static struct genl_multicast_group ovs_dp_flow_multicast_group = { + .name = OVS_FLOW_MCGROUP +}; + +static struct genl_multicast_group ovs_dp_datapath_multicast_group = { + .name = OVS_DATAPATH_MCGROUP +}; + +struct genl_multicast_group ovs_dp_vport_multicast_group = { + .name = OVS_VPORT_MCGROUP +}; + /* Check if need to build a reply message. * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */ static bool ovs_must_notify(struct genl_info *info, @@ -272,16 +288,6 @@ out: u64_stats_update_end(&stats->syncp); } -static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_PACKET_FAMILY, - .version = OVS_PACKET_VERSION, - .maxattr = OVS_PACKET_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { @@ -600,6 +606,18 @@ static const struct genl_ops dp_packet_genl_ops[] = { } }; +static struct genl_family dp_packet_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_PACKET_FAMILY, + .version = OVS_PACKET_VERSION, + .maxattr = OVS_PACKET_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_packet_genl_ops, + .n_ops = ARRAY_SIZE(dp_packet_genl_ops), +}; + static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, struct ovs_dp_megaflow_stats *mega_stats) { @@ -631,26 +649,6 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, } } -static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { - [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, - [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, -}; - -static struct genl_family dp_flow_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = sizeof(struct ovs_header), - .name = OVS_FLOW_FAMILY, - .version = OVS_FLOW_VERSION, - .maxattr = OVS_FLOW_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_flow_multicast_group = { - .name = OVS_FLOW_MCGROUP -}; - static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -1186,7 +1184,13 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static const struct genl_ops dp_flow_genl_ops[] = { +static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { + [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, + [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, +}; + +static struct genl_ops dp_flow_genl_ops[] = { { .cmd = OVS_FLOW_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = flow_policy, @@ -1210,24 +1214,18 @@ static const struct genl_ops dp_flow_genl_ops[] = { }, }; -static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { - [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, -}; - -static struct genl_family dp_datapath_genl_family = { +static struct genl_family dp_flow_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_DATAPATH_FAMILY, - .version = OVS_DATAPATH_VERSION, - .maxattr = OVS_DP_ATTR_MAX, + .name = OVS_FLOW_FAMILY, + .version = OVS_FLOW_VERSION, + .maxattr = OVS_FLOW_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_datapath_multicast_group = { - .name = OVS_DATAPATH_MCGROUP + .ops = dp_flow_genl_ops, + .n_ops = ARRAY_SIZE(dp_flow_genl_ops), + .mcgrps = &ovs_dp_flow_multicast_group, + .n_mcgrps = 1, }; static size_t ovs_dp_cmd_msg_size(void) @@ -1574,7 +1572,13 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static const struct genl_ops dp_datapath_genl_ops[] = { +static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = { + [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, +}; + +static struct genl_ops dp_datapath_genl_ops[] = { { .cmd = OVS_DP_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = datapath_policy, @@ -1598,27 +1602,18 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }, }; -static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { - [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, - [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, - [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, -}; - -struct genl_family dp_vport_genl_family = { +static struct genl_family dp_datapath_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), - .name = OVS_VPORT_FAMILY, - .version = OVS_VPORT_VERSION, - .maxattr = OVS_VPORT_ATTR_MAX, + .name = OVS_DATAPATH_FAMILY, + .version = OVS_DATAPATH_VERSION, + .maxattr = OVS_DP_ATTR_MAX, .netnsok = true, .parallel_ops = true, -}; - -static struct genl_multicast_group ovs_dp_vport_multicast_group = { - .name = OVS_VPORT_MCGROUP + .ops = dp_datapath_genl_ops, + .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), + .mcgrps = &ovs_dp_datapath_multicast_group, + .n_mcgrps = 1, }; /* Called with ovs_mutex or RCU read lock. */ @@ -1941,7 +1936,16 @@ out: return skb->len; } -static const struct genl_ops dp_vport_genl_ops[] = { +static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { + [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, + [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, + [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, +}; + +static struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = vport_policy, @@ -1965,26 +1969,25 @@ static const struct genl_ops dp_vport_genl_ops[] = { }, }; -struct genl_family_and_ops { - struct genl_family *family; - const struct genl_ops *ops; - int n_ops; - const struct genl_multicast_group *group; +struct genl_family dp_vport_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = sizeof(struct ovs_header), + .name = OVS_VPORT_FAMILY, + .version = OVS_VPORT_VERSION, + .maxattr = OVS_VPORT_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_vport_genl_ops, + .n_ops = ARRAY_SIZE(dp_vport_genl_ops), + .mcgrps = &ovs_dp_vport_multicast_group, + .n_mcgrps = 1, }; -static const struct genl_family_and_ops dp_genl_families[] = { - { &dp_datapath_genl_family, - dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops), - &ovs_dp_datapath_multicast_group }, - { &dp_vport_genl_family, - dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops), - &ovs_dp_vport_multicast_group }, - { &dp_flow_genl_family, - dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops), - &ovs_dp_flow_multicast_group }, - { &dp_packet_genl_family, - dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops), - NULL }, +static struct genl_family * const dp_genl_families[] = { + &dp_datapath_genl_family, + &dp_vport_genl_family, + &dp_flow_genl_family, + &dp_packet_genl_family, }; static void dp_unregister_genl(int n_families) @@ -1992,33 +1995,25 @@ static void dp_unregister_genl(int n_families) int i; for (i = 0; i < n_families; i++) - genl_unregister_family(dp_genl_families[i].family); + genl_unregister_family(dp_genl_families[i]); } static int dp_register_genl(void) { - int n_registered; int err; int i; - n_registered = 0; for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) { - const struct genl_family_and_ops *f = &dp_genl_families[i]; - f->family->ops = f->ops; - f->family->n_ops = f->n_ops; - f->family->mcgrps = f->group; - f->family->n_mcgrps = f->group ? 1 : 0; - err = genl_register_family(f->family); + err = genl_register_family(dp_genl_families[i]); if (err) goto error; - n_registered++; } return 0; error: - dp_unregister_genl(n_registered); + dp_unregister_genl(i); return err; } -- cgit v1.2.3 From 359a0ea9875ef4f32c8425bbe1ae348e1fd2ed2a Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 4 Jun 2014 17:20:29 -0700 Subject: vxlan: Add support for UDP checksums (v4 sending, v6 zero csums) Added VXLAN link configuration for sending UDP checksums, and allowing TX and RX of UDP6 checksums. Also, call common iptunnel_handle_offloads and added GSO support for checksums. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/openvswitch/vport-vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index a93efa3f64c3..0edbd95c60e7 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -122,7 +122,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false); + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; -- cgit v1.2.3