summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-02-11 15:12:19 -0800
committerDavid S. Miller <davem@davemloft.net>2015-02-11 15:12:19 -0800
commit777b3e930ac8eb1f8360b3e4f2aaf5e4abe5ed46 (patch)
treefc3c9744cddeb0bba07c0ed258de06df4eb01487 /include/linux
parent13101602c4a9f653d59af9469040797bc5b361ca (diff)
parentfe881ef11cf0220f118816181930494d484c4883 (diff)
Merge branch 'rco_correctness'
Tom Herbert says: ==================== net: Fixes to remote checksum offload and CHECKSUM_PARTIAL This patch set fixes a correctness problem with remote checksum offload, clarifies the meaning of CHECKSUM_PARTIAL, and allows remote checksum offload to set CHECKSUM_PARTIAL instead of calling csum_partial and modifying the checksum. Specifically: - In the GRO remote checksum path, restore the checksum after calling lower layer GRO functions. This is needed if the packet is forwarded off host with the Remote Checksum Offload option still present. - Clarify meaning of CHECKSUM PARTIAL in the receive path. Only the checksums referred to by checksum partial and any preceding checksums can be considered verified. - Fixes to UDP tunnel GRO complete. Need to set SKB_GSO_UDP_TUNNEL_*, SKB_GSO_TUNNEL_REMCSUM, and skb->encapsulation for forwarding case. - Infrastructure to allow setting of CHECKSUM_PARTIAL in remote checksum offload. This a potential performance benefit instead of calling csum_partial (potentially twice, once in GRO path and once in normal path). The downside of using CHECKSUM_PARTIAL and not actually writing the checksum is that we aren't verifying that the sender correctly wrote the pseudo checksum into the checksum field, or that the start/offset values actually point to a checksum. If the sender did not set up these fields correctly, a packet might be accepted locally, but not accepted by a peer when the packet is forwarded off host. Verifying these fields seems non-trivial, and because the fields can only be incorrect due to sender error and not corruption (outer checksum protects against that) we'll make use of CHECKSUM_PARTIAL the default. This behavior can be reverted as an netlink option on the encapsulation socket. - Change VXLAN and GUE to set CHECKSUM_PARTIAL in remote checksum offload by default, configuration hooks can revert to using csum_partial. Testing: I ran performance numbers using netperf TCP_STREAM and TCP_RR with 200 streams for GRE/GUE and for VXLAN. This compares before the fixes, the fixes with not setting checksum partial in remote checksum offload, and with the fixes setting checksum partial. The overall effect seems be that using checksum partial is a slight performance win, perf definitely shows a significant reduction of time in csum_partial on the receive CPUs. GRE/GUE TCP_STREAM Before fixes 9.22% TX CPU utilization 13.57% RX CPU utilization 9133 Mbps Not using checksum partial 9.59% TX CPU utilization 14.95% RX CPU utilization 9132 Mbps Using checksum partial 9.37% TX CPU utilization 13.89% RX CPU utilization 9132 Mbps TCP_RR Before fixes CPU utilization 159/251/447 90/95/99% latencies 1.1462e+06 tps Not using checksum partial 92.94% CPU utilization 158/253/445 90/95/99% latencies 1.12988e+06 tps Using checksum partial 92.78% CPU utilization 158/250/450 90/95/99% latencies 1.15343e+06 tps VXLAN TCP_STREAM Before fixes 9.24% TX CPU utilization 13.74% RX CPU utilization 9093 Mbps Not using checksum partial 9.95% TX CPU utilization 14.66% RX CPU utilization 9094 Mbps Using checksum partial 10.24% TX CPU utilization 13.32% RX CPU utilization 9093 Mbps TCP_RR Before fixes 92.91% CPU utilization 151/241/437 90/95/99% latencies 1.15939e+06 tps Not using checksum partial 93.07% CPU utilization 156/246/425 90/95/99% latencies 1.1451e+06 tps Using checksum partial 95.51% CPU utilization 156/249/459 90/95/99% latencies 1.17004e+06 tps ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/netdevice.h62
-rw-r--r--include/linux/skbuff.h32
2 files changed, 78 insertions, 16 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d115256ed5a2..5897b4ea5a3f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1923,13 +1923,8 @@ struct napi_gro_cb {
/* Number of segments aggregated. */
u16 count;
- /* This is non-zero if the packet may be of the same flow. */
- u8 same_flow;
-
- /* Free the skb? */
- u8 free;
-#define NAPI_GRO_FREE 1
-#define NAPI_GRO_FREE_STOLEN_HEAD 2
+ /* Start offset for remote checksum offload */
+ u16 gro_remcsum_start;
/* jiffies when first packet was created/queued */
unsigned long age;
@@ -1937,6 +1932,9 @@ struct napi_gro_cb {
/* Used in ipv6_gro_receive() and foo-over-udp */
u16 proto;
+ /* This is non-zero if the packet may be of the same flow. */
+ u8 same_flow:1;
+
/* Used in udp_gro_receive */
u8 udp_mark:1;
@@ -1946,9 +1944,16 @@ struct napi_gro_cb {
/* Number of checksums via CHECKSUM_UNNECESSARY */
u8 csum_cnt:3;
+ /* Free the skb? */
+ u8 free:2;
+#define NAPI_GRO_FREE 1
+#define NAPI_GRO_FREE_STOLEN_HEAD 2
+
/* Used in foo-over-udp, set in udp[46]_gro_receive */
u8 is_ipv6:1;
+ /* 7 bit hole */
+
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
@@ -2242,11 +2247,20 @@ static inline void skb_gro_postpull_rcsum(struct sk_buff *skb,
__sum16 __skb_gro_checksum_complete(struct sk_buff *skb);
+static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb)
+{
+ return (NAPI_GRO_CB(skb)->gro_remcsum_start - skb_headroom(skb) ==
+ skb_gro_offset(skb));
+}
+
static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
bool zero_okay,
__sum16 check)
{
- return (skb->ip_summed != CHECKSUM_PARTIAL &&
+ return ((skb->ip_summed != CHECKSUM_PARTIAL ||
+ skb_checksum_start_offset(skb) <
+ skb_gro_offset(skb)) &&
+ !skb_at_gro_remcsum_start(skb) &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
(!zero_okay || check));
}
@@ -2321,20 +2335,48 @@ do { \
compute_pseudo(skb, proto)); \
} while (0)
+struct gro_remcsum {
+ int offset;
+ __wsum delta;
+};
+
+static inline void skb_gro_remcsum_init(struct gro_remcsum *grc)
+{
+ grc->delta = 0;
+}
+
static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr,
- int start, int offset)
+ int start, int offset,
+ struct gro_remcsum *grc,
+ bool nopartial)
{
__wsum delta;
BUG_ON(!NAPI_GRO_CB(skb)->csum_valid);
+ if (!nopartial) {
+ NAPI_GRO_CB(skb)->gro_remcsum_start =
+ ((unsigned char *)ptr + start) - skb->head;
+ return;
+ }
+
delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset);
/* Adjust skb->csum since we changed the packet */
- skb->csum = csum_add(skb->csum, delta);
NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+
+ grc->offset = (ptr + offset) - (void *)skb->head;
+ grc->delta = delta;
}
+static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
+ struct gro_remcsum *grc)
+{
+ if (!grc->delta)
+ return;
+
+ remcsum_unadjust((__sum16 *)(skb->head + grc->offset), grc->delta);
+}
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned short type,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1bb36edb66b9..30007afe70b3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -83,11 +83,15 @@
*
* CHECKSUM_PARTIAL:
*
- * This is identical to the case for output below. This may occur on a packet
+ * A checksum is set up to be offloaded to a device as described in the
+ * output description for CHECKSUM_PARTIAL. This may occur on a packet
* received directly from another Linux OS, e.g., a virtualized Linux kernel
- * on the same host. The packet can be treated in the same way as
- * CHECKSUM_UNNECESSARY, except that on output (i.e., forwarding) the
- * checksum must be filled in by the OS or the hardware.
+ * on the same host, or it may be set in the input path in GRO or remote
+ * checksum offload. For the purposes of checksum verification, the checksum
+ * referred to by skb->csum_start + skb->csum_offset and any preceding
+ * checksums in the packet are considered verified. Any checksums in the
+ * packet that are after the checksum being offloaded are not considered to
+ * be verified.
*
* B. Checksumming on output.
*
@@ -2915,7 +2919,10 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb);
static inline int skb_csum_unnecessary(const struct sk_buff *skb)
{
- return ((skb->ip_summed & CHECKSUM_UNNECESSARY) || skb->csum_valid);
+ return ((skb->ip_summed == CHECKSUM_UNNECESSARY) ||
+ skb->csum_valid ||
+ (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start_offset(skb) >= 0));
}
/**
@@ -3097,16 +3104,29 @@ do { \
compute_pseudo(skb, proto)); \
} while (0)
+static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr,
+ u16 start, u16 offset)
+{
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = ((unsigned char *)ptr + start) - skb->head;
+ skb->csum_offset = offset - start;
+}
+
/* Update skbuf and packet to reflect the remote checksum offload operation.
* When called, ptr indicates the starting point for skb->csum when
* ip_summed is CHECKSUM_COMPLETE. If we need create checksum complete
* here, skb_postpull_rcsum is done so skb->csum start is ptr.
*/
static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
- int start, int offset)
+ int start, int offset, bool nopartial)
{
__wsum delta;
+ if (!nopartial) {
+ skb_remcsum_adjust_partial(skb, ptr, start, offset);
+ return;
+ }
+
if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) {
__skb_checksum_complete(skb);
skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data);