From 4a963d074a6e88b6057713b7c9dec4f6065d0fd4 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 29 Nov 2021 15:57:03 +0100 Subject: xen/netfront: read response from backend only once commit 8446066bf8c1f9f7b7412c43fbea0fb87464d75b upstream. In order to avoid problems in case the backend is modifying a response on the ring page while the frontend has already seen it, just read the response into a local buffer in one go and then operate on that buffer only. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 6d4bf37c660f..3cc5eaead05d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -387,13 +387,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) rmb(); /* Ensure we see responses up to 'rp'. */ for (cons = queue->tx.rsp_cons; cons != prod; cons++) { - struct xen_netif_tx_response *txrsp; + struct xen_netif_tx_response txrsp; - txrsp = RING_GET_RESPONSE(&queue->tx, cons); - if (txrsp->status == XEN_NETIF_RSP_NULL) + RING_COPY_RESPONSE(&queue->tx, cons, &txrsp); + if (txrsp.status == XEN_NETIF_RSP_NULL) continue; - id = txrsp->id; + id = txrsp.id; skb = queue->tx_skbs[id].skb; if (unlikely(gnttab_query_foreign_access( queue->grant_tx_ref[id]) != 0)) { @@ -736,7 +736,7 @@ static int xennet_get_extras(struct netfront_queue *queue, RING_IDX rp) { - struct xen_netif_extra_info *extra; + struct xen_netif_extra_info extra; struct device *dev = &queue->info->netdev->dev; RING_IDX cons = queue->rx.rsp_cons; int err = 0; @@ -752,24 +752,22 @@ static int xennet_get_extras(struct netfront_queue *queue, break; } - extra = (struct xen_netif_extra_info *) - RING_GET_RESPONSE(&queue->rx, ++cons); + RING_COPY_RESPONSE(&queue->rx, ++cons, &extra); - if (unlikely(!extra->type || - extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + if (unlikely(!extra.type || + extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { if (net_ratelimit()) dev_warn(dev, "Invalid extra type: %d\n", - extra->type); + extra.type); err = -EINVAL; } else { - memcpy(&extras[extra->type - 1], extra, - sizeof(*extra)); + extras[extra.type - 1] = extra; } skb = xennet_get_rx_skb(queue, cons); ref = xennet_get_rx_ref(queue, cons); xennet_move_rx_slot(queue, skb, ref); - } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); queue->rx.rsp_cons = cons; return err; @@ -779,7 +777,7 @@ static int xennet_get_responses(struct netfront_queue *queue, struct netfront_rx_info *rinfo, RING_IDX rp, struct sk_buff_head *list) { - struct xen_netif_rx_response *rx = &rinfo->rx; + struct xen_netif_rx_response *rx = &rinfo->rx, rx_local; struct xen_netif_extra_info *extras = rinfo->extras; struct device *dev = &queue->info->netdev->dev; RING_IDX cons = queue->rx.rsp_cons; @@ -837,7 +835,8 @@ next: break; } - rx = RING_GET_RESPONSE(&queue->rx, cons + slots); + RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local); + rx = &rx_local; skb = xennet_get_rx_skb(queue, cons + slots); ref = xennet_get_rx_ref(queue, cons + slots); slots++; @@ -892,10 +891,11 @@ static int xennet_fill_frags(struct netfront_queue *queue, struct sk_buff *nskb; while ((nskb = __skb_dequeue(list))) { - struct xen_netif_rx_response *rx = - RING_GET_RESPONSE(&queue->rx, ++cons); + struct xen_netif_rx_response rx; skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; + RING_COPY_RESPONSE(&queue->rx, ++cons, &rx); + if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; @@ -910,7 +910,7 @@ static int xennet_fill_frags(struct netfront_queue *queue, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, skb_frag_page(nfrag), - rx->offset, rx->status, PAGE_SIZE); + rx.offset, rx.status, PAGE_SIZE); skb_shinfo(nskb)->nr_frags = 0; kfree_skb(nskb); @@ -1008,7 +1008,7 @@ static int xennet_poll(struct napi_struct *napi, int budget) i = queue->rx.rsp_cons; work_done = 0; while ((i != rp) && (work_done < budget)) { - memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx)); + RING_COPY_RESPONSE(&queue->rx, i, rx); memset(extras, 0, sizeof(rinfo.extras)); err = xennet_get_responses(queue, &rinfo, rp, &tmpq); -- cgit v1.2.3 From c40d7884f51faefb51b40f483d75271c1cbaeb46 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 29 Nov 2021 15:58:09 +0100 Subject: xen/netfront: don't read data from request on the ring page commit 162081ec33c2686afa29d91bf8d302824aa846c7 upstream. In order to avoid a malicious backend being able to influence the local processing of a request build the request locally first and then copy it to the ring page. Any reading from the request influencing the processing in the frontend needs to be done on the local instance. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 80 +++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 43 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 3cc5eaead05d..416b33d2da67 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -433,7 +433,8 @@ struct xennet_gnttab_make_txreq { struct netfront_queue *queue; struct sk_buff *skb; struct page *page; - struct xen_netif_tx_request *tx; /* Last request */ + struct xen_netif_tx_request *tx; /* Last request on ring page */ + struct xen_netif_tx_request tx_local; /* Last request local copy*/ unsigned int size; }; @@ -461,30 +462,27 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, queue->grant_tx_page[id] = page; queue->grant_tx_ref[id] = ref; - tx->id = id; - tx->gref = ref; - tx->offset = offset; - tx->size = len; - tx->flags = 0; + info->tx_local.id = id; + info->tx_local.gref = ref; + info->tx_local.offset = offset; + info->tx_local.size = len; + info->tx_local.flags = 0; + + *tx = info->tx_local; info->tx = tx; - info->size += tx->size; + info->size += info->tx_local.size; } static struct xen_netif_tx_request *xennet_make_first_txreq( - struct netfront_queue *queue, struct sk_buff *skb, - struct page *page, unsigned int offset, unsigned int len) + struct xennet_gnttab_make_txreq *info, + unsigned int offset, unsigned int len) { - struct xennet_gnttab_make_txreq info = { - .queue = queue, - .skb = skb, - .page = page, - .size = 0, - }; + info->size = 0; - gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info); + gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info); - return info.tx; + return info->tx; } static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset, @@ -497,35 +495,27 @@ static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset, xennet_tx_setup_grant(gfn, offset, len, data); } -static struct xen_netif_tx_request *xennet_make_txreqs( - struct netfront_queue *queue, struct xen_netif_tx_request *tx, - struct sk_buff *skb, struct page *page, +static void xennet_make_txreqs( + struct xennet_gnttab_make_txreq *info, + struct page *page, unsigned int offset, unsigned int len) { - struct xennet_gnttab_make_txreq info = { - .queue = queue, - .skb = skb, - .tx = tx, - }; - /* Skip unused frames from start of page */ page += offset >> PAGE_SHIFT; offset &= ~PAGE_MASK; while (len) { - info.page = page; - info.size = 0; + info->page = page; + info->size = 0; gnttab_foreach_grant_in_range(page, offset, len, xennet_make_one_txreq, - &info); + info); page++; offset = 0; - len -= info.size; + len -= info->size; } - - return info.tx; } /* @@ -578,7 +568,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats); - struct xen_netif_tx_request *tx, *first_tx; + struct xen_netif_tx_request *first_tx; unsigned int i; int notify; int slots; @@ -587,6 +577,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int len; unsigned long flags; struct netfront_queue *queue = NULL; + struct xennet_gnttab_make_txreq info = { }; unsigned int num_queues = dev->real_num_tx_queues; u16 queue_index; @@ -629,21 +620,24 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) } /* First request for the linear area. */ - first_tx = tx = xennet_make_first_txreq(queue, skb, - page, offset, len); - offset += tx->size; + info.queue = queue; + info.skb = skb; + info.page = page; + first_tx = xennet_make_first_txreq(&info, offset, len); + offset += info.tx_local.size; if (offset == PAGE_SIZE) { page++; offset = 0; } - len -= tx->size; + len -= info.tx_local.size; if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ - tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated; + first_tx->flags |= XEN_NETTXF_csum_blank | + XEN_NETTXF_data_validated; else if (skb->ip_summed == CHECKSUM_UNNECESSARY) /* remote but checksummed. */ - tx->flags |= XEN_NETTXF_data_validated; + first_tx->flags |= XEN_NETTXF_data_validated; /* Optional extra info after the first request. */ if (skb_shinfo(skb)->gso_size) { @@ -652,7 +646,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) gso = (struct xen_netif_extra_info *) RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); - tx->flags |= XEN_NETTXF_extra_info; + first_tx->flags |= XEN_NETTXF_extra_info; gso->u.gso.size = skb_shinfo(skb)->gso_size; gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ? @@ -666,13 +660,13 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) } /* Requests for the rest of the linear area. */ - tx = xennet_make_txreqs(queue, tx, skb, page, offset, len); + xennet_make_txreqs(&info, page, offset, len); /* Requests for all the frags. */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - tx = xennet_make_txreqs(queue, tx, skb, - skb_frag_page(frag), frag->page_offset, + xennet_make_txreqs(&info, skb_frag_page(frag), + frag->page_offset, skb_frag_size(frag)); } -- cgit v1.2.3 From d01441523b71e60f29da18050cc540ccd578aa3b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 29 Nov 2021 15:58:51 +0100 Subject: xen/netfront: disentangle tx_skb_freelist commit 21631d2d741a64a073e167c27769e73bc7844a2f upstream. The tx_skb_freelist elements are in a single linked list with the request id used as link reference. The per element link field is in a union with the skb pointer of an in use request. Move the link reference out of the union in order to enable a later reuse of it for requests which need a populated skb pointer. Rename add_id_to_freelist() and get_id_from_freelist() to add_id_to_list() and get_id_from_list() in order to prepare using those for other lists as well. Define ~0 as value to indicate the end of a list and place that value into the link for a request not being on the list. When freeing a skb zero the skb pointer in the request. Use a NULL value of the skb pointer instead of skb_entry_is_link() for deciding whether a request has a skb linked to it. Remove skb_entry_set_link() and open code it instead as it is really trivial now. Signed-off-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 61 +++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 36 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 416b33d2da67..7c1734f1c913 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -120,17 +120,11 @@ struct netfront_queue { /* * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries - * are linked from tx_skb_freelist through skb_entry.link. - * - * NB. Freelist index entries are always going to be less than - * PAGE_OFFSET, whereas pointers to skbs will always be equal or - * greater than PAGE_OFFSET: we use this property to distinguish - * them. + * are linked from tx_skb_freelist through tx_link. */ - union skb_entry { - struct sk_buff *skb; - unsigned long link; - } tx_skbs[NET_TX_RING_SIZE]; + struct sk_buff *tx_skbs[NET_TX_RING_SIZE]; + unsigned short tx_link[NET_TX_RING_SIZE]; +#define TX_LINK_NONE 0xffff grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; struct page *grant_tx_page[NET_TX_RING_SIZE]; @@ -168,33 +162,25 @@ struct netfront_rx_info { struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; }; -static void skb_entry_set_link(union skb_entry *list, unsigned short id) -{ - list->link = id; -} - -static int skb_entry_is_link(const union skb_entry *list) -{ - BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link)); - return (unsigned long)list->skb < PAGE_OFFSET; -} - /* * Access macros for acquiring freeing slots in tx_skbs[]. */ -static void add_id_to_freelist(unsigned *head, union skb_entry *list, - unsigned short id) +static void add_id_to_list(unsigned *head, unsigned short *list, + unsigned short id) { - skb_entry_set_link(&list[id], *head); + list[id] = *head; *head = id; } -static unsigned short get_id_from_freelist(unsigned *head, - union skb_entry *list) +static unsigned short get_id_from_list(unsigned *head, unsigned short *list) { unsigned int id = *head; - *head = list[id].link; + + if (id != TX_LINK_NONE) { + *head = list[id]; + list[id] = TX_LINK_NONE; + } return id; } @@ -394,7 +380,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) continue; id = txrsp.id; - skb = queue->tx_skbs[id].skb; + skb = queue->tx_skbs[id]; + queue->tx_skbs[id] = NULL; if (unlikely(gnttab_query_foreign_access( queue->grant_tx_ref[id]) != 0)) { pr_alert("%s: warning -- grant still in use by backend domain\n", @@ -407,7 +394,7 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) &queue->gref_tx_head, queue->grant_tx_ref[id]); queue->grant_tx_ref[id] = GRANT_INVALID_REF; queue->grant_tx_page[id] = NULL; - add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id); + add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id); dev_kfree_skb_irq(skb); } @@ -450,7 +437,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, struct netfront_queue *queue = info->queue; struct sk_buff *skb = info->skb; - id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); + id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link); tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); ref = gnttab_claim_grant_reference(&queue->gref_tx_head); WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); @@ -458,7 +445,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, gfn, GNTMAP_readonly); - queue->tx_skbs[id].skb = skb; + queue->tx_skbs[id] = skb; queue->grant_tx_page[id] = page; queue->grant_tx_ref[id] = ref; @@ -1126,17 +1113,18 @@ static void xennet_release_tx_bufs(struct netfront_queue *queue) for (i = 0; i < NET_TX_RING_SIZE; i++) { /* Skip over entries which are actually freelist references */ - if (skb_entry_is_link(&queue->tx_skbs[i])) + if (!queue->tx_skbs[i]) continue; - skb = queue->tx_skbs[i].skb; + skb = queue->tx_skbs[i]; + queue->tx_skbs[i] = NULL; get_page(queue->grant_tx_page[i]); gnttab_end_foreign_access(queue->grant_tx_ref[i], GNTMAP_readonly, (unsigned long)page_address(queue->grant_tx_page[i])); queue->grant_tx_page[i] = NULL; queue->grant_tx_ref[i] = GRANT_INVALID_REF; - add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i); + add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i); dev_kfree_skb_irq(skb); } } @@ -1637,13 +1625,14 @@ static int xennet_init_queue(struct netfront_queue *queue) snprintf(queue->name, sizeof(queue->name), "vif%s-q%u", devid, queue->id); - /* Initialise tx_skbs as a free chain containing every entry. */ + /* Initialise tx_skb_freelist as a free chain containing every entry. */ queue->tx_skb_freelist = 0; for (i = 0; i < NET_TX_RING_SIZE; i++) { - skb_entry_set_link(&queue->tx_skbs[i], i+1); + queue->tx_link[i] = i + 1; queue->grant_tx_ref[i] = GRANT_INVALID_REF; queue->grant_tx_page[i] = NULL; } + queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE; /* Clear out rx_skbs */ for (i = 0; i < NET_RX_RING_SIZE; i++) { -- cgit v1.2.3 From d364d387a0f2e905b9272ee6835a95ea2862caff Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 29 Nov 2021 15:59:33 +0100 Subject: xen/netfront: don't trust the backend response data blindly commit a884daa61a7d91650987e855464526aef219590f upstream. Today netfront will trust the backend to send only sane response data. In order to avoid privilege escalations or crashes in case of malicious backends verify the data to be within expected limits. Especially make sure that the response always references an outstanding request. Note that only the tx queue needs special id handling, as for the rx queue the id is equal to the index in the ring page. Introduce a new indicator for the device whether it is broken and let the device stop working when it is set. Set this indicator in case the backend sets any weird data. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 80 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 5 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 7c1734f1c913..46b9f379035f 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -125,10 +125,12 @@ struct netfront_queue { struct sk_buff *tx_skbs[NET_TX_RING_SIZE]; unsigned short tx_link[NET_TX_RING_SIZE]; #define TX_LINK_NONE 0xffff +#define TX_PENDING 0xfffe grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; struct page *grant_tx_page[NET_TX_RING_SIZE]; unsigned tx_skb_freelist; + unsigned int tx_pend_queue; spinlock_t rx_lock ____cacheline_aligned_in_smp; struct xen_netif_rx_front_ring rx; @@ -154,6 +156,9 @@ struct netfront_info { struct netfront_stats __percpu *rx_stats; struct netfront_stats __percpu *tx_stats; + /* Is device behaving sane? */ + bool broken; + atomic_t rx_gso_checksum_fixup; }; @@ -338,7 +343,7 @@ static int xennet_open(struct net_device *dev) unsigned int i = 0; struct netfront_queue *queue = NULL; - if (!np->queues) + if (!np->queues || np->broken) return -ENODEV; for (i = 0; i < num_queues; ++i) { @@ -365,11 +370,17 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) RING_IDX cons, prod; unsigned short id; struct sk_buff *skb; + const struct device *dev = &queue->info->netdev->dev; BUG_ON(!netif_carrier_ok(queue->info->netdev)); do { prod = queue->tx.sring->rsp_prod; + if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) { + dev_alert(dev, "Illegal number of responses %u\n", + prod - queue->tx.rsp_cons); + goto err; + } rmb(); /* Ensure we see responses up to 'rp'. */ for (cons = queue->tx.rsp_cons; cons != prod; cons++) { @@ -379,14 +390,27 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) if (txrsp.status == XEN_NETIF_RSP_NULL) continue; - id = txrsp.id; + id = txrsp.id; + if (id >= RING_SIZE(&queue->tx)) { + dev_alert(dev, + "Response has incorrect id (%u)\n", + id); + goto err; + } + if (queue->tx_link[id] != TX_PENDING) { + dev_alert(dev, + "Response for inactive request\n"); + goto err; + } + + queue->tx_link[id] = TX_LINK_NONE; skb = queue->tx_skbs[id]; queue->tx_skbs[id] = NULL; if (unlikely(gnttab_query_foreign_access( queue->grant_tx_ref[id]) != 0)) { - pr_alert("%s: warning -- grant still in use by backend domain\n", - __func__); - BUG(); + dev_alert(dev, + "Grant still in use by backend domain\n"); + goto err; } gnttab_end_foreign_access_ref( queue->grant_tx_ref[id], GNTMAP_readonly); @@ -414,6 +438,12 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) } while ((cons == prod) && (prod != queue->tx.sring->rsp_prod)); xennet_maybe_wake_tx(queue); + + return; + + err: + queue->info->broken = true; + dev_alert(dev, "Disabled for further use\n"); } struct xennet_gnttab_make_txreq { @@ -457,6 +487,12 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, *tx = info->tx_local; + /* + * Put the request in the pending queue, it will be set to be pending + * when the producer index is about to be raised. + */ + add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id); + info->tx = tx; info->size += info->tx_local.size; } @@ -549,6 +585,15 @@ static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb, return queue_idx; } +static void xennet_mark_tx_pending(struct netfront_queue *queue) +{ + unsigned int i; + + while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) != + TX_LINK_NONE) + queue->tx_link[i] = TX_PENDING; +} + #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1) static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -571,6 +616,8 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Drop the packet if no queues are set up */ if (num_queues < 1) goto drop; + if (unlikely(np->broken)) + goto drop; /* Determine which queue to transmit this SKB on */ queue_index = skb_get_queue_mapping(skb); queue = &np->queues[queue_index]; @@ -660,6 +707,8 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) /* First request has the packet length. */ first_tx->size = skb->len; + xennet_mark_tx_pending(queue); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify); if (notify) notify_remote_via_irq(queue->tx_irq); @@ -984,6 +1033,13 @@ static int xennet_poll(struct napi_struct *napi, int budget) skb_queue_head_init(&tmpq); rp = queue->rx.sring->rsp_prod; + if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) { + dev_alert(&dev->dev, "Illegal number of responses %u\n", + rp - queue->rx.rsp_cons); + queue->info->broken = true; + spin_unlock(&queue->rx_lock); + return 0; + } rmb(); /* Ensure we see queued responses up to 'rp'. */ i = queue->rx.rsp_cons; @@ -1224,6 +1280,9 @@ static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) struct netfront_queue *queue = dev_id; unsigned long flags; + if (queue->info->broken) + return IRQ_HANDLED; + spin_lock_irqsave(&queue->tx_lock, flags); xennet_tx_buf_gc(queue); spin_unlock_irqrestore(&queue->tx_lock, flags); @@ -1236,6 +1295,9 @@ static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) struct netfront_queue *queue = dev_id; struct net_device *dev = queue->info->netdev; + if (queue->info->broken) + return IRQ_HANDLED; + if (likely(netif_carrier_ok(dev) && RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) napi_schedule(&queue->napi); @@ -1257,6 +1319,10 @@ static void xennet_poll_controller(struct net_device *dev) struct netfront_info *info = netdev_priv(dev); unsigned int num_queues = dev->real_num_tx_queues; unsigned int i; + + if (info->broken) + return; + for (i = 0; i < num_queues; ++i) xennet_interrupt(0, &info->queues[i]); } @@ -1627,6 +1693,7 @@ static int xennet_init_queue(struct netfront_queue *queue) /* Initialise tx_skb_freelist as a free chain containing every entry. */ queue->tx_skb_freelist = 0; + queue->tx_pend_queue = TX_LINK_NONE; for (i = 0; i < NET_TX_RING_SIZE; i++) { queue->tx_link[i] = i + 1; queue->grant_tx_ref[i] = GRANT_INVALID_REF; @@ -1842,6 +1909,9 @@ static int talk_to_netback(struct xenbus_device *dev, if (info->queues) xennet_destroy_queues(info); + /* For the case of a reconnect reset the "broken" indicator. */ + info->broken = false; + err = xennet_create_queues(info, &num_queues); if (err < 0) { xenbus_dev_fatal(dev, err, "creating queues"); -- cgit v1.2.3 From 81900aa7d7a130dec4c55b68875e30fb8c9effec Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: xen/netfront: harden netfront against event channel storms commit b27d47950e481f292c0a5ad57357edb9d95d03ba upstream. The Xen netfront driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using lateeoi event channels. For being able to detect the case of no rx responses being added while the carrier is down a new lock is needed in order to update and test rsp_cons and the number of seen unconsumed responses atomically. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 125 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 94 insertions(+), 31 deletions(-) (limited to 'drivers/net/xen-netfront.c') diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 46b9f379035f..637d5e894012 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -141,6 +141,9 @@ struct netfront_queue { struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + + unsigned int rx_rsp_unconsumed; + spinlock_t rx_cons_lock; }; struct netfront_info { @@ -365,11 +368,12 @@ static int xennet_open(struct net_device *dev) return 0; } -static void xennet_tx_buf_gc(struct netfront_queue *queue) +static bool xennet_tx_buf_gc(struct netfront_queue *queue) { RING_IDX cons, prod; unsigned short id; struct sk_buff *skb; + bool work_done = false; const struct device *dev = &queue->info->netdev->dev; BUG_ON(!netif_carrier_ok(queue->info->netdev)); @@ -386,6 +390,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) for (cons = queue->tx.rsp_cons; cons != prod; cons++) { struct xen_netif_tx_response txrsp; + work_done = true; + RING_COPY_RESPONSE(&queue->tx, cons, &txrsp); if (txrsp.status == XEN_NETIF_RSP_NULL) continue; @@ -439,11 +445,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) xennet_maybe_wake_tx(queue); - return; + return work_done; err: queue->info->broken = true; dev_alert(dev, "Disabled for further use\n"); + + return work_done; } struct xennet_gnttab_make_txreq { @@ -748,6 +756,16 @@ static int xennet_close(struct net_device *dev) return 0; } +static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + queue->rx.rsp_cons = val; + queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); +} + static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb, grant_ref_t ref) { @@ -799,7 +817,7 @@ static int xennet_get_extras(struct netfront_queue *queue, xennet_move_rx_slot(queue, skb, ref); } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return err; } @@ -879,7 +897,7 @@ next: } if (unlikely(err)) - queue->rx.rsp_cons = cons + slots; + xennet_set_rx_rsp_cons(queue, cons + slots); return err; } @@ -933,7 +951,8 @@ static int xennet_fill_frags(struct netfront_queue *queue, __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { - queue->rx.rsp_cons = ++cons + skb_queue_len(list); + xennet_set_rx_rsp_cons(queue, + ++cons + skb_queue_len(list)); kfree_skb(nskb); return -ENOENT; } @@ -946,7 +965,7 @@ static int xennet_fill_frags(struct netfront_queue *queue, kfree_skb(nskb); } - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return 0; } @@ -1067,7 +1086,9 @@ err: if (unlikely(xennet_set_skb_gso(skb, gso))) { __skb_queue_head(&tmpq, skb); - queue->rx.rsp_cons += skb_queue_len(&tmpq); + xennet_set_rx_rsp_cons(queue, + queue->rx.rsp_cons + + skb_queue_len(&tmpq)); goto err; } } @@ -1091,7 +1112,8 @@ err: __skb_queue_tail(&rxq, skb); - i = ++queue->rx.rsp_cons; + i = queue->rx.rsp_cons + 1; + xennet_set_rx_rsp_cons(queue, i); work_done++; } @@ -1275,40 +1297,79 @@ static int xennet_set_features(struct net_device *dev, return 0; } -static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; unsigned long flags; - if (queue->info->broken) - return IRQ_HANDLED; + if (unlikely(queue->info->broken)) + return false; spin_lock_irqsave(&queue->tx_lock, flags); - xennet_tx_buf_gc(queue); + if (xennet_tx_buf_gc(queue)) + *eoi = 0; spin_unlock_irqrestore(&queue->tx_lock, flags); + return true; +} + +static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_tx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } -static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) +static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; - struct net_device *dev = queue->info->netdev; + unsigned int work_queued; + unsigned long flags; - if (queue->info->broken) - return IRQ_HANDLED; + if (unlikely(queue->info->broken)) + return false; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + if (work_queued > queue->rx_rsp_unconsumed) { + queue->rx_rsp_unconsumed = work_queued; + *eoi = 0; + } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) { + const struct device *dev = &queue->info->netdev->dev; + + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + dev_alert(dev, "RX producer index going backwards\n"); + dev_alert(dev, "Disabled for further use\n"); + queue->info->broken = true; + return false; + } + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); - if (likely(netif_carrier_ok(dev) && - RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) + if (likely(netif_carrier_ok(queue->info->netdev) && work_queued)) napi_schedule(&queue->napi); + return true; +} + +static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_rx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } static irqreturn_t xennet_interrupt(int irq, void *dev_id) { - xennet_tx_interrupt(irq, dev_id); - xennet_rx_interrupt(irq, dev_id); + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (xennet_handle_tx(dev_id, &eoiflag) && + xennet_handle_rx(dev_id, &eoiflag)) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } @@ -1540,9 +1601,10 @@ static int setup_netfront_single(struct netfront_queue *queue) if (err < 0) goto fail; - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_interrupt, - 0, queue->info->netdev->name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_interrupt, 0, + queue->info->netdev->name, + queue); if (err < 0) goto bind_fail; queue->rx_evtchn = queue->tx_evtchn; @@ -1570,18 +1632,18 @@ static int setup_netfront_split(struct netfront_queue *queue) snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_tx_interrupt, - 0, queue->tx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_tx_interrupt, 0, + queue->tx_irq_name, queue); if (err < 0) goto bind_tx_fail; queue->tx_irq = err; snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_evtchn_to_irqhandler(queue->rx_evtchn, - xennet_rx_interrupt, - 0, queue->rx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn, + xennet_rx_interrupt, 0, + queue->rx_irq_name, queue); if (err < 0) goto bind_rx_fail; queue->rx_irq = err; @@ -1683,6 +1745,7 @@ static int xennet_init_queue(struct netfront_queue *queue) spin_lock_init(&queue->tx_lock); spin_lock_init(&queue->rx_lock); + spin_lock_init(&queue->rx_cons_lock); setup_timer(&queue->rx_refill_timer, rx_refill_timeout, (unsigned long)queue); -- cgit v1.2.3