Fix softlockup in tx_bottom by preventing repeated failed SG fills #53
aagit wants to merge 1 commit into wget:master
Conversation
When processing transmit packets in tx_bottom, the code was calling r8152_tx_agg_sg_fill for the same skb even when the SG fill failed to consume the skb. This could happen if the condition "1 + skb_shinfo(skb)->nr_frags > RTL_MAX_SG_NUM" was true, leading to infinite loops and softlockups.

The softlockup here was reproducible only when accessing huggingface.co with Firefox. With this patch, the warning is logged as expected and no softlockup occurs anymore:

[212844.954702] r8152 4-1:1.0 enp5s0f4u1: num_sgs 18 > max_sg_num 16

Increasing RTL_MAX_SG_NUM to avoid the failure appears to work too, but it looks more fragile. Furthermore, RTL_MAX_SG_NUM is used throughout the driver to communicate with the network layer, and the driver is meant to be able to consume that number of network buffers at once, which may not be the case if increasing the value.

This change prevents the infinite loop by invoking the non-SG fill to consume the skb if the SG fill cannot consume it.

Link: #48

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
|
@aagit Have you tried the updated 2.21.4 driver from Realtek or compared the diff? It had a lot of changes. I'm running it currently on 3 hosts. So far no issues. 🤞 |
I'm reviewing it, see #54.

static int r8152_tx_agg_sg_fill(struct r8152 *tp, struct tx_agg *agg)
{
struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
struct net_device *netdev = tp->netdev;
int max_sg_num, ret, sg_num;
struct scatterlist *sg;
int padding = 0;
__skb_queue_head_init(&skb_head);
spin_lock(&tx_queue->lock);
skb_queue_splice_init(tx_queue, &skb_head);
spin_unlock(&tx_queue->lock);
sg = agg->head;
- max_sg_num = (agg_buf_sz / sizeof(*sg)) - 1;
- max_sg_num = min_t(int, RTL_MAX_SG_NUM, max_sg_num);
+ max_sg_num = min_t(int, PAGE_SIZE, agg_buf_sz) / sizeof(*sg) - 1;
+ WARN_ON_ONCE(RTL_MAX_SG_NUM * 2 > max_sg_num);
sg_init_table(sg, max_sg_num + 1);
agg->skb_num = 0;
agg->skb_len = 0;
agg->skb_bytes = 0;
- for (sg_num = 0; sg_num < max_sg_num;) {
+ for (sg_num = 0; sg_num < RTL_MAX_SG_NUM;) {
struct sk_buff *skb;
- int num_sgs, headroom;
unsigned int len;
+ int num_sgs;
union {
struct tx_desc v1;
struct tx_desc_v2 v2;
} tx_desc;
skb = __skb_dequeue(&skb_head);
if (!skb)
break;
- headroom = skb_headroom(skb) - padding - tp->tx_desc.size;
-
- if (skb_header_cloned(skb) || headroom < 0) {
- struct sk_buff *tx_skb;
-
- headroom = padding + tp->tx_desc.size;
- tx_skb = skb_copy_expand(skb, headroom, 0, GFP_ATOMIC);
- if (!tx_skb) {
- __skb_queue_head(&skb_head, skb);
- break;
- }
- dev_kfree_skb_any(skb);
- skb = tx_skb;
- headroom = skb_headroom(skb) - headroom;
+ if (rtl_skb_check(tp, &skb, padding + tp->tx_desc.size) < 0) {
+ netif_err(tp, tx_err, netdev,
+ "rtl_skb_check fail. skb: %p, len: %d,"
+ "headroom: %d, nr_frags: %d\n",
+ skb, skb->len, skb_headroom(skb),
+ skb_shinfo(skb)->nr_frags);
+ netdev_completed_queue(netdev,
+ skb_shinfo(skb)->gso_segs ?: 1,
+ skb->len);
+ rtl_drop_tx_skb(tp, skb);
+ continue;
}
- /* calculate the fragment numbers for skb */
- num_sgs = 1 + skb_shinfo(skb)->nr_frags;
len = skb->len;
- if ((num_sgs + sg_num) > max_sg_num) {
- __skb_queue_head(&skb_head, skb);
- break;
- }
-
if (rtl_tx_csum(tp, &tx_desc, skb, len)) {
r8152_csum_workaround(tp, skb, &skb_head);
continue;
}
tp->tx_desc.vlan_tag(tp, &tx_desc, skb);
- WARN_ON(padding < 0);
-
/* use skb_headroom for tx desc */
- skb->cb[0] = padding + tp->tx_desc.size;
+// skb->cb[0] = padding + tp->tx_desc.size;
memcpy(skb_push(skb, tp->tx_desc.size), &tx_desc,
tp->tx_desc.size);
if (padding)
memset(skb_push(skb, padding), 0, padding);
num_sgs = skb_to_sgvec_nomark(skb, sg, 0, skb->len);
- if (num_sgs < 0) {
+ if (unlikely(num_sgs < 0)) {
netif_err(tp, tx_err, netdev,
- "skb_to_sgvec fail %d\n", num_sgs);
- __skb_queue_head(&skb_head, skb);
- break;
+ "skb_to_sgvec fail %d, sg_num=%d\n",
+ num_sgs, sg_num);
+ netdev_completed_queue(netdev,
+ skb_shinfo(skb)->gso_segs ?: 1,
+ len);
+ rtl_drop_tx_skb(tp, skb);
+ WARN_ON_ONCE(1);
+ continue;
}
+ WARN_ON_ONCE(num_sgs > RTL_MAX_SG_NUM);
+ netif_dbg(tp, tx_queued, netdev,
+ "sg_num=%d, skb len=%u, num_sgs=%d\n",
+ sg_num, len, num_sgs);
+
sg += num_sgs;
__skb_queue_tail(&agg->tx_skb, skb);
sg_num += num_sgs;
agg->skb_len += skb->len;
agg->skb_bytes += len;
agg->skb_num += skb_shinfo(skb)->gso_segs ?: 1;
padding = len + tp->tx_desc.size;
padding = ALIGN(padding, tp->tx_desc.align) - padding;
}
if (!skb_queue_empty(&skb_head)) {
+ WARN_ON_ONCE(!sg_num);
spin_lock(&tx_queue->lock);
skb_queue_splice(&skb_head, tx_queue);
spin_unlock(&tx_queue->lock);
}
- netif_tx_lock(netdev);
-
- if (netif_queue_stopped(netdev) &&
- skb_queue_len(&tp->tx_queue) < tp->tx_qlen)
- netif_wake_queue(netdev);
-
- netif_tx_unlock(netdev);
-
- if (sg_num == 0) {
+ if (unlikely(sg_num == 0)) {
unsigned long flags;
spin_lock_irqsave(&tp->tx_lock, flags);
list_add_tail(&agg->list, &tp->tx_free);
spin_unlock_irqrestore(&tp->tx_lock, flags);
ret = 0;
goto out_tx_fill;
}
+ netif_tx_lock(netdev);
+
+ if (netif_queue_stopped(netdev) &&
+ skb_queue_len(&tp->tx_queue) < tp->tx_qlen)
+ netif_wake_queue(netdev);
+
+ netif_tx_unlock(netdev);
+
ret = usb_autopm_get_interface_async(tp->intf);
if (ret < 0)
goto out_tx_fill;
sg_mark_end(sg);
usb_fill_bulk_urb(agg->urb, tp->udev, tp->pipe_out,
NULL, (int)agg->skb_len,
(usb_complete_t)write_bulk_sg_callback, agg);
agg->urb->sg = agg->head;
agg->urb->num_sgs = sg_num;
ret = usb_submit_urb(agg->urb, GFP_ATOMIC);
if (ret < 0)
usb_autopm_put_interface_async(tp->intf);
+ else
+ tp->tx_submit++;
out_tx_fill:
if (ret < 0) {
netdev_completed_queue(netdev, agg->skb_num, agg->skb_bytes);
while (!skb_queue_empty(&agg->tx_skb))
- dev_kfree_skb_any(__skb_dequeue(&agg->tx_skb));
+ rtl_drop_tx_skb(tp, __skb_dequeue(&agg->tx_skb));
}
return ret;
}
static void tx_bottom(struct r8152 *tp)
{
- int res;
+ int res, i;
- do {
+ for (i = 0; i < RTL8152_MAX_TX; i++) {
struct net_device *netdev = tp->netdev;
struct tx_agg *agg;
- if (skb_queue_empty(&tp->tx_queue))
+ if (skb_queue_empty_lockless(&tp->tx_queue))
break;
agg = r8152_get_tx_agg(tp);
if (!agg)
break;
if (tp->sg_use)
res = r8152_tx_agg_sg_fill(tp, agg);
else
res = r8152_tx_agg_fill(tp, agg);
- if (!res)
- continue;
-
if (res == -ENODEV) {
rtl_set_unplug(tp);
netif_device_detach(netdev);
- } else {
+ break;
+ } else if (res < 0) {
struct net_device_stats *stats = &netdev->stats;
unsigned long flags;
netif_warn(tp, tx_err, netdev,
"failed tx_urb %d\n", res);
stats->tx_dropped += agg->skb_num;
spin_lock_irqsave(&tp->tx_lock, flags);
list_add_tail(&agg->list, &tp->tx_free);
spin_unlock_irqrestore(&tp->tx_lock, flags);
+ break;
}
- } while (res == 0);
+ }
} |
|
I had a quick look, and the critical change is that the 3 __skb_queue_head calls are all shown in red (removed); if the skb isn't added back to the head of the queue, it can't loop, I think. The check for num_sgs + sg_num > max_sg_num is gone as well. Instead of looping, it should do a WARN_ON_ONCE and a "skb_to_sgvec fail" error. From a quick review, the softlockup seems fixed, making my workaround obsolete. I didn't do a proper review to check whether other bugs have been introduced, but it shouldn't hang anymore. Thanks for the heads up about the update. Ideally, since they appear to be actively supporting the driver, they should go the last mile and submit it upstream. |
When processing transmit packets in tx_bottom, the code was calling r8152_tx_agg_sg_fill for the same skb even when the SG fill failed to consume the skb. This could happen if the condition "1 + skb_shinfo(skb)->nr_frags > RTL_MAX_SG_NUM" was true, leading to infinite loops and softlockups.
The softlockup here was reproducible only when accessing huggingface.co with Firefox. With this patch, the warning is logged as expected and no softlockup occurs anymore:
[212844.954702] r8152 4-1:1.0 enp5s0f4u1: num_sgs 18 > max_sg_num 16
Increasing RTL_MAX_SG_NUM to avoid the failure appears to work too but it looks more fragile. Furthermore RTL_MAX_SG_NUM is used throughout the driver to communicate with the network layer and the driver is meant to be able to consume that number of network buffers at once, which may not be the case if increasing the value.
This change prevents the infinite loop by invoking the non SG fill to consume the skb if the SG fill cannot consume it.
Link: #48