FreeBSD 7-STABLE mbuf corruption

Arnaud Lacombe lacombar at gmail.com
Thu Sep 8 19:04:36 UTC 2011


Hi,

On Wed, Sep 7, 2011 at 8:18 PM, Sean Bruno <seanbru at yahoo-inc.com> wrote:
> [...]
> In addition to some txeof() handling patches ... Jack can see if this is
> still relevant in the freebsd7 universe. Note that Yahoo is "special"
> and we pluck and chuck code/drivers/whatever at will so I don't know how
> this code will apply to stable-7 or if it has been applied already.
>
> Note that this code "rage-quits" the EIAM/EIAC auto ack strategy and
> tries to handle the OACTIVE state handling better.  I hope these clues
> find you well.
>
Just for the record, I'll comment inline.

> --- //depot/yahoo/ybsd_7/src/sys/dev/e1000/if_igb.c     2010-11-29
> 20:47:23.000000000 0000
> +++ /home/seanbru/ybsd_7/src/sys/dev/e1000/if_igb.c     2010-11-29
> 20:47:23.000000000 0000
> @@ -30,7 +30,7 @@
>   POSSIBILITY OF SUCH DAMAGE.
>
>
> ******************************************************************************/
> -/*$FreeBSD: src/sys/dev/e1000/if_igb.c,v 1.3.2.13 2010/11/27 01:09:54
> jfv Exp $*/
> +/*$FreeBSD: stable/7/sys/dev/e1000/if_igb.c 216465 2010-12-15 22:48:44Z
> jfv $*/
>
>
>  #ifdef HAVE_KERNEL_OPTION_HEADERS
> @@ -318,10 +318,6 @@
>  static bool igb_header_split = FALSE;
>  TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
>
> -/*
> -** This will autoconfigure based on
> -** the number of CPUs if left at 0.
> -*/
>  static int igb_num_queues = 0;
>  TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
>
> @@ -784,10 +780,14 @@
>                return;
>
>        /* Call cleanup if number of TX descriptors low */
> +#if 0
>        if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
>                igb_txeof(txr);
> +#endif
>
>        while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
> +               if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
> +                       igb_txeof(txr);
>                if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
>                        ifp->if_drv_flags |= IFF_DRV_OACTIVE;
>                        break;

> @@ -1162,10 +1162,10 @@
>                IGB_TX_LOCK(txr);
>                if (igb_txeof(txr))
>                        more = TRUE;
> -               if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
> -                       igb_start_locked(txr, ifp);
> +               /*if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) Pointless as
> igb_start_locked() checks this right off the bat*/
> +               igb_start_locked(txr, ifp);
>                IGB_TX_UNLOCK(txr);
> -               if (more) {
> +               if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
>                        taskqueue_enqueue(que->tq, &que->que_task);
>                        return;
>                }
that hunk seem already to be in the HEAD driver.

> @@ -1298,15 +1298,20 @@
>        struct rx_ring *rxr = que->rxr;
>        u32             newitr = 0;
>        bool            more_tx, more_rx;
> +       struct ifnet    *ifp = adapter->ifp;
>
> +#if 0
>        E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
> +       newitr = E1000_READ_REG(&adapter->hw, E1000_EICR);
> +#endif
> +       E1000_WRITE_REG(&adapter->hw, E1000_EICR, que->eims);
>        ++que->irqs;
>
>        IGB_TX_LOCK(txr);
>        more_tx = igb_txeof(txr);
>        IGB_TX_UNLOCK(txr);
>
already in HEAD.

> -       more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
> +       more_rx = igb_rxeof(que, -1, NULL);
>
>        if (igb_enable_aim == FALSE)
>                goto no_calc;
applied manually.

> @@ -1361,7 +1366,7 @@
>
>  no_calc:
>        /* Schedule a clean task if needed*/
> -       if (more_tx || more_rx)
> +       if (more_tx || more_rx || (ifp->if_drv_flags & IFF_DRV_OACTIVE))
>                taskqueue_enqueue(que->tq, &que->que_task);
>        else
>                /* Reenable this interrupt */
already in HEAD.

> @@ -1382,6 +1387,7 @@
>        struct adapter  *adapter = arg;
>        u32             icr;
>
> +       E1000_WRITE_REG(&adapter->hw, E1000_EICR, adapter->link_mask);
>        ++adapter->link_irq;
>        icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
>        if (!(icr & E1000_ICR_LSC))
applied manually.

> @@ -1535,6 +1541,14 @@
>        if (m_head->m_flags & M_VLANTAG)
>                cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
>
> +/*
> + * We just did this in before invocation, seems completely
> + * redundant, igb_handle_queue -> igb_txeof
> + * Pretty sure this is impossible as we check for the
> + * IGB_TX_CLEANUP_THRESHOLD in igb_start_locked() which happens
> + * before this func in invoked
> + */
> +#if 0
>         /*
>          * Force a cleanup if number of TX descriptors
>          * available hits the threshold
> @@ -1547,6 +1561,7 @@
>                        return (ENOBUFS);
>                }
>        }
> +#endif
>
>        /*
>          * Map the packet for DMA.
already in HEAD.

> @@ -2082,6 +2097,7 @@
>                        que->eims = E1000_EICR_TX_QUEUE0 << i;
>                else
>                        que->eims = 1 << vector;
> +               device_printf(adapter->dev, "que %d eims= 0x%x\n", i,
> que->eims);
>                /*
>                ** Bind the msix vector, and thus the
>                ** rings to the corresponding cpu.
applied manually

> @@ -3264,8 +3280,6 @@
>                case ETHERTYPE_IPV6:
>                        ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
>                        ip_hlen = sizeof(struct ip6_hdr);
> -                       if (mp->m_len < ehdrlen + ip_hlen)
> -                               return (FALSE);
>                        ipproto = ip6->ip6_nxt;
>                        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
>                        break;
already in HEAD.

> @@ -4461,6 +4475,24 @@
>  static void
>  igb_enable_intr(struct adapter *adapter)
>  {
> +       uint32_t ims, regval;
> +
> +        if (adapter->msix_mem) {
> +                ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
> +                regval = E1000_READ_REG(&adapter->hw, E1000_EIAC);
> +#if 0
> +                E1000_WRITE_REG(&adapter->hw, E1000_EIAC, regval |
> adapter->eims_mask);
> +                regval = E1000_READ_REG(&adapter->hw, E1000_EIAM);
> +                E1000_WRITE_REG(&adapter->hw, E1000_EIAM, regval |
> adapter->eims_mask);
> +#endif
> +                E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
> adapter->eims_mask);
> +                E1000_WRITE_REG(&adapter->hw, E1000_IMS, ims);
> +        } else {
> +                E1000_WRITE_REG(&adapter->hw, E1000_IMS,
> IMS_ENABLE_MASK | E1000_IMS_DRSTA);
> +                E1000_WRITE_REG(&adapter->hw, E1000_IAM,
> IMS_ENABLE_MASK | E1000_IMS_DRSTA);
> +        }
> +
> +#if 0
>        /* With RSS set up what to auto clear */
>        if (adapter->msix_mem) {
>                E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
> @@ -4475,6 +4507,7 @@
>                E1000_WRITE_REG(&adapter->hw, E1000_IMS,
>                    IMS_ENABLE_MASK);
>        }
> +#endif
>        E1000_WRITE_FLUSH(&adapter->hw);
>
>        return;
> @@ -4483,11 +4516,33 @@
>  static void
>  igb_disable_intr(struct adapter *adapter)
>  {
> +       uint32_t regval;
> +
> +        /*
> +         * we need to be careful when disabling interrupts.  The VFs
> are also
> +         * mapped into these registers and so clearing the bits can
> cause
> +         * issues on the VF drivers so we only need to clear what we
> set
> +         */
> +        if (adapter->msix_mem) {
> +                regval = E1000_READ_REG(&adapter->hw, E1000_EIAM);
> +#if 0
> +                E1000_WRITE_REG(&adapter->hw, E1000_EIAM, regval &
> ~adapter->eims_mask);
> +                regval = E1000_READ_REG(&adapter->hw, E1000_EIAC);
> +                E1000_WRITE_REG(&adapter->hw, E1000_EIAC, regval &
> ~adapter->eims_mask);
> +#endif
> +                E1000_WRITE_REG(&adapter->hw, E1000_EIMC,
> adapter->eims_mask);
> +        }
> +
> +        E1000_WRITE_REG(&adapter->hw, E1000_IAM, 0);
> +        E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
> +
> +#if 0
>        if (adapter->msix_mem) {
>                E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
>                E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
>        }
>        E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
> +#endif
>        E1000_WRITE_FLUSH(&adapter->hw);
>        return;
>  }
applied manually.

> @@ -4876,8 +4931,8 @@
>        struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
>        struct e1000_hw_stats *stats = adapter->stats;
>
> -       struct sysctl_oid *stat_node, *queue_node, *int_node,
> *host_node;
> -       struct sysctl_oid_list *stat_list, *queue_list, *int_list,
> *host_list;
> +       struct sysctl_oid *stat_node, *queue_node, *int_node,
> *host_node, *debug_node;
> +       struct sysctl_oid_list *stat_list, *queue_list, *int_list,
> *host_list, *debug_list;
>
>  #define QUEUE_NAME_LEN 32
>        char namebuf[QUEUE_NAME_LEN];
> @@ -5245,6 +5300,27 @@
>        SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
>                        CTLFLAG_RD, &stats->hrmpc,
>                        "Header Redirection Missed Packet Count");
> +
> +
> +       debug_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "debug",
> +                                   CTLFLAG_RD, NULL,
> +                                   "Debug info");
> +
> +       debug_list = SYSCTL_CHILDREN(debug_node);
> +
> +       txr = adapter->tx_rings;
> +       SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "desc_avail",
> +                       CTLFLAG_RD, (u_int *)(uintptr_t)&txr->tx_avail,
> 0,
> +                       "");
> +
> +       SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "next_to_clean",
> +                       CTLFLAG_RD, &txr->next_to_clean, 0,
> +                       "");
> +
> +       SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "if_flags",
> +                       CTLFLAG_RD, &adapter->ifp->if_drv_flags, 0,
> +                       "");
> +
>  }
>
applied manually.

LYK,
 - Arnaud


More information about the freebsd-net mailing list