mlx4en, timer irq @100%... (11.0 stuck on high network load ???)
Hans Petter Selasky
hps at selasky.org
Tue Aug 8 08:33:49 UTC 2017
On 08/08/17 10:06, Ben RUBSON wrote:
>> On 08 Aug 2017, at 10:02, Hans Petter Selasky <hps at selasky.org> wrote:
>>
>> On 08/08/17 10:00, Ben RUBSON wrote:
>>> (kgdb) print *twq_2msl.tqh_first
>>> $2 = {
>>> tw_inpcb = 0xfffff8031c570740,
>>
>> print *twq_2msl.tqh_first->tw_inpcb
>
> (kgdb) print *twq_2msl.tqh_first->tw_inpcb
> $3 = {
> inp_hash = {
> le_next = 0x0,
> le_prev = 0xfffffe000f78adb8
> },
> inp_pcbgrouphash = {
> le_next = 0x0,
> le_prev = 0x0
> },
> inp_list = {
> le_next = 0xfffff80c2a07f570,
> le_prev = 0xffffffff81e15e20
> },
> inp_ppcb = 0xfffff80d1bf12210,
> inp_pcbinfo = 0xffffffff81e15e28,
> inp_pcbgroup = 0x0,
> inp_pcbgroup_wild = {
> le_next = 0x0,
> le_prev = 0x0
> },
> inp_socket = 0x0,
> inp_cred = 0xfffff804ae6ca400,
> inp_flow = 0,
> inp_flags = 92274688,
> inp_flags2 = 16,
> inp_vflag = 0 '\0',
> inp_ip_ttl = 64 '@',
> inp_ip_p = 0 '\0',
> inp_ip_minttl = 0 '\0',
> inp_flowid = 946611505,
> inp_refcount = 2,
> inp_pspare = 0xfffff8031c5707c0,
> inp_flowtype = 191,
> inp_rss_listen_bucket = 0,
> inp_ispare = 0xfffff8031c5707f0,
> inp_inc = {
> inc_flags = 0 '\0',
> inc_len = 0 '\0',
> inc_fibnum = 0,
> inc_ie = {
> ie_fport = 53987,
> ie_lport = 47873,
> ie_dependfaddr = {
> ie46_foreign = {
> ia46_pad32 = 0xfffff8031c570808,
> ia46_addr4 = {
> s_addr = 3011802202
> }
> },
> ie6_foreign = {
> __u6_addr = {
> __u6_addr8 = 0xfffff8031c570808 "",
> __u6_addr16 = 0xfffff8031c570808,
> __u6_addr32 = 0xfffff8031c570808
> }
> }
> },
> ie_dependladdr = {
> ie46_local = {
> ia46_pad32 = 0xfffff8031c570818,
> ia46_addr4 = {
> s_addr = 4068705883
> }
> },
> ie6_local = {
> __u6_addr = {
> __u6_addr8 = 0xfffff8031c570818 "",
> __u6_addr16 = 0xfffff8031c570818,
> __u6_addr32 = 0xfffff8031c570818
> }
> }
> },
> ie6_zoneid = 0
> }
> },
> inp_label = 0x0,
> inp_sp = 0x0,
> inp_depend4 = {
> inp4_ip_tos = 0 '\0',
> inp4_options = 0x0,
> inp4_moptions = 0x0
> },
> inp_depend6 = {
> inp6_options = 0x0,
> inp6_outputopts = 0x0,
> inp6_moptions = 0x0,
> inp6_icmp6filt = 0x0,
> inp6_cksum = 0,
> inp6_hops = 0
> },
> inp_portlist = {
> le_next = 0xfffff80274298ae0,
> le_prev = 0xfffff800454999b0
> },
> inp_phd = 0xfffff800454999a0,
> inp_gencnt = 2119756,
> inp_lle = 0x0,
> inp_lock = {
> lock_object = {
> lo_name = 0xffffffff814e6940 "tcpinp",
> lo_flags = 90898432,
> lo_data = 0,
> lo_witness = 0x0
> },
> rw_lock = 18446735277871559936
> },
> inp_rt_cookie = 10,
> inp_rtu = {
> inpu_route = {
> ro_rt = 0x0,
> ro_lle = 0x0,
> ro_prepend = 0x0,
> ro_plen = 0,
> ro_flags = 384,
> ro_mtu = 0,
> spare = 0,
> ro_dst = {
> sa_len = 16 '\020',
> sa_family = 2 '\002',
> sa_data = 0xfffff8031c5708f2 ""
> }
> },
> inpu_route6 = {
> ro_rt = 0x0,
> ro_lle = 0x0,
> ro_prepend = 0x0,
> ro_plen = 0,
> ro_flags = 384,
> ro_mtu = 0,
> spare = 0,
> ro_dst = {
> sin6_len = 16 '\020',
> sin6_family = 2 '\002',
> sin6_port = 0,
> sin6_flowinfo = 3011802202,
> sin6_addr = {
> __u6_addr = {
> __u6_addr8 = 0xfffff8031c5708f8 "",
> __u6_addr16 = 0xfffff8031c5708f8,
> __u6_addr32 = 0xfffff8031c5708f8
> }
> },
> sin6_scope_id = 0
> }
> }
> }
> }
> (kgdb)
>
Hi,
Here is the conclusion:
The following code is stuck in an infinite loop:
> for (;;) {
> TW_RLOCK(V_tw_lock);
> tw = TAILQ_FIRST(&V_twq_2msl);
> if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0)) {
> TW_RUNLOCK(V_tw_lock);
> break;
> }
> KASSERT(tw->tw_inpcb != NULL, ("%s: tw->tw_inpcb == NULL",
> __func__));
>
> inp = tw->tw_inpcb;
> in_pcbref(inp);
> TW_RUNLOCK(V_tw_lock);
>
> if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
>
> INP_WLOCK(inp);
> tw = intotw(inp);
> if (in_pcbrele_wlocked(inp)) {
in_pcbrele_wlocked() returns (1) because INP_FREED (16) is set in
inp->inp_flags2 (the dump above shows inp_flags2 = 16). I guess you have
INVARIANTS disabled, because otherwise the KASSERT() below would have
caused a panic.
> KASSERT(tw == NULL, ("%s: held last inp "
> "reference but tw not NULL", __func__));
> INP_INFO_RUNLOCK(&V_tcbinfo);
> continue;
> }
This is a regression that appeared after the following commit:
> commit 5630210a7f1dbbd903b77b2aef939cd47c63da58
> Author: jch <jch at FreeBSD.org>
> Date: Thu Oct 30 08:53:56 2014 +0000
>
> Fix a race condition in TCP timewait between tcp_tw_2msl_reuse() and
> tcp_tw_2msl_scan(). This race condition drives unplanned timewait
> timeout cancellation. Also simplify implementation by holding inpcb
> reference and removing tcptw reference counting.
Suggested fix attached.
--HPS
-------------- next part --------------
A non-text attachment was scrubbed...
Name: tcp_timewait.diff
Type: text/x-patch
Size: 561 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/freebsd-net/attachments/20170808/57857ebf/attachment.bin>
More information about the freebsd-net
mailing list