mlx4en, timer irq @100%... (11.0 stuck on high network load ???)

Ben RUBSON ben.rubson at gmail.com
Tue Aug 8 08:51:40 UTC 2017


> On 08 Aug 2017, at 10:31, Hans Petter Selasky <hps at selasky.org> wrote:
> 
> On 08/08/17 10:06, Ben RUBSON wrote:
>>> On 08 Aug 2017, at 10:02, Hans Petter Selasky <hps at selasky.org> wrote:
>>> 
>>> On 08/08/17 10:00, Ben RUBSON wrote:
>>>> kgdb) print *twq_2msl.tqh_first
>>>> $2 = {
>>>>   tw_inpcb = 0xfffff8031c570740,
>>> 
>>> print *twq_2msl.tqh_first->tw_inpcb
>> (kgdb) print *twq_2msl.tqh_first->tw_inpcb
>> $3 = {
>>   inp_hash = {
>>     le_next = 0x0,
>>     le_prev = 0xfffffe000f78adb8
>>   },
>>   inp_pcbgrouphash = {
>>     le_next = 0x0,
>>     le_prev = 0x0
>>   },
>>   inp_list = {
>>     le_next = 0xfffff80c2a07f570,
>>     le_prev = 0xffffffff81e15e20
>>   },
>>   inp_ppcb = 0xfffff80d1bf12210,
>>   inp_pcbinfo = 0xffffffff81e15e28,
>>   inp_pcbgroup = 0x0,
>>   inp_pcbgroup_wild = {
>>     le_next = 0x0,
>>     le_prev = 0x0
>>   },
>>   inp_socket = 0x0,
>>   inp_cred = 0xfffff804ae6ca400,
>>   inp_flow = 0,
>>   inp_flags = 92274688,
>>   inp_flags2 = 16,
>>   inp_vflag = 0 '\0',
>>   inp_ip_ttl = 64 '@',
>>   inp_ip_p = 0 '\0',
>>   inp_ip_minttl = 0 '\0',
>>   inp_flowid = 946611505,
>>   inp_refcount = 2,
>>   inp_pspare = 0xfffff8031c5707c0,
>>   inp_flowtype = 191,
>>   inp_rss_listen_bucket = 0,
>>   inp_ispare = 0xfffff8031c5707f0,
>>   inp_inc = {
>>     inc_flags = 0 '\0',
>>     inc_len = 0 '\0',
>>     inc_fibnum = 0,
>>     inc_ie = {
>>       ie_fport = 53987,
>>       ie_lport = 47873,
>>       ie_dependfaddr = {
>>         ie46_foreign = {
>>           ia46_pad32 = 0xfffff8031c570808,
>>           ia46_addr4 = {
>>             s_addr = 3011802202
>>           }
>>         },
>>         ie6_foreign = {
>>           __u6_addr = {
>>             __u6_addr8 = 0xfffff8031c570808 "",
>>             __u6_addr16 = 0xfffff8031c570808,
>>             __u6_addr32 = 0xfffff8031c570808
>>           }
>>         }
>>       },
>>       ie_dependladdr = {
>>         ie46_local = {
>>           ia46_pad32 = 0xfffff8031c570818,
>>           ia46_addr4 = {
>>             s_addr = 4068705883
>>           }
>>         },
>>         ie6_local = {
>>           __u6_addr = {
>>             __u6_addr8 = 0xfffff8031c570818 "",
>>             __u6_addr16 = 0xfffff8031c570818,
>>             __u6_addr32 = 0xfffff8031c570818
>>           }
>>         }
>>       },
>>       ie6_zoneid = 0
>>     }
>>   },
>>   inp_label = 0x0,
>>   inp_sp = 0x0,
>>   inp_depend4 = {
>>     inp4_ip_tos = 0 '\0',
>>     inp4_options = 0x0,
>>     inp4_moptions = 0x0
>>   },
>>   inp_depend6 = {
>>     inp6_options = 0x0,
>>     inp6_outputopts = 0x0,
>>     inp6_moptions = 0x0,
>>     inp6_icmp6filt = 0x0,
>>     inp6_cksum = 0,
>>     inp6_hops = 0
>>   },
>>   inp_portlist = {
>>     le_next = 0xfffff80274298ae0,
>>     le_prev = 0xfffff800454999b0
>>   },
>>   inp_phd = 0xfffff800454999a0,
>>   inp_gencnt = 2119756,
>>   inp_lle = 0x0,
>>   inp_lock = {
>>     lock_object = {
>>       lo_name = 0xffffffff814e6940 "tcpinp",
>>       lo_flags = 90898432,
>>       lo_data = 0,
>>       lo_witness = 0x0
>>     },
>>     rw_lock = 18446735277871559936
>>   },
>>   inp_rt_cookie = 10,
>>   inp_rtu = {
>>     inpu_route = {
>>       ro_rt = 0x0,
>>       ro_lle = 0x0,
>>       ro_prepend = 0x0,
>>       ro_plen = 0,
>>       ro_flags = 384,
>>       ro_mtu = 0,
>>       spare = 0,
>>       ro_dst = {
>>         sa_len = 16 '\020',
>>         sa_family = 2 '\002',
>>         sa_data = 0xfffff8031c5708f2 ""
>>       }
>>     },
>>     inpu_route6 = {
>>       ro_rt = 0x0,
>>       ro_lle = 0x0,
>>       ro_prepend = 0x0,
>>       ro_plen = 0,
>>       ro_flags = 384,
>>       ro_mtu = 0,
>>       spare = 0,
>>       ro_dst = {
>>         sin6_len = 16 '\020',
>>         sin6_family = 2 '\002',
>>         sin6_port = 0,
>>         sin6_flowinfo = 3011802202,
>>         sin6_addr = {
>>           __u6_addr = {
>>             __u6_addr8 = 0xfffff8031c5708f8 "",
>>             __u6_addr16 = 0xfffff8031c5708f8,
>>             __u6_addr32 = 0xfffff8031c5708f8
>>           }
>>         },
>>         sin6_scope_id = 0
>>       }
>>     }
>>   }
>> }
>> (kgdb)
> 
> Hi,
> 
> Here is the conclusion:
> 
> The following code is going in an infinite loop:
> 
> 
>>        for (;;) {
>>                TW_RLOCK(V_tw_lock);
>>                tw = TAILQ_FIRST(&V_twq_2msl);
>>                if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0)) {
>>                        TW_RUNLOCK(V_tw_lock);
>>                        break;
>>                }
>>                KASSERT(tw->tw_inpcb != NULL, ("%s: tw->tw_inpcb == NULL",
>>                    __func__));
>>                inp = tw->tw_inpcb;
>>                in_pcbref(inp);
>>                TW_RUNLOCK(V_tw_lock);
>>                if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
>>                        INP_WLOCK(inp);
>>                        tw = intotw(inp);
>>                        if (in_pcbrele_wlocked(inp)) {
> 
> in_pcbrele_wlocked() returns (1) because INP_FREED (16) is set in inp->inp_flags2. I guess you have invariants disabled, because the KASSERT() below should have caused a panic.

You're right, INVARIANTS is not in GENERIC :

# grep -i INVARIANTS /usr/src/sys/amd64/conf/GENERIC
# grep -i INVARIANTS /usr/src/sys/amd64/conf/*
/usr/src/sys/amd64/conf/MINIMAL:options 	INVARIANTS		# Enable calls of extra sanity checking
/usr/src/sys/amd64/conf/MINIMAL:options 	INVARIANT_SUPPORT	# Extra sanity checks of internal structures, required by INVARIANTS

>>                                KASSERT(tw == NULL, ("%s: held last inp "
>>                                    "reference but tw not NULL", __func__));
>>                                INP_INFO_RUNLOCK(&V_tcbinfo);
>>                                continue;
>>                        }
> 
> This is a regression issue after:
> 
>> commit 5630210a7f1dbbd903b77b2aef939cd47c63da58
>> Author: jch <jch at FreeBSD.org>
>> Date:   Thu Oct 30 08:53:56 2014 +0000
>>    Fix a race condition in TCP timewait between tcp_tw_2msl_reuse() and
>>    tcp_tw_2msl_scan().  This race condition drives unplanned timewait
>>    timeout cancellation.  Also simplify implementation by holding inpcb
>>    reference and removing tcptw reference counting.
> 
> Suggested fix attached.

Thank you very much for your investigation HPS and for the patch !
Not sure however how to test it as I don't know a way to reproduce the issue.

Certainly your patch will reach 11.0 & 11.1 soon.

Thank you again, and glad to have helped to track this down.

Ben



More information about the freebsd-stable mailing list