IPsec performance - netisr hits 100%

Özkan KIRIK ozkan.kirik at gmail.com
Sat May 1 13:31:13 UTC 2021


This bug is related to CCR. @Navdeep Parhar <np at freebsd.org> , @John Baldwin
<jhb at freebsd.org> if you are interested in fixing this CCR-related bug, I
can test any patches you provide. The test environment is described in my first
email in this thread.

@Mark Johnston <markj at freebsd.org> Now, testing again on stable/13:
- with aesni, without netipsec/ipsec_input.c patch - 1.44Gbps - single
netisr thread eats 100% cpu
- with qat, without netipsec/ipsec_input.c patch - 1.88Gbps - single netisr
thread eats 100% cpu

- with aesni, with netipsec/ipsec_input.c patch - 1.33Gbps
  PID   JID USERNAME    PRI NICE   SIZE    RES STATE    C   TIME    WCPU
COMMAND
    7     0 root        -16    -     0B    16K CPU4     4   0:39  98.62%
[crypto returns 3]
    6     0 root        -16    -     0B    16K CPU5     5   0:32  84.14%
[crypto returns 2]
   11     0 root        -92    -     0B  1152K CPU8     8   0:28  72.17%
[intr{irq97: t6nex0:0a2}]
   11     0 root        -92    -     0B  1152K CPU3     3   0:20  51.22%
[intr{irq107: t6nex0:1a2}]
    8     0 root        -16    -     0B    16K crypto   7   0:12  30.68%
[crypto returns 4]
    0     0 root        -20    -     0B    13M RUN      7   0:07  21.52%
[kernel{crypto_2}]
    0     0 root        -20    -     0B    13M CPU12   12   0:06  20.53%
[kernel{crypto_15}]
    0     0 root        -20    -     0B    13M CPU9     9   0:05  19.97%
[kernel{crypto_0}]
    0     0 root        -20    -     0B    13M -        0   0:05  19.86%
[kernel{crypto_14}]
    5     0 root        -16    -     0B    16K CPU2     2   0:08  19.75%
[crypto returns 1]
    0     0 root        -20    -     0B    13M -        6   0:05  19.56%
[kernel{crypto_9}]
    0     0 root        -20    -     0B    13M -        7   0:07  18.74%
[kernel{crypto_3}]
    0     0 root        -20    -     0B    13M -       13   0:07  18.68%
[kernel{crypto_5}]
    0     0 root        -20    -     0B    13M CPU1     1   0:07  18.12%
[kernel{crypto_11}]
    0     0 root        -20    -     0B    13M -        2   0:07  17.46%
[kernel{crypto_8}]
    0     0 root        -20    -     0B    13M CPU10   10   0:05  17.31%
[kernel{crypto_1}]
    0     0 root        -20    -     0B    13M -        4   0:06  17.08%
[kernel{crypto_7}]
    0     0 root        -20    -     0B    13M -       14   0:07  16.07%
[kernel{crypto_12}]
    0     0 root        -20    -     0B    13M -       11   0:07  15.39%
[kernel{crypto_6}]
    0     0 root        -20    -     0B    13M -        6   0:07  12.09%
[kernel{crypto_4}]

- with qat, with netipsec/ipsec_input.c patch - 2.85Gbps -
  PID   JID USERNAME    PRI NICE   SIZE    RES STATE    C   TIME    WCPU
COMMAND
   20     0 root        -16    -     0B    16K CPU6     6   0:18  89.69%
[crypto returns 13]
   19     0 root        -16    -     0B    16K CPU4     4   0:17  80.25%
[crypto returns 12]
   11     0 root        -92    -     0B  1696K CPU1     1   0:52  43.90%
[intr{irq97: t6nex0:0a2}]
   21     0 root        -16    -     0B    16K crypto   0   0:09  43.43%
[crypto returns 14]
   18     0 root        -16    -     0B    16K crypto   2   0:05  24.94%
[crypto returns 11]
   11     0 root        -92    -     0B  1696K WAIT    12   0:35  20.18%
[intr{irq107: t6nex0:1a2}]
   11     0 root        -92    -     0B  1696K WAIT     8   0:02   7.44%
[intr{irq168: qat1}]
 4202     1 root         23    0    32M  4480K sbwait  14   0:01   5.57%
iperf -B 172.16.70.5 -c 172.16.68.1 -P 2 -t 30{iperf}
 4217     1 root         22    0    32M  4480K sbwait   5   0:01   5.52%
iperf -B 172.16.70.10 -c 172.16.68.1 -P 2 -t 30{iperf}
...
   11     0 root        -92    -     0B  1696K WAIT    12   0:01   4.39%
[intr{irq155: qat0}]
 4182     0 root         21    0   105M    40M sbwait   4   0:01   4.04%
iperf -s{iperf}
 4182     0 root         21    0   105M    40M sbwait   5   0:01   3.99%
iperf -s{iperf}
...
   11     0 root        -92    -     0B  1696K WAIT    12   0:00   1.84%
[intr{irq172: qat1}]
   11     0 root        -92    -     0B  1696K WAIT    15   0:00   1.07%
[intr{irq175: qat1}]
   11     0 root        -92    -     0B  1696K WAIT     4   0:00   1.04%
[intr{irq164: qat1}]
   11     0 root        -92    -     0B  1696K CPU6     6   0:00   1.02%
[intr{irq166: qat1}]
   11     0 root        -92    -     0B  1696K WAIT    14   0:00   1.00%
[intr{irq174: qat1}]
   11     0 root        -92    -     0B  1696K WAIT    10   0:00   0.98%
[intr{irq170: qat1}]
   11     0 root        -92    -     0B  1696K WAIT     5   0:00   0.98%
[intr{irq165: qat1}]
   11     0 root        -92    -     0B  1696K WAIT     3   0:00   0.97%
[intr{irq163: qat1}]
   11     0 root        -92    -     0B  1696K WAIT     7   0:00   0.95%
[intr{irq167: qat1}]
   11     0 root        -92    -     0B  1696K WAIT     2   0:00   0.93%
[intr{irq162: qat1}]

stable/13 results are better than stable/12, but still not fast enough.
Something is still creating a bottleneck for IPsec.

On Sat, May 1, 2021 at 3:39 PM Özkan KIRIK <ozkan.kirik at gmail.com> wrote:

> I've pulled the latest stable/13 branch and ran make buildworld.
>
> Same configuration and same usage: ping works between the two sides and iperf
> can connect to the server, but almost no data is transferred; throughput is
> around 10Kbps.
> I tried both with and without the netipsec/ipsec_input.c patch, but there was
> no change. There is something wrong with ipsec (or ipsec in jail) in stable/13.
> iperf between host and jail works without ipsec.
>
> I saw some errors in the truss output; in brief:
> write(3,"@\^A\0x\0\0\0\^A\0\0\^S\M^I\0\0"...,131072) ERR#35 'Resource
> temporarily unavailable'
> recvfrom(3,0x8018048c0,131072,0,0x0,0x0) ERR#35 'Resource temporarily
> unavailable'
> _umtx_op(0x80026e008,UMTX_OP_WAIT_UINT_PRIVATE,0x0,0x18,0x7fffdfffdec8)
> ERR#60 'Operation timed out'
>
>
> The full truss output is below:
>
> root at host # jexec client bash
> [root at client /]# truss iperf -B 172.16.70.1 -c 172.16.68.1 2>&1 | grep -v
> clock_nanosleep
> mmap(0x0,135168,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) =
> 34362150912 (0x80024d000)
> mprotect(0x80024a000,4096,PROT_READ) = 0 (0x0)
> issetugid() = 0 (0x0)
> sigfastblock(0x1,0x80024c510) = 0 (0x0)
> open("/etc/libmap.conf",O_RDONLY|O_CLOEXEC,011136710) = 3 (0x3)
> fstat(3,{ mode=-rw-r--r-- ,inode=482,size=47,blksize=4096 }) = 0 (0x0)
> read(3,"# $FreeBSD$\nincludedir /usr/loc"...,47) = 47 (0x2f)
> close(3) = 0 (0x0)
> open("/usr/local/etc/libmap.d",O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC,0165)
> ERR#2 'No such file or directory'
> open("/var/run/ld-elf.so.hints",O_RDONLY|O_CLOEXEC,010015464) = 3 (0x3)
> read(3,"Ehnt\^A\0\0\0\M^@\0\0\0\M-U\0\0"...,128) = 128 (0x80)
> fstat(3,{ mode=-r--r--r-- ,inode=187,size=341,blksize=4096 }) = 0 (0x0)
> pread(3,"/lib:/usr/lib:/usr/local/lib:/us"...,213,0x80) = 213 (0xd5)
> close(3) = 0 (0x0)
> open("/lib/libthr.so.3",O_RDONLY|O_CLOEXEC|O_VERIFY,00) = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=186701,size=125952,blksize=32768 }) = 0
> (0x0)
> mmap(0x0,4096,PROT_READ,MAP_PRIVATE|MAP_PREFAULT_READ,3,0x0) = 34362286080
> (0x80026e000)
> mmap(0x0,184320,PROT_NONE,MAP_GUARD,-1,0x0) = 34362290176 (0x80026f000)
> mmap(0x80026f000,53248,PROT_READ,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x0)
> = 34362290176 (0x80026f000)
> mmap(0x80027c000,73728,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0xc000)
> = 34362343424 (0x80027c000)
> mmap(0x80028e000,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x1d000)
> = 34362417152 (0x80028e000)
> mmap(0x80028f000,8192,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x1d000)
> = 34362421248 (0x80028f000)
> mmap(0x800291000,45056,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_ANON,-1,0x0)
> = 34362429440 (0x800291000)
> munmap(0x80026e000,4096) = 0 (0x0)
> close(3) = 0 (0x0)
> open("/lib/librt.so.1",O_RDONLY|O_CLOEXEC|O_VERIFY,066000) ERR#2 'No such
> file or directory'
> open("/usr/lib/librt.so.1",O_RDONLY|O_CLOEXEC|O_VERIFY,066000) = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=411606,size=22824,blksize=32768 }) = 0
> (0x0)
> mmap(0x0,4096,PROT_READ,MAP_PRIVATE|MAP_PREFAULT_READ,3,0x0) = 34362286080
> (0x80026e000)
> mmap(0x0,36864,PROT_NONE,MAP_GUARD,-1,0x0) = 34362474496 (0x80029c000)
> mmap(0x80029c000,12288,PROT_READ,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x0)
> = 34362474496 (0x80029c000)
> mmap(0x80029f000,12288,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x2000)
> = 34362486784 (0x80029f000)
> mmap(0x8002a2000,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x4000)
> = 34362499072 (0x8002a2000)
> mmap(0x8002a3000,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x4000)
> = 34362503168 (0x8002a3000)
> mmap(0x8002a4000,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_ANON,-1,0x0)
> = 34362507264 (0x8002a4000)
> munmap(0x80026e000,4096) = 0 (0x0)
> close(3) = 0 (0x0)
> open("/lib/libc++.so.1",O_RDONLY|O_CLOEXEC|O_VERIFY,00) ERR#2 'No such
> file or directory'
> open("/usr/lib/libc++.so.1",O_RDONLY|O_CLOEXEC|O_VERIFY,00) = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=411469,size=824056,blksize=32768 }) = 0
> (0x0)
> mmap(0x0,4096,PROT_READ,MAP_PRIVATE|MAP_PREFAULT_READ,3,0x0) = 34362286080
> (0x80026e000)
> mmap(0x0,860160,PROT_NONE,MAP_GUARD,-1,0x0) = 34362511360 (0x8002a5000)
> mmap(0x8002a5000,376832,PROT_READ,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x0)
> = 34362511360 (0x8002a5000)
> mmap(0x800301000,425984,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x5b000)
> = 34362888192 (0x800301000)
> mmap(0x800369000,24576,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0xc2000)
> = 34363314176 (0x800369000)
> mmap(0x80036f000,8192,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0xc7000)
> = 34363338752 (0x80036f000)
> mmap(0x800371000,24576,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_ANON,-1,0x0)
> = 34363346944 (0x800371000)
> munmap(0x80026e000,4096) = 0 (0x0)
> close(3) = 0 (0x0)
> open("/lib/libcxxrt.so.1",O_RDONLY|O_CLOEXEC|O_VERIFY,041400) = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=186668,size=113688,blksize=32768 }) = 0
> (0x0)
> mmap(0x0,4096,PROT_READ,MAP_PRIVATE|MAP_PREFAULT_READ,3,0x0) = 34362286080
> (0x80026e000)
> mmap(0x0,143360,PROT_NONE,MAP_GUARD,-1,0x0) = 34363371520 (0x800377000)
> mmap(0x800377000,53248,PROT_READ,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x0)
> = 34363371520 (0x800377000)
> mmap(0x800384000,61440,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0xc000)
> = 34363424768 (0x800384000)
> mmap(0x800393000,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x1a000)
> = 34363486208 (0x800393000)
> mmap(0x800394000,8192,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x1a000)
> = 34363490304 (0x800394000)
> mmap(0x800396000,16384,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_ANON,-1,0x0)
> = 34363498496 (0x800396000)
> munmap(0x80026e000,4096) = 0 (0x0)
> close(3) = 0 (0x0)
> open("/lib/libm.so.5",O_RDONLY|O_CLOEXEC|O_VERIFY,00) = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=186671,size=214192,blksize=32768 }) = 0
> (0x0)
> mmap(0x0,4096,PROT_READ,MAP_PRIVATE|MAP_PREFAULT_READ,3,0x0) = 34362286080
> (0x80026e000)
> mmap(0x0,225280,PROT_NONE,MAP_GUARD,-1,0x0) = 34363514880 (0x80039a000)
> mmap(0x80039a000,73728,PROT_READ,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x0)
> = 34363514880 (0x80039a000)
> mmap(0x8003ac000,143360,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x11000)
> = 34363588608 (0x8003ac000)
> mmap(0x8003cf000,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x33000)
> = 34363731968 (0x8003cf000)
> mmap(0x8003d0000,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x33000)
> = 34363736064 (0x8003d0000)
> munmap(0x80026e000,4096) = 0 (0x0)
> close(3) = 0 (0x0)
> open("/lib/libgcc_s.so.1",O_RDONLY|O_CLOEXEC|O_VERIFY,020643400) = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=186687,size=93448,blksize=32768 }) = 0
> (0x0)
> mmap(0x0,4096,PROT_READ,MAP_PRIVATE|MAP_PREFAULT_READ,3,0x0) = 34362286080
> (0x80026e000)
> mmap(0x0,106496,PROT_NONE,MAP_GUARD,-1,0x0) = 34363740160 (0x8003d1000)
> mmap(0x8003d1000,36864,PROT_READ,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x0)
> = 34363740160 (0x8003d1000)
> mmap(0x8003da000,57344,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x8000)
> = 34363777024 (0x8003da000)
> mmap(0x8003e8000,8192,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x15000)
> = 34363834368 (0x8003e8000)
> mmap(0x8003ea000,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x16000)
> = 34363842560 (0x8003ea000)
> munmap(0x80026e000,4096) = 0 (0x0)
> close(3) = 0 (0x0)
> open("/lib/libc.so.7",O_RDONLY|O_CLOEXEC|O_VERIFY,00) = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=186667,size=1904424,blksize=32768 }) = 0
> (0x0)
> mmap(0x0,4096,PROT_READ,MAP_PRIVATE|MAP_PREFAULT_READ,3,0x0) = 34362286080
> (0x80026e000)
> mmap(0x0,4182016,PROT_NONE,MAP_GUARD,-1,0x0) = 34363846656 (0x8003eb000)
> mmap(0x8003eb000,520192,PROT_READ,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x0)
> = 34363846656 (0x8003eb000)
> mmap(0x80046a000,1331200,PROT_READ|PROT_EXEC,MAP_PRIVATE|MAP_FIXED|MAP_NOCORE|MAP_PREFAULT_READ,3,0x7e000)
> = 34364366848 (0x80046a000)
> mmap(0x8005af000,36864,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x1c2000)
> = 34365698048 (0x8005af000)
> mmap(0x8005b8000,28672,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_PREFAULT_READ,3,0x1ca000)
> = 34365734912 (0x8005b8000)
> mmap(0x8005bf000,2265088,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_FIXED|MAP_ANON,-1,0x0)
> = 34365763584 (0x8005bf000)
> munmap(0x80026e000,4096) = 0 (0x0)
> close(3) = 0 (0x0)
> mprotect(0x80028e000,4096,PROT_READ) = 0 (0x0)
> mprotect(0x8002a2000,4096,PROT_READ) = 0 (0x0)
> mprotect(0x800369000,20480,PROT_READ) = 0 (0x0)
> mprotect(0x800393000,4096,PROT_READ) = 0 (0x0)
> mprotect(0x8003cf000,4096,PROT_READ) = 0 (0x0)
> mprotect(0x8003e8000,4096,PROT_READ) = 0 (0x0)
> mprotect(0x8005af000,36864,PROT_READ) = 0 (0x0)
> mprotect(0x8005af000,36864,PROT_READ|PROT_WRITE) = 0 (0x0)
> mprotect(0x8005af000,36864,PROT_READ) = 0 (0x0)
> readlink("/etc/malloc.conf",0x7fffffffd5c0,1024) ERR#2 'No such file or
> directory'
> issetugid() = 0 (0x0)
> __sysctl("vm.overcommit",2,0x7fffffffbb54,0x7fffffffbb48,0x0,0) = 0 (0x0)
> mmap(0x0,2097152,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON|MAP_ALIGNED(21),-1,0x0)
> = 34368126976 (0x800800000)
> mmap(0x0,2097152,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON|MAP_ALIGNED(12),-1,0x0)
> = 34370224128 (0x800a00000)
> mmap(0x0,4194304,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON|MAP_ALIGNED(21),-1,0x0)
> = 34372321280 (0x800c00000)
> __sysctl("kern.usrstack",2,0x8002903c8,0x7fffffffd878,0x0,0) = 0 (0x0)
> getrlimit(RLIMIT_STACK,{ cur=536870912,max=536870912 }) = 0 (0x0)
> thr_self(0x800a12000) = 0 (0x0)
> mmap(0x7fffdfffe000,4096,PROT_NONE,MAP_ANON,-1,0x0) = 140736951476224
> (0x7fffdfffe000)
> rtprio_thread(RTP_LOOKUP,102215,0x7fffffffd838) = 0 (0x0)
> sigaction(SIGTHR,{ 0x8002884d0 SA_SIGINFO ss_t },0x0) = 0 (0x0)
> sigprocmask(SIG_UNBLOCK,{ },0x0) = 0 (0x0)
> _umtx_op(0x7fffffffd830,UMTX_OP_WAKE,0x1,0x0,0x0) = 0 (0x0)
> mprotect(0x0,0,PROT_NONE) = 0 (0x0)
> getpid() = 7030 (0x1b76)
> getpid() = 7030 (0x1b76)
> sigprocmask(SIG_BLOCK,{
> SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGKILL|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2
> },{ }) = 0 (0x0)
> sigfastblock(0x3,0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,{ },0x0) = 0 (0x0)
> sigfastblock(0x1,0x800a12038) = 0 (0x0)
> getcontext(0x7fffffffcbd0) = 0 (0x0)
> sysarch(AMD64_GET_XFPUSTATE,0x7fffffffcb98) = 0 (0x0)
> mmap(0x0,135168,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) =
> 34376515584 (0x801000000)
> mprotect(0x22c000,4096,PROT_READ) = 0 (0x0)
> sigprocmask(SIG_SETMASK,{
> SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGKILL|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2
> },{ }) = 0 (0x0)
> sigaction(SIGTERM,{ 0x800288380 SA_RESTART|SA_SIGINFO ss_t },{ SIG_DFL 0x0
> ss_t }) = 0 (0x0)
> sigprocmask(SIG_SETMASK,{ },0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,{
> SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGKILL|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2
> },{ }) = 0 (0x0)
> sigaction(SIGINT,{ 0x800288380 SA_RESTART|SA_SIGINFO ss_t },{ SIG_DFL 0x0
> ss_t }) = 0 (0x0)
> sigprocmask(SIG_SETMASK,{ },0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,{
> SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGKILL|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2
> },{ }) = 0 (0x0)
> sigaction(SIGALRM,{ 0x800288380 SA_SIGINFO ss_t },{ SIG_DFL 0x0 ss_t }) =
> 0 (0x0)
> sigprocmask(SIG_SETMASK,{ },0x0) = 0 (0x0)
> sigprocmask(SIG_SETMASK,{
> SIGHUP|SIGINT|SIGQUIT|SIGILL|SIGTRAP|SIGABRT|SIGEMT|SIGFPE|SIGKILL|SIGBUS|SIGSEGV|SIGSYS|SIGPIPE|SIGALRM|SIGTERM|SIGURG|SIGSTOP|SIGTSTP|SIGCONT|SIGCHLD|SIGTTIN|SIGTTOU|SIGIO|SIGXCPU|SIGXFSZ|SIGVTALRM|SIGPROF|SIGWINCH|SIGINFO|SIGUSR1|SIGUSR2
> },{ }) = 0 (0x0)
> sigaction(SIGPIPE,{ SIG_IGN SA_RESTART ss_t },{ SIG_DFL 0x0 ss_t }) = 0
> (0x0)
> sigprocmask(SIG_SETMASK,{ },0x0) = 0 (0x0)
> mmap(0x0,4096,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) =
> 34362286080 (0x80026e000)
> mmap(0x0,147456,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON,-1,0x0) =
> 34376650752 (0x801021000)
> mmap(0x7fffdfdfd000,2101248,PROT_READ|PROT_WRITE,MAP_STACK,-1,0x0) =
> 140736949374976 (0x7fffdfdfd000)
> mprotect(0x7fffdfdfd000,4096,PROT_NONE) = 0 (0x0)
> thr_new(0x7fffffffe880,0x68) = 0 (0x0)
> <new thread 102901>
> sigfastblock(0x1,0x800a12738) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_UNLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_WRLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_UNLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_RDLOCK,0x0,0x0,0x0) = 0 (0x0)
> mmap(0x7fffdfbfc000,2101248,PROT_READ|PROT_WRITE,MAP_STACK,-1,0x0) =
> 140736947273728 (0x7fffdfbfc000)
> mprotect(0x7fffdfbfc000,4096,PROT_NONE) = 0 (0x0)
> thr_new(0x7fffffffe880,0x68) = 0 (0x0)
> <new thread 102902>
> sigfastblock(0x1,0x800a12e38) = 0 (0x0)
> mmap(0x0,2097152,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON|MAP_ALIGNED(21),-1,0x0)
> = 34378612736 (0x801200000)
> mmap(0x0,2097152,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON|MAP_ALIGNED(21),-1,0x0)
> = 34380709888 (0x801400000)
> mmap(0x0,2097152,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON|MAP_ALIGNED(12),-1,0x0)
> = 34382807040 (0x801600000)
> mmap(0x0,2097152,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON|MAP_ALIGNED(12),-1,0x0)
> = 34384904192 (0x801800000)
> socket(PF_INET,SOCK_STREAM,0) = 3 (0x3)
> bind(3,{ AF_INET 172.16.70.1:0 },16) = 0 (0x0)
> connect(3,{ AF_INET 172.16.68.1:5001 },16) = 0 (0x0)
> setsockopt(3,SOL_SOCKET,SO_SNDTIMEO,0x7fffdfdfced0,16) = 0 (0x0)
> getsockname(3,{ AF_INET 172.16.70.1:34901 },0x800a1b200) = 0 (0x0)
> getpeername(3,{ AF_INET 172.16.68.1:5001 },0x800a1b178) = 0 (0x0)
> getsockname(3,{ AF_INET 172.16.70.1:34901 },0x7fffdfdfce3c) = 0 (0x0)
> __sysctl("net.routetable.0.0.5.0",6,0x0,0x7fffdfdfcdc8,0x0,0) = 0 (0x0)
> __sysctl("net.routetable.0.0.5.0",6,0x801826000,0x7fffdfdfcdc8,0x0,0) = 0
> (0x0)
> _umtx_op(0x80026e008,UMTX_OP_WAKE_PRIVATE,0x7fffffff,0x0,0x0) = 0 (0x0)
> _umtx_op(0x80026e008,UMTX_OP_WAIT_UINT_PRIVATE,0x0,0x18,0x7fffdfffdec8) =
> 0 (0x0)
> getpid() = 7030 (0x1b76)
> getsockopt(3,IPPROTO_TCP,TCP_MAXSEG,0x7fffdfdfceb4,0x7fffdfdfceb0) = 0
> (0x0)
> getsockopt(3,SOL_SOCKET,SO_SNDBUF,0x7fffdfdfceb4,0x7fffdfdfceb0) = 0 (0x0)
> fstat(1,{ mode=p--------- ,inode=12766,size=0,blksize=4096 }) = 0 (0x0)
> mmap(0x0,2621440,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANON|MAP_ALIGNED(12),-1,0x0)
> = 34387001344 (0x801a00000)
> sendto(3,"@\^A\0x\0\0\0\^A\0\0\^S\M^I\0\0"...,60,MSG_DONTWAIT,NULL,0) = 60
> (0x3c)
> getsockopt(3,SOL_SOCKET,SO_SNDBUF,0x7fffdfffde64,0x7fffdfffde60) = 0 (0x0)
> setsockopt(3,SOL_SOCKET,SO_SNDTIMEO,0x7fffdfdfced0,16) = 0 (0x0)
> ------------------------------------------------------------
> Client connecting to 172.16.68.1, TCP port 5001
> TCP window size: 33.2 KByte (default)
> ------------------------------------------------------------
> write(1,"--------------------------------"...,208) = 208 (0xd0)
> [  1] local 172.16.70.1 port 34901 connected with 172.16.68.1 port 5001
> write(1,"[  1] local 172.16.70.1 port 349"...,72) = 72 (0x48)
> write(3,"@\^A\0x\0\0\0\^A\0\0\^S\M^I\0\0"...,131072) = 34000 (0x84d0)
> write(3,"@\^A\0x\0\0\0\^A\0\0\^S\M^I\0\0"...,131072) ERR#35 'Resource
> temporarily unavailable'
> setitimer(0,{ 0.000000, 0.000000 },0x0) = 0 (0x0)
> shutdown(3,SHUT_WR) = 0 (0x0)
> setsockopt(3,SOL_SOCKET,SO_RCVTIMEO,0x7fffdfdfce60,16) = 0 (0x0)
> recvfrom(3,"\0\0\0\^B\0\0\0\^\\0\0\0\0\0\^B"...,131072,0,NULL,0x0) = 28
> (0x1c)
> recvfrom(3,0x8018048c0,131072,0,0x0,0x0) ERR#35 'Resource temporarily
> unavailable'
> _umtx_op(0x80026e020,UMTX_OP_WAKE_PRIVATE,0x7fffffff,0x0,0x0) = 0 (0x0)
> _umtx_op(0x80026e020,UMTX_OP_WAIT_UINT_PRIVATE,0x0,0x18,0x7fffdfdfcd38) =
> 0 (0x0)
> [ ID] Interval       Transfer     Bandwidth
> [  1] 0.00-20.05 sec  33.3 KBytes  13.6 Kbits/sec
> write(1,"[ ID] Interval       Transfer   "...,94) = 94 (0x5e)
> _umtx_op(0x80026e020,UMTX_OP_WAKE_PRIVATE,0x7fffffff,0x0,0x0) = 0 (0x0)
> _umtx_op(0x80026e020,UMTX_OP_WAIT_UINT_PRIVATE,0x0,0x18,0x7fffdfdfcd38) =
> 0 (0x0)
> close(3) = 0 (0x0)
> _umtx_op(0x800a130c0,UMTX_OP_NWAKE_PRIVATE,0x1,0x0,0x0) = 0 (0x0)
> _umtx_op(0x8002908a8,UMTX_OP_WAIT_UINT_PRIVATE,0x0,0x0,0x0) = 0 (0x0)
> __sysctl("kern.ostype",2,0x80024be6a,0x7fffdfdfb720,0x0,0) = 0 (0x0)
> __sysctl("kern.hostname",2,0x80024bf6a,0x7fffdfdfb720,0x0,0) = 0 (0x0)
> __sysctl("kern.osrelease",2,0x80024c06a,0x7fffdfdfb720,0x0,0) = 0 (0x0)
> __sysctl("kern.version",2,0x80024c16a,0x7fffdfdfb720,0x0,0) = 0 (0x0)
> __sysctl("hw.machine",2,0x80024c26a,0x7fffdfdfb720,0x0,0) = 0 (0x0)
> open("/lib/libgcc_s.so.1",O_RDONLY|O_CLOEXEC|O_VERIFY,00) = 3 (0x3)
> fstat(3,{ mode=-r--r--r-- ,inode=186687,size=93448,blksize=32768 }) = 0
> (0x0)
> close(3) = 0 (0x0)
> madvise(0x801a00000,1839104,MADV_FREE) = 0 (0x0)
> madvise(0x80184f000,4096,MADV_FREE) = 0 (0x0)
> madvise(0x801851000,36864,MADV_FREE) = 0 (0x0)
> madvise(0x801801000,274432,MADV_FREE) = 0 (0x0)
> <thread 102902 exited>
> _umtx_op(0x80026e008,UMTX_OP_WAIT_UINT_PRIVATE,0x0,0x18,0x7fffdfffdec8)
> ERR#60 'Operation timed out'
> _umtx_op(0x800a129c0,UMTX_OP_NWAKE_PRIVATE,0x1,0x0,0x0) = 0 (0x0)
> _umtx_op(0x8002908a8,UMTX_OP_WAIT_UINT_PRIVATE,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_UNLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_WRLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_UNLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_RDLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_UNLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_WRLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_RDLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_UNLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_WRLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_UNLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_RDLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_WRLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_UNLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_RDLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_UNLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_WRLOCK,0x0,0x0,0x0) = 0 (0x0)
> _umtx_op(0x800290d80,UMTX_OP_RW_RDLOCK,0x0,0x0,0x0) = 0 (0x0)
> <thread 102901 exited>
> exit(0x0)
> process exit, rval = 0
>
> On Sat, May 1, 2021 at 1:26 AM Özkan KIRIK <ozkan.kirik at gmail.com> wrote:
>
>> Here are fresh results;
>> a small performance gain was achieved. But strangely, QAT is better now.
>>
>> - with CCR - 2.14 Gbps
>>   PID USERNAME    PRI NICE   SIZE    RES STATE    C   TIME    WCPU COMMAND
>>    15 root        -16    -     0B    16K CPU7     7   0:11 100.00%
>> [crypto returns 9]
>>    11 root        -92    -     0B  1120K CPU4     4   0:10  98.74%
>> [intr{irq295: t6nex0:0a0}]
>>    14 root        -16    -     0B    16K CPU11   11   0:09  95.32%
>> [crypto returns 8]
>>    11 root        -92    -     0B  1120K CPU8     8   0:07  63.49%
>> [intr{irq297: t6nex0:0a2}]
>>    11 root        -92    -     0B  1120K WAIT    12   0:05  42.73%
>> [intr{irq307: t6nex0:1a2}]
>>    16 root        -16    -     0B    16K CPU15   15   0:04  33.82%
>> [crypto returns 10]
>>    13 root        -16    -     0B    16K RUN      3   0:02  21.78%
>> [crypto returns 7]
>>  4317 root         22    0    23M  4348K sbwait  14   0:01   7.24% iperf
>> -B 172.16.70.6 -c 172.16.68.1 -P 2 -t 20{iperf}
>>  4329 root         23    0    23M  4348K sbwait  14   0:01   7.18% iperf
>> -B 172.16.70.10 -c 172.16.68.1 -P 2 -t 20{iperf}
>> ...
>>    11 root        -92    -     0B  1120K WAIT     3   0:00   0.31%
>> [intr{irq294: t6nex0:evt}]
>>     0 root        -92    -     0B  2144K -       14   0:00   0.30%
>> [kernel{t6nex0 tq0}]
>> ...
>>
>>
>> - with QAT - 2.63 Gbps
>>  PID USERNAME    PRI NICE   SIZE    RES STATE    C   TIME    WCPU COMMAND
>>     8 root        -16    -     0B    16K CPU7     7   0:31  92.05%
>> [crypto returns 5]
>>     7 root        -16    -     0B    16K CPU0     0   0:29  86.03%
>> [crypto returns 4]
>>     9 root        -16    -     0B    16K RUN     15   0:15  43.32%
>> [crypto returns 6]
>>    11 root        -92    -     0B  1664K RUN      4   0:15  43.03%
>> [intr{irq297: t6nex0:0a2}]
>>     6 root        -16    -     0B    16K crypto   4   0:09  26.01%
>> [crypto returns 3]
>>    11 root        -92    -     0B  1664K WAIT    12   0:07  19.88%
>> [intr{irq307: t6nex0:1a2}]
>>    11 root        -92    -     0B  1664K WAIT     8   0:02   7.22%
>> [intr{irq368: qat1}]
>>  4313 root         22    0    23M  4348K sbwait   2   0:01   6.51% iperf
>> -B 172.16.70.8 -c 172.16.68.1 -P 2 -t 20{iperf}
>>  4316 root         22    0    23M  4348K sbwait   3   0:01   6.18% iperf
>> -B 172.16.70.9 -c 172.16.68.1 -P 2 -t 20{iperf}
>> ...
>>    11 root        -92    -     0B  1664K WAIT    12   0:02   4.52%
>> [intr{irq355: qat0}]
>> ...
>>    11 root        -92    -     0B  1664K WAIT    12   0:01   1.89%
>> [intr{irq372: qat1}]
>>    11 root        -92    -     0B  1664K WAIT     0   0:00   1.12%
>> [intr{irq360: qat1}]
>>    11 root        -92    -     0B  1664K WAIT     2   0:00   1.11%
>> [intr{irq362: qat1}]
>>    11 root        -92    -     0B  1664K WAIT    14   0:00   1.10%
>> [intr{irq374: qat1}]
>>    11 root        -92    -     0B  1664K WAIT     4   0:00   1.10%
>> [intr{irq364: qat1}]
>>    11 root        -92    -     0B  1664K WAIT    10   0:00   1.10%
>> [intr{irq370: qat1}]
>>    11 root        -92    -     0B  1664K WAIT     7   0:00   1.09%
>> [intr{irq367: qat1}]
>>    11 root        -92    -     0B  1664K WAIT    11   0:00   1.07%
>> [intr{irq371: qat1}]
>>    11 root        -92    -     0B  1664K WAIT     5   0:00   1.04%
>> [intr{irq365: qat1}]
>>    11 root        -92    -     0B  1664K WAIT    15   0:00   1.04%
>> [intr{irq375: qat1}]
>>    11 root        -92    -     0B  1664K WAIT     1   0:00   1.04%
>> [intr{irq361: qat1}]
>>    11 root        -92    -     0B  1664K WAIT     6   0:00   1.03%
>> [intr{irq366: qat1}]
>>
>> On Sat, May 1, 2021 at 12:45 AM Mark Johnston <markj at freebsd.org> wrote:
>>
>>> On Sat, May 01, 2021 at 12:31:57AM +0300, Özkan KIRIK wrote:
>>> > Hello again,
>>> >
>>> > The patch is applied, and netisr is no longer eating CPU, but performance
>>> > drops by around 0.2Gbps compared to the previous kernel.
>>> >
>>> > I also tried both net.isr.maxthreads=1 and net.isr.maxthreads=4; the
>>> > results are the same.
>>> >
>>> > Results are:
>>> >
>>> > - with CCR - 1.8Gbps
>>> > top:
>>> >   PID USERNAME    PRI NICE   SIZE    RES STATE    C   TIME    WCPU
>>> COMMAND
>>> >    14 root        -16    -     0B    16K CPU5     5   1:38 100.00%
>>> [crypto
>>> > returns 8]
>>> >     3 root        -16    -     0B    16K CPU1     1   0:58  77.83%
>>> [crypto
>>> > returns 0]
>>>
>>> Could you also try this patch?  It won't help with aesni.
>>>
>>> diff --git a/sys/opencrypto/crypto.c b/sys/opencrypto/crypto.c
>>> index dfd22662e87f..bc23056e86ad 100644
>>> --- a/sys/opencrypto/crypto.c
>>> +++ b/sys/opencrypto/crypto.c
>>> @@ -94,6 +94,7 @@ struct crypto_session {
>>>         void *softc;
>>>         uint32_t hid;
>>>         uint32_t capabilities;
>>> +       uint64_t id;
>>>  };
>>>
>>>  SDT_PROVIDER_DEFINE(opencrypto);
>>> @@ -572,6 +573,7 @@ crypto_select_driver(const struct cryptoini *cri,
>>> int flags)
>>>  int
>>>  crypto_newsession(crypto_session_t *cses, struct cryptoini *cri, int
>>> crid)
>>>  {
>>> +       static uint64_t sessid = 0;
>>>         crypto_session_t res;
>>>         void *softc_mem;
>>>         struct cryptocap *cap;
>>> @@ -616,6 +618,7 @@ crypto_newsession(crypto_session_t *cses, struct
>>> cryptoini *cri, int crid)
>>>         softc_mem = malloc(softc_size, M_CRYPTO_DATA, M_WAITOK | M_ZERO);
>>>         res = uma_zalloc(cryptoses_zone, M_WAITOK | M_ZERO);
>>>         res->softc = softc_mem;
>>> +       res->id = atomic_fetchadd_64(&sessid, 1);
>>>
>>>         CRYPTO_DRIVER_LOCK();
>>>         cap = crypto_checkdriver(hid);
>>> @@ -1016,7 +1019,7 @@ crypto_dispatch(struct cryptop *crp)
>>>                 binuptime(&crp->crp_tstamp);
>>>  #endif
>>>
>>> -       crp->crp_retw_id = ((uintptr_t)crp->crp_session) %
>>> crypto_workers_num;
>>> +       crp->crp_retw_id = crp->crp_session->id % crypto_workers_num;
>>>
>>>         if (CRYPTOP_ASYNC(crp)) {
>>>                 if (crp->crp_flags & CRYPTO_F_ASYNC_KEEPORDER) {
>>>
>>


More information about the freebsd-net mailing list