Re: epair and vnet jail loose connection.

From: Johan Hendriks <joh.hendriks_at_gmail.com>
Date: Sat, 12 Mar 2022 14:18:38 UTC
With this minimal setup I can reproduce the network drop-off on the
haproxy jail.

There are 2 jails: one with haproxy and one with nginx, which serves the
following HTML file.

<!DOCTYPE html>
<html>
<head>
<title>Page Title</title>
</head>
<body>

<h1>My First Heading</h1>
<p>My first paragraph.</p>

</body>
</html>

From a remote machine I run: hey -h2 -n 10 -c 10 -z 300s https://wp.test.nl
Then a ping from the jail host to the haproxy jail shows the following:

[ /] > ping 10.233.185.20
PING 10.233.185.20 (10.233.185.20): 56 data bytes
64 bytes from 10.233.185.20: icmp_seq=0 ttl=64 time=0.054 ms
64 bytes from 10.233.185.20: icmp_seq=1 ttl=64 time=0.050 ms
64 bytes from 10.233.185.20: icmp_seq=2 ttl=64 time=0.041 ms
<SNIP>
64 bytes from 10.233.185.20: icmp_seq=169 ttl=64 time=0.050 ms
64 bytes from 10.233.185.20: icmp_seq=170 ttl=64 time=0.154 ms
64 bytes from 10.233.185.20: icmp_seq=171 ttl=64 time=0.054 ms
64 bytes from 10.233.185.20: icmp_seq=172 ttl=64 time=0.039 ms
64 bytes from 10.233.185.20: icmp_seq=173 ttl=64 time=0.160 ms
64 bytes from 10.233.185.20: icmp_seq=174 ttl=64 time=0.045 ms
^C
--- 10.233.185.20 ping statistics ---
335 packets transmitted, 175 packets received, 47.8% packet loss
round-trip min/avg/max/stddev = 0.037/0.070/0.251/0.040 ms


ifconfig
vtnet0: flags=8963<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> metric 0
mtu 1500
options=4c00bb<RXCSUM,TXCSUM,VLAN_MTU,VLAN_HWTAGGING,JUMBO_MTU,VLAN_HWCSUM,VLAN_HWTSO,LINKSTATE,TXCSUM_IPV6>
ether 56:16:e9:80:5e:41
inet 87.233.191.146 netmask 0xfffffff0 broadcast 87.233.191.159
inet 87.233.191.156 netmask 0xffffffff broadcast 87.233.191.156
inet 87.233.191.155 netmask 0xffffffff broadcast 87.233.191.155
inet 87.233.191.154 netmask 0xffffffff broadcast 87.233.191.154
media: Ethernet autoselect (10Gbase-T <full-duplex>)
status: active
nd6 options=29<PERFORMNUD,IFDISABLED,AUTO_LINKLOCAL>
vtnet1: flags=8863<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> metric 0 mtu 1500
options=4c07bb<RXCSUM,TXCSUM,VLAN_MTU,VLAN_HWTAGGING,JUMBO_MTU,VLAN_HWCSUM,TSO4,TSO6,LRO,VLAN_HWTSO,LINKSTATE,TXCSUM_IPV6>
ether 56:16:2c:64:32:35
media: Ethernet autoselect (10Gbase-T <full-duplex>)
status: active
nd6 options=29<PERFORMNUD,IFDISABLED,AUTO_LINKLOCAL>
lo0: flags=8049<UP,LOOPBACK,RUNNING,MULTICAST> metric 0 mtu 16384
options=680003<RXCSUM,TXCSUM,LINKSTATE,RXCSUM_IPV6,TXCSUM_IPV6>
inet6 ::1 prefixlen 128
inet6 fe80::1%lo0 prefixlen 64 scopeid 0x3
inet 127.0.0.1 netmask 0xff000000
groups: lo
nd6 options=21<PERFORMNUD,AUTO_LINKLOCAL>
bridge0: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> metric 0 mtu
1500
ether 58:9c:fc:10:ff:82
inet 10.233.185.1 netmask 0xffffff00 broadcast 10.233.185.255
id 00:00:00:00:00:00 priority 32768 hellotime 2 fwddelay 15
maxage 20 holdcnt 6 proto rstp maxaddr 2000 timeout 1200
root id 00:00:00:00:00:00 priority 32768 ifcost 0 port 0
member: epair20a flags=143<LEARNING,DISCOVER,AUTOEDGE,AUTOPTP>
       ifmaxaddr 0 port 7 priority 128 path cost 2000
member: epair18a flags=143<LEARNING,DISCOVER,AUTOEDGE,AUTOPTP>
       ifmaxaddr 0 port 15 priority 128 path cost 2000
groups: bridge
nd6 options=9<PERFORMNUD,IFDISABLED>
bridge1: flags=8843<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> metric 0 mtu
1500
ether 58:9c:fc:10:d9:1a
id 00:00:00:00:00:00 priority 32768 hellotime 2 fwddelay 15
maxage 20 holdcnt 6 proto rstp maxaddr 2000 timeout 1200
root id 00:00:00:00:00:00 priority 32768 ifcost 0 port 0
member: vtnet0 flags=143<LEARNING,DISCOVER,AUTOEDGE,AUTOPTP>
       ifmaxaddr 0 port 1 priority 128 path cost 2000
groups: bridge
nd6 options=9<PERFORMNUD,IFDISABLED>
pflog0: flags=141<UP,RUNNING,PROMISC> metric 0 mtu 33160
groups: pflog
epair18a: flags=8963<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> metric
0 mtu 1500
description: jail_web01
options=8<VLAN_MTU>
ether 02:77:ea:19:c7:0a
groups: epair
media: Ethernet 10Gbase-T (10Gbase-T <full-duplex>)
status: active
nd6 options=29<PERFORMNUD,IFDISABLED,AUTO_LINKLOCAL>
epair20a: flags=8963<UP,BROADCAST,RUNNING,PROMISC,SIMPLEX,MULTICAST> metric
0 mtu 1500
description: jail_haproxy
options=8<VLAN_MTU>
ether 02:9b:93:8c:59:0a
groups: epair
media: Ethernet 10Gbase-T (10Gbase-T <full-duplex>)
status: active
nd6 options=29<PERFORMNUD,IFDISABLED,AUTO_LINKLOCAL>

jail.conf

# Global settings applied to all jails.
$domain = "test.nl";

exec.start = "/bin/sh /etc/rc";
exec.stop = "/bin/sh /etc/rc.shutdown";
exec.clean;

mount.fstab = "/storage/jails/$name.fstab";

exec.system_user  = "root";
exec.jail_user    = "root";
mount.devfs;
sysvshm="new";
sysvsem="new";
allow.raw_sockets;
allow.set_hostname = 0;
allow.sysvipc;
enforce_statfs = "2";
devfs_ruleset     = "11";

path = "/storage/jails/${name}";
host.hostname = "${name}.${domain}";


# Networking
vnet;
vnet.interface    = "vnet0";

  # Commands to run on host before jail is created
  exec.prestart  = "ifconfig epair${ip} create up description jail_${name}";
  exec.prestart  += "ifconfig epair${ip}a up";
  exec.prestart  += "ifconfig bridge0 addm epair${ip}a up";
  exec.created   = "ifconfig epair${ip}b name vnet0";

  # Commands to run in jail after it is created
  exec.start  += "/bin/sh /etc/rc";

  # commands to run in jail when jail is stopped
  exec.stop  = "/bin/sh /etc/rc.shutdown";

  # Commands to run on host when jail is stopped
  exec.poststop  = "ifconfig bridge0 deletem epair${ip}a";
  exec.poststop  += "ifconfig epair${ip}a destroy";
  persist;

web01 {
    $ip = 18;
}

haproxy {
    $ip = 20;
    mount.fstab = "";
    path = "/storage/jails/${name}";
}

pf.conf

#######################################################################
ext_if="vtnet0"
table <bruteforcers> persist
table <torlist> persist
table <ssh-trusted> persist file "/usr/local/etc/pf/ssh-trusted"
table <custom-block> persist file "/usr/local/etc/pf/custom-block"
table <jailnetworks> { 10.233.185.0/24, 192.168.10.0/24 }

icmp_types = "echoreq"
junk_ports="{ 135,137,138,139,445,68,67,3222,17500 }"

# Log interface
set loginterface $ext_if

# Set limits
set limit { states 40000, frags 20000, src-nodes 20000 }

scrub on $ext_if all fragment reassemble no-df random-id

# ---- Nat jails to the web
binat on $ext_if from 10.233.185.15/32 to !10.233.185.0/24 ->
87.233.191.156/32 # saltmaster
binat on $ext_if from 10.233.185.20/32 to !10.233.185.0/24 ->
87.233.191.155/32 # haproxy
binat on $ext_if from 10.233.185.22/32 to !10.233.185.0/24 ->
87.233.191.154/32 # web-comb

nat on $ext_if from <jailnetworks> to any -> ($ext_if:0)

# ---- First rule obligatory "Pass all on loopback"
pass quick on lo0 all
pass quick on bridge0 all
pass quick on bridge1 all

# ---- Block TOR exit addresses
block quick proto { tcp, udp } from <torlist> to $ext_if

# ---- Second rule "Block all in and pass all out"
block in log all
pass out all keep state

# IPv6 pass in/out all IPv6 ICMP traffic
pass in quick proto icmp6 all

# Pass all lo0
set skip on lo0

############### FIREWALL ###############################################
# ---- Block custom ip's and logs
block quick proto { tcp, udp } from <custom-block> to $ext_if

# ---- Jail ports
pass in quick on { $ext_if } proto tcp from any to 10.233.185.22 port {
smtp 80 443 993 995 1956 } keep state
pass in quick on { $ext_if } proto tcp from any to 10.233.185.20 port {
smtp 80 443 993 995 1956 } keep state
pass in quick on { $ext_if } proto tcp from any to 10.233.185.15 port {
4505 4506 } keep state

# ---- Allow ICMP
pass in inet proto icmp all icmp-type $icmp_types keep state
pass out inet proto icmp all icmp-type $icmp_types keep state

pass in quick on $ext_if inet proto tcp from any to $ext_if port { 80, 443
} flags S/SA keep state
pass in quick on $ext_if inet proto tcp from <ssh-trusted> to $ext_if port
{ 4505 4506 } flags S/SA keep state
block log quick from <bruteforcers>
pass quick proto tcp from <ssh-trusted> to $ext_if port ssh flags S/SA keep
state

This is as minimal as I can get it.

Hope this helps.
regards,
Johan Hendriks


Op za 12 mrt. 2022 om 02:10 schreef Kristof Provost <kp@freebsd.org>:

> On 11 Mar 2022, at 18:55, Michael Gmelin wrote:
> >> On 12. Mar 2022, at 01:21, Kristof Provost <kp@freebsd.org> wrote:
> >>
> >> On 11 Mar 2022, at 17:44, Johan Hendriks wrote:
> >>>> On 09/03/2022 20:55, Johan Hendriks wrote:
> >>>> The problem:
> >>>> I have a FreeBSD 14 machine and a FreeBSD 13-stable machine, both
> running the same jails just to test the workings.
> >>>>
> >>>> The jails that are running are a salt master, a haproxy  jail, 2
> webservers, 2 varnish servers, 2 php jails one for php8.0 and one with 8.1.
> All the jails are connected to bridge0 and all the jails use vnet.
> >>>>
> >>>> I believe this worked on an older 14-HEAD machine, but I did not do a
> lot with it back then, and when I started testing again after updating
> the OS I noticed that one of the varnish jails lost its network connection
> after running for a few hours. I thought it was just something on HEAD, so I
> never really looked at it. But later on, when I started using the jails again
> and testing a test WordPress site, I noticed that with a simple load test my
> haproxy jail loses its network connection within one minute. I see
> nothing in the logs, either on the host or in the jail.
> >>>> From the jail I cannot ping the other jails or the IP address of the
> bridge. I can, however, ping the jail's own IP address. From the host I also
> cannot ping the haproxy jail's IP address. If I start a tcpdump on the "a"
> side of the haproxy jail's epair, I do see the packets arrive there, but they
> do not show up inside the jail.
> >>>>
> >>>> I used ZFS to send all the jails to a 13-STABLE machine and copied
> over the jail.conf file as well as the pf.conf file and i saw the same
> behavior.
> >>>>
> >>>> Then i tried to use 13.0-RELEASE-p7 and on that machine i do not see
> this happening. There i can stress test the machine for 10 minutes without
> a problem but on 14-HEAD and 13-STABLE within a minute the jail's network
> connection fails and only a restart of the jail brings it back online to
> exhibit the same behavior if i start a simple load test which it should
> handle nicely.
> >>>>
> >>>> One of the jail hosts is running under VMWARE and the other is
> running under Ubuntu with KVM. The 13.0-RELEASE-p7 jail host is running
> under Ubuntu with KVM
> >>>>
> >>>> Thank you for your time.
> >>>> regards
> >>>> Johan
> >>>>
> >>> I did some bisecting, and the latest commit that works on FreeBSD
> 13-STABLE is 009a56b2e.
> >>> Commit 2e0bee4c7 ("if_epair: implement fanout") and everything after it
> shows the symptoms described above.
> >>>
> >> Interestingly I cannot reproduce stalls in simple epair setups.
> >> It would be useful if you could reduce the setup with the problem into
> a minimal configuration so we can figure out what other factors are
> involved.
> >
> > If there are clear instructions on how to reproduce, I’m happy to help
> experimenting (I’m relying heavily on epair at this point).
> >
> > @Kristof: Did you try on bare metal or on vms?
> >
> Both.
>
> Kristof
>