Four problems with PF/CARP (NAT/CARP/PFSYNC/VLAN)

Janusz Mućka (Defacto) gdef at cvd.pl
Wed Sep 28 04:11:39 PDT 2005


Hi,

There are four problems with pf and/or CARP. Here is a short description of the network:

WAN <--> CISCO ROUTER <--> PIX FIREWALL <---> FreeBSD 5.4 <---> LAN
                                          |                 |
                                          --> FreeBSD 5.4 <--

Network cards in FreeBSD box are:
em0: <Intel(R) PRO/1000 Network Connection, Version - 1.7.35>

Custom sysctl variables:
kern.maxfiles=8144
kern.ipc.somaxconn=256
security.bsd.see_other_uids=0
net.link.ether.inet.proxyall=0
net.link.ether.inet.log_arp_wrong_iface=0
net.inet.ip.random_id=1
net.inet.ip.stealth=1
net.inet.tcp.sendspace=65536
net.inet.tcp.drop_synfin=1
net.inet.tcp.blackhole=2
net.inet.udp.blackhole=1
net.inet.carp.preempt=1
kern.maxfiles=16424
kern.maxfilesperproc=16424

System on both boxes (cvsuped today):
5.4-STABLE FreeBSD 5.4-STABLE #12: Wed Sep 28 08:50:40 CEST 2005

And these are the problems:
1) CARP problem. When a packet whose source IP belongs to the CARP logical interface is sent, it carries the Ethernet (MAC) source address of the physical interface instead of the CARP virtual MAC address.
This can cause connection resets on firewalls that protect against ARP poisoning. Here is a sample from tcpdump:

09:17:12.115469 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 114: IP 192.168.10.33.58296 > 192.168.20.100.22: P 2552:2600(48) ack 5822 win 32832 <nop,nop,timestamp 2486153214 273992649>
09:17:12.197103 00:e0:b6:05:6e:4a > 00:00:5e:00:01:64, ethertype IPv4 (0x0800), length 114: IP 192.168.20.100.22 > 192.168.10.33.58296: P 5822:5870(48) ack 2600 win 32832 <nop,nop,timestamp 273993145 2486153214>
09:17:12.250509 00:e0:b6:05:6e:4a > 00:00:5e:00:01:64, ethertype IPv4 (0x0800), length 146: IP 192.168.20.100.22 > 192.168.10.33.58296: P 5870:5950(80) ack 2600 win 32832 <nop,nop,timestamp 273993148 2486153214>
09:17:12.254403 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 66: IP 192.168.10.33.58296 > 192.168.20.100.22: . ack 5950 win 32792 <nop,nop,timestamp 2486153359 273993145>

2) PF problem. This is probably a NAT issue. After a random number of packets has been sent, the connection is reset. The reset is issued by the PIX because a packet arrives with strange (very high) sequence and ACK numbers
(it shows 2934356076:2934356124(48) ack 1778440099, but should be 2840:????(48) ack 6558, where ???? is the next, in this case unknown, number). Perhaps in a normal environment such a packet is silently dropped and later retransmitted.

10:58:25.461110 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 114: IP 192.168.10.33.50352 > 192.168.20.100.22: P 2552:2600(48) ack 5790 win 32832 <nop,nop,timestamp 2492286734 274600487>
10:58:25.493246 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 114: IP 192.168.20.100.22 > 192.168.10.33.50352: P 5790:5838(48) ack 2600 win 32832 <nop,nop,timestamp 274600539 2492286734>
10:58:25.527593 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 146: IP 192.168.20.100.22 > 192.168.10.33.50352: P 5838:5918(80) ack 2600 win 32832 <nop,nop,timestamp 274600542 2492286734>
10:58:25.538031 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 66: IP 192.168.10.33.50352 > 192.168.20.100.22: . ack 5918 win 32792 <nop,nop,timestamp 2492286812 274600539>

10:58:28.481294 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 114: IP 192.168.10.33.50352 > 192.168.20.100.22: P 2600:2648(48) ack 5918 win 32832 <nop,nop,timestamp 2492289784 274600539>
10:58:28.527429 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 114: IP 192.168.20.100.22 > 192.168.10.33.50352: P 5918:5966(48) ack 2648 win 32832 <nop,nop,timestamp 274600842 2492289784>
10:58:28.551036 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 146: IP 192.168.20.100.22 > 192.168.10.33.50352: P 5966:6046(80) ack 2648 win 32832 <nop,nop,timestamp 274600845 2492289784>
10:58:28.551358 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 66: IP 192.168.10.33.50352 > 192.168.20.100.22: . ack 6046 win 32792 <nop,nop,timestamp 2492289855 274600842>

10:58:30.643914 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 114: IP 192.168.10.33.50352 > 192.168.20.100.22: P 2648:2696(48) ack 6046 win 32832 <nop,nop,timestamp 2492291968 274600842>
10:58:30.678680 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 114: IP 192.168.20.100.22 > 192.168.10.33.50352: P 6046:6094(48) ack 2696 win 32832 <nop,nop,timestamp 274601057 2492291968>
10:58:30.707290 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 146: IP 192.168.20.100.22 > 192.168.10.33.50352: P 6094:6174(80) ack 2696 win 32832 <nop,nop,timestamp 274601060 2492291968>
10:58:30.707617 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 66: IP 192.168.10.33.50352 > 192.168.20.100.22: . ack 6174 win 32792 <nop,nop,timestamp 2492292033 274601057>

10:58:31.050973 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 114: IP 192.168.10.33.50352 > 192.168.20.100.22: P 2696:2744(48) ack 6174 win 32832 <nop,nop,timestamp 2492292379 274601057>
10:58:31.092163 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 114: IP 192.168.20.100.22 > 192.168.10.33.50352: P 6174:6222(48) ack 2744 win 32832 <nop,nop,timestamp 274601097 2492292379>
10:58:31.106039 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 146: IP 192.168.20.100.22 > 192.168.10.33.50352: P 6222:6302(80) ack 2744 win 32832 <nop,nop,timestamp 274601100 2492292379>
10:58:31.124640 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 66: IP 192.168.10.33.50352 > 192.168.20.100.22: . ack 6302 win 32792 <nop,nop,timestamp 2492292454 274601097>

10:58:33.406048 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 114: IP 192.168.10.33.50352 > 192.168.20.100.22: P 2744:2792(48) ack 6302 win 32832 <nop,nop,timestamp 2492294758 274601097>
10:58:33.442038 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 114: IP 192.168.20.100.22 > 192.168.10.33.50352: P 6302:6350(48) ack 2792 win 32832 <nop,nop,timestamp 274601333 2492294758>
10:58:33.476625 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 146: IP 192.168.20.100.22 > 192.168.10.33.50352: P 6350:6430(80) ack 2792 win 32832 <nop,nop,timestamp 274601337 2492294758>
10:58:33.490131 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 66: IP 192.168.10.33.50352 > 192.168.20.100.22: . ack 6430 win 32792 <nop,nop,timestamp 2492294843 274601333>

10:58:35.854608 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 114: IP 192.168.10.33.50352 > 192.168.20.100.22: P 2792:2840(48) ack 6430 win 32832 <nop,nop,timestamp 2492297231 274601333>
10:58:35.896098 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 114: IP 192.168.20.100.22 > 192.168.10.33.50352: P 6430:6478(48) ack 2840 win 32832 <nop,nop,timestamp 274601578 2492297231>
10:58:35.911508 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 146: IP 192.168.20.100.22 > 192.168.10.33.50352: P 6478:6558(80) ack 2840 win 32832 <nop,nop,timestamp 274601581 2492297231>
10:58:35.911917 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 66: IP 192.168.10.33.50352 > 192.168.20.100.22: . ack 6558 win 32792 <nop,nop,timestamp 2492297289 274601578>

10:58:39.005606 00:03:47:32:ec:0a > 00:e0:b6:05:6e:4a, ethertype IPv4 (0x0800), length 114: IP 192.168.10.33.51210 > 192.168.20.100.22: P 2934356076:2934356124(48) ack 1778440099 win 32832 <nop,nop,timestamp 2492300413 274601578>
10:58:39.005867 00:e0:b6:05:6e:4a > 00:03:47:32:ec:0a, ethertype IPv4 (0x0800), length 114: IP 192.168.20.100.22 > 192.168.10.33.51210: R 1:49(48) ack 0 win 32832 <nop,nop,timestamp 2492300413 274601578>

3) PFSYNC problem. While states are being synchronized in a CARP cluster, the machine in BACKUP state performs a state flush after 1000-3000 states. Nothing like this happens on the machine acting as MASTER; there, states are created and removed normally (on connection end, timeout, etc.).
This causes the following problems:
 a) returning from BACKUP to MASTER state takes very long (it may even be impossible, because a full state sync must complete before the return and the flushes disturb the sync process)
 b) when the machine acting as MASTER fails, not all connections are kept on the BACKUP. The effect is easy to guess...

4) CARP & VLAN. When the parent device of a VLAN interface is in the down state and CARP is created on the VLAN, CARP remains in the INIT state after the link comes up. This is fixed by the following patches, which have not been committed yet.

--- if_em.c.orig        Thu May 19 10:23:06 2005
+++ if_em.c     Tue Aug 16 14:03:15 2005
@@ -1666,6 +1666,11 @@
        return;
 }

+
+#ifdef DEV_CARP
+extern  void (*vlan_link_state_p)(struct ifnet *, int);
+#endif
+
 static void
 em_print_link_status(struct adapter * adapter)
 {
@@ -1685,6 +1690,8 @@
                        adapter->smartspeed = 0;
                        ifp->if_link_state = LINK_STATE_UP;
 #ifdef DEV_CARP
+                       if (ifp->if_nvlans != 0)
+                               (*vlan_link_state_p)(ifp, NOTE_LINKUP);
                        if (ifp->if_carp)
                                carp_carpdev_state(ifp->if_carp);
 #endif
@@ -1697,6 +1704,8 @@
                        adapter->link_active = 0;
                        ifp->if_link_state = LINK_STATE_DOWN;
 #ifdef DEV_CARP
+                       if (ifp->if_nvlans != 0)
+                               (*vlan_link_state_p)(ifp, NOTE_LINKDOWN);
                        if (ifp->if_carp)
                                carp_carpdev_state(ifp->if_carp);
 #endif




--- if_vlan.c.orig      Tue Aug 16 13:41:18 2005
+++ if_vlan.c   Tue Aug 16 13:47:29 2005
@@ -41,6 +41,7 @@
  * and ask it to send them.
  */

+#include "opt_carp.h"
 #include "opt_inet.h"

 #include <sys/param.h>
@@ -67,6 +68,11 @@
 #ifdef INET
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
+
+#ifdef DEV_CARP
+#include <netinet/in_var.h>
+#include <netinet/ip_carp.h>
+#endif
 #endif

 #define VLANNAME       "vlan"
@@ -822,6 +828,10 @@
                        ifv->ifv_if.if_link_state = ifv->ifv_p->if_link_state;
                        rt_ifmsg(&(ifv->ifv_if));
                        KNOTE_UNLOCKED(&ifp->if_klist, link);
+#ifdef DEV_CARP
+                       if (ifv->ifv_if.if_carp)
+                               carp_carpdev_state(ifv->ifv_if.if_carp);
+#endif
                }
        }
        VLAN_UNLOCK();


Thanks for any answers, possible solutions, or patches.
I'll be glad to help or test them.

--
Janusz Mućka
admin at cvd.pl
UIN 82936675


More information about the freebsd-pf mailing list