rdr pass for proto tcp sometimes creates states with expire time zero and so breaking connections
Konstantin Belousov
kib at freebsd.org
Fri Jan 25 13:14:18 UTC 2019
On Thu, Jan 24, 2019 at 11:09:37PM +0100, Andreas Longwitz wrote:
> >> I think the problem is the cmpxchg8b instruction used in
> >> counter_u64_fetch(), because this machine instruction always writes to
> >> memory, also when we only want to read and have (EDX:EAX) = (ECX:EBX):
> >>
> >> TEMP64 <- DEST
> >> IF (EDX:EAX = TEMP64)
> >> THEN
> >> ZF <- 1
> >> DEST <- ECX:EBX
> >> ELSE
> >> ZF <- 0
> >> EDX:EAX <- TEMP64
> >> DEST <- TEMP64
> >> FI
> >>
> >> If one CPU increments the counter in pf_create_state() and another does
> >> the fetch, then both CPU's may run the xmpxschg8b at once with the
> >> chance that both read the same memory value in TEMP64 and the fetching
> >> CPU is the second CPU that writes and so the increment is lossed. Thats
> >> what I see without the above patch two or three times a week.
> >
> > Please try the following patch. The idea is to make the value to compare
> > with unlikely to be equal to the memory content, for fetch_one().
>
> During my research I first had the same idea, but it did not work. In
> the actual coding eax/edx is not well defined before cmpxchg8b is
> executed, but it does not help for the problem to do so.
>
> > Also it fixes a silly bug in zero_one().
> >
> > diff --git a/sys/i386/include/counter.h b/sys/i386/include/counter.h
> > index 7fd26d2a960..aa20831ba18 100644
> > --- a/sys/i386/include/counter.h
> > +++ b/sys/i386/include/counter.h
> > @@ -78,6 +78,9 @@ counter_u64_read_one_8b(uint64_t *p)
> > uint32_t res_lo, res_high;
> >
> > __asm __volatile(
> > + "movl (%0),%%eax\n\t"
> > + "movl 4(%0),%%edx\n\t"
> > + "addl $0x10000000,%%edx\n\t" /* avoid write */
> > "movl %%eax,%%ebx\n\t"
> > "movl %%edx,%%ecx\n\t"
> > "cmpxchg8b (%2)"
>
> We can not avoid the write done by cmpxchg8b as can be seen from the
> microcode given above, we always end up with "DEST <- TEMP". From the
> Intel instruction reference manual:
>
> The destination operand is written back if the comparision fails. (The
> processor never produces a locked read without also producing a locked
> write).
I see, AMD APM is more clear there, stating that the instruction always
do rmw regardless of lock prefix.
>
> Maybe it is enough to prefix the cmpxchg8b with LOCK only in function
> counter_u64_read_one_8b().
I am not sure. Lets switch to IPI method for fetch, similar to clear.
I do not think that the cost of fetch is too important comparing with
the race.
>
>
> > @@ -120,11 +123,11 @@ counter_u64_zero_one_8b(uint64_t *p)
> > {
> > __asm __volatile(
> > +"\n1:\n\t"
> > "movl (%0),%%eax\n\t"
> > - "movl 4(%0),%%edx\n"
> > + "movl 4(%0),%%edx\n\t"
> > "xorl %%ebx,%%ebx\n\t"
> > "xorl %%ecx,%%ecx\n\t"
> > -"1:\n\t"
> > "cmpxchg8b (%0)\n\t"
> > "jnz 1b"
> > :
>
> If jnz jumps back the instruction cmpxchg8b has load registers eax/edx
> with (%0), therefor I do not understand the silly bug.
Ignore me.
diff --git a/sys/i386/include/counter.h b/sys/i386/include/counter.h
index 7fd26d2a960..278f89123a4 100644
--- a/sys/i386/include/counter.h
+++ b/sys/i386/include/counter.h
@@ -72,7 +72,12 @@ counter_64_inc_8b(uint64_t *p, int64_t inc)
}
#ifdef IN_SUBR_COUNTER_C
-static inline uint64_t
+struct counter_u64_fetch_cx8_arg {
+ uint64_t res;
+ uint64_t *p;
+};
+
+static uint64_t
counter_u64_read_one_8b(uint64_t *p)
{
uint32_t res_lo, res_high;
@@ -87,9 +92,22 @@ counter_u64_read_one_8b(uint64_t *p)
return (res_lo + ((uint64_t)res_high << 32));
}
+static void
+counter_u64_fetch_cx8_one(void *arg1)
+{
+ struct counter_u64_fetch_cx8_arg *arg;
+ uint64_t val;
+
+ arg = arg1;
+ val = counter_u64_read_one_8b((uint64_t *)((char *)arg->p +
+ UMA_PCPU_ALLOC_SIZE * PCPU_GET(cpuid)));
+ atomic_add_64(&arg->res, val);
+}
+
static inline uint64_t
counter_u64_fetch_inline(uint64_t *p)
{
+ struct counter_u64_fetch_cx8_arg arg;
uint64_t res;
int i;
@@ -108,9 +126,10 @@ counter_u64_fetch_inline(uint64_t *p)
}
critical_exit();
} else {
- CPU_FOREACH(i)
- res += counter_u64_read_one_8b((uint64_t *)((char *)p +
- UMA_PCPU_ALLOC_SIZE * i));
+ arg.p = p;
+ arg.res = 0;
+ smp_rendezvous(NULL, counter_u64_fetch_cx8_one, NULL, &arg);
+ res = arg.res;
}
return (res);
}
More information about the freebsd-pf
mailing list