problems with new the "contigmalloc" routine

Sun May 22 15:45:58 GMT 2005

On Saturday 21 May 2005 20:04, Scott Long wrote:
> John-Mark Gurney wrote:
> > Hans Petter Selasky wrote this message on Sat, May 21, 2005 at 15:46 
+0200:
> >>On Saturday 21 May 2005 00:49, Peter Jeremy wrote:
> >>>On Fri, 2005-May-20 21:51:34 +0200, Hans Petter Selasky wrote:
> >>>>Non-blocking mode has got to be supported. Else you get a nightmare
> >>>>rewriting the code to support blocking mode.
> >>>
> >>>Your code either has to block somewhere or fail.  contigmalloc() only
> >>>blocks if it couldn't satisfy the request immediately.  If it returns
> >>>to your code, you still have the problem of needing the memory and
> >>>not being able to allocate it without blocking.
> >>
> >>That is not the problem. The problem is that it sleeps, which will exit
> >> the Giant lock, which means another thread can change the state of what
> >> I am about to setup meanwhile:
> >>
> >>one_thread:
> >>
> >> mtx_lock(&Giant);
> >>
> >> if(sc->gone == 0)
> >> {
> >>    sc->data = contigmalloc();
> >> }
> >>
> >> mtx_unlock(&Giant);
> >>
> >>another_thread:
> >>
> >>  mtx_lock(&Giant);
> >>
> >>  if(sc->data)
> >>  {
> >>    contigfree();
> >>    sc->data = NULL;
> >>  }
> >>
> >>  sc->gone = 1;
> >>
> >>  mtx_unlock(&Giant);
> >>
> >>
> >>The problem is that the undefined state: "sc->data != NULL" and
> >>"sc->gone == 1" can be reached.
> >
> > How about rewriting the code to be:
> > one_thread:
> >  tmpdata = contigmalloc();
> >  mtx_lock(&Giant);
> >  if(sc->gone == 0) {
> >   sc->data = tmpdata;
> >  } else {
> >   contigfree(tmpdata);
> >  }
> >  mtx_unlock(&Giant);
> >
> > another_thread:
> >  mtx_lock(&Giant);
> >  if(sc->data) {
> >   tmpdata = sc->data;
> >   sc->data = NULL;
> >  }
> >
> >  sc->gone = 1;
> >
> >  mtx_unlock(&Giant);
> >  contigfree(tmpdata);

When I worked with USB I ran into some synchronization problems with callbacks 
that leaded me to writing to the stack of another thread, which I hope is OK. 
What do you think about the following:

struct context
{
  struct mtx *mtx;
  u_int8_t *done_p;
};

struct callback
{
    struct context ctx;
 void *arg;
 void (*func)(void *, struct context *ctx);
 struct callback *next, **next_p;
};

callback_thread():
{

retry:

 struct callback * callbacks[max_callback];
 struct mtx * mtxs[max_callback];
 u_int8_t done[max_callback] = { /* zero */ };
 int x = 0;
 int repeat = 0;
 int retry = 0;

 mtx_lock(&global_callback_lock);

 while(1)
 {
    callbacks[x] = GET_NEXT_CALLBACK();
    if(callbacks[x] == NULL)
    {
       break;
    }

    if(callbacks[x]->ctx.done_p)
    {
      /* another thread is calling callback
       * ERROR
       */
      continue;
    }

    callbacks[x]->ctx.done_p = &done[x];
    mtxs[x] = callbacks[x]->ctx.mtx;

    x++;
    if(x == max_callback)
    {
      retry = 1;
      break;
    }
 }

  mtx_unlock(&global_callback_lock);

  /* here one needs to switch lock to avoid
   * locking order reversal
   */

  while(x--)
  {
    mtx_lock(mtxs[x]);

    if(done[x] == 0)
    {
       callbacks[x]->ctx.done_p = NULL;

       (callbacks[x]->func)(callbacks[x]->arg, &callback[x]->ctx);
    }
    /* else callback stopped */

    mtx_unlock(mtxs[x]);
  }

  if(retry)
  {
    retry = 0;
    goto retry;
  }
}

void
callback_stop(struct callback *callback)
{
  mtx_assert(callback->ctx.mtx, MA_OWNED);

  mtx_lock(&global_callback_lock);

  REMOVE callback from QUEUE if queued

  if(callback->ctx.done_p)
  {
    *(callback->ctx.done_p) = 1;
    callback->ctx.done_p = NULL;
  }

  mtx_unlock(&global_callback_lock);
}

void
callback_start(struct callback *callback)
{
  mtx_assert(callback->ctx.mtx, MA_OWNED);

  mtx_lock(&global_callback_lock);

  if((callback not QUEUED) && 
   (callback->ctx.done_p == NULL))
  {
    QUEUE callback;
  }

  mtx_unlock(&global_callback_lock);
}

Now I return to my initial problem. The callback pointed to by 
"callback->func" will call "contigmalloc()". How can I solve this, keeping in 
mind that "callback_stop()" should stop the callback call?

void
context_exit(struct context *ctx, u_int8_t *tmpvar, struct mtx **tmpmtx)
{
   if(ctx == NULL) { *tmpvar = 0; *tmpmtx = 0; return; }
   if(ctx->done_p)
   {
      panic("context already exited\n");
   }

   mtx_assert(ctx->mtx, MA_OWNED|MA_STATIC);

   ctx->done_p = tmpvar;
   *tmpvar = 0;
   *tmpmtx = ctx->mtx;
   mtx_unlock(ctx->mtx);
   return;
}

int
context_enter(struct context *ctx, u_int8_t *tmpvar, struct mtx **tmpmtx)
{
   if(ctx)
   {
     mtx_lock(*tmpmtx);

     if(*tmpvar == 0)
     { ctx->done_p = NULL; return 0; }
     else
     { return EGONE; }
   }
   return 0;
}

int
context_sleep(struct context *ctx)
{
   u_int8_t tmpvar;
   int error;

   if(ctx == NULL) { return tsleep(); }
   if(ctx->done_p)
   {
      panic("context already exited\n");
   }

   mtx_assert(ctx->mtx, MA_OWNED|MA_STATIC);

   ctx->done_p = &tmpvar;
   tmpvar = 0;

   error = msleep(... ctx->mtx ...);

   if(tmpvar != 0)
     error = EGONE;
   else { ctx->done_p = NULL; }

   return error;
}

static void
my_callback(void *arg, struct context *ctx)
{
  void *tmpdata;
  u_int8_t tmpvar;
  struct mutex *tmpmtx;

  context_exit(ctx, &tmpvar, &tmpmtx);

  tmpdata = contigmalloc();

  if(context_enter(ctx, &tempvar, &tmpmtx))
  {
     /* callback was stopped while allocating memory! */
     contigfree(tmpdata);
  }
  else
  {
    ((struct softc *)arg)->data = tmpdata;
  }
  return;
}

Now I'm going to take this even further. How can I run a "callback_thread_2()" 
from a callback called from "callback_thread()"? 
With the few routines developed so far, it can be done simple and _safe_:

static void
my_callback(void *arg, struct context *ctx)
{
  void *tmpdata;
  u_int8_t tmpvar;
  struct mutex *tmpmtx;

retry:

  struct callback * callbacks[max_callback];
  struct mtx * mtxs[max_callback];
  u_int8_t done[max_callback] = { /* zero */ };
  int x = 0;
  int repeat = 0;
  int retry = 0;

  /* our mutex is already locked! */

  while(1)
  {
    callbacks[x] = GET_NEXT_CALLBACK_2(); /* this is a new QUEUE */
    if(callbacks[x] == NULL)
    {
       break;
    }

    if(callbacks[x]->ctx.done_p)
    {
      /* another thread is calling callback
       * ERROR
       */
      continue;
    }

    callbacks[x]->ctx.done_p = &done[x];
    mtxs[x] = callbacks[x]->ctx.mtx;

    x++;
    if(x == max_callback)
    {
      retry = 1;
      break;
    }
  }

  if(x == 0) return; /* nothing to do */

  /* need to unlock this context before entering next context ! */

  context_exit(ctx, &tmpvar, &tmpmtx);

  while(x--)
  {
    mtx_lock(mtxs[x]);

    if(done[x] == 0)
    {
       callbacks[x]->ctx.done_p = NULL;

       (callbacks[x]->func)(callbacks[x]->arg, &callback[x]->ctx);
    }
    /* else callback stopped */

    mtx_unlock(mtxs[x]);
  }

  if(context_enter(ctx, &tmpvar, &tmpmtx))
  {
     /* this callback has been stopped */
     return;
  }

  if(retry)
  {
    retry = 0;
    goto retry;
  }
  return;
}

Now back to the initial problem. It would have been very nice if the kernel 
functions like malloc/contigmalloc/copyin/copyout ... could support the "ctx" 
variable:

static void
my_callback(void *arg, struct context *ctx)
{
  void *tmpdata;

  /* speedup: context is only unlocked when needed ! */

  tmpdata = contigmalloc(size, ..., ctx);

  if(tmpdata == ((void *)1))
  {
    /* context gone */
    return; 
  }

  if(tmpdata == NULL)
  {
     /* no memory allocated */
     return;
  }
  else
  {
    ((struct softc *)arg)->data = tmpdata;
  }
  return;
}

This way of doing it is what I have concluded with so far. Else one ends up 
with synchronization problems like that the callback is called after that 
callback_stop() has been called. Also this works in hierarchies: One is able 
to stop parenting callbacks aswell as a child callbacks.

My theorem:

1) the mutex used by callbacks must be present after that the callback has 
been destroyed/freed. In other words the mutex must reside in static memory. 
I suggest a new flag be available, MTX_STATIC, that can be set on "static" 
mutexes.

2) the parent caller of a callback must lock the child's lock. The reason is 
that one has to synchronize with the child before making the call.

3) routines infering with a callback must have child's lock locked first, then 
the parent's lock. This ensures proper locking order. 

4) the context from which a callback is called, must be passed on

So why do we want to do things like shown above? Because we get non-blocking 
behaviour when shutting things down. Then one can save a whole lot of worries 
and coding:

Before (specific):

        sc->sc_refcnt++;
        tmpdata = malloc(...);
        if (--sc->sc_refcnt < 0)
                usb_detach_wakeup(USBDEV(sc->sc_dev));

        sc->sc_refcnt++;
        error = uiomove();
        if (--sc->sc_refcnt < 0)
                usb_detach_wakeup(USBDEV(sc->sc_dev));

        sc->sc_refcnt++;
        err = usbd_do_request(sc->sc_udev, &req, buf);
        if (--sc->sc_refcnt < 0)
                usb_detach_wakeup(USBDEV(sc->sc_dev));

After (generic):

       tmpdata = malloc(...,ctx);
       if(tmpdata <= ((void*)1)) return ENOMEM;

       error = uiomove(...,ctx);
       if(error) return error;

       err = usbd_do_request(sc->sc_udev, &req, buf, ctx);
       if(err == USBD_GONE)
       { return EGONE; }

This should also be implemented in the file-system:

  int
  xxx_read(struct cdev *dev, struct uio *uio, int flag)

changed into:

  int
  xxx_read(struct cdev *dev, struct uio *uio, int flag, struct context *ctx)

at detach:

  mtx_lock(sc->mtx); //NOTE: sc->mtx == ctx->mtx

  wakeup what can be sleeping (...);

  destroy_dev(...); //struct context is freed

  mtx_unlock(sc->mtx);

  free(sc);

  /* nothing more to worry about, and no loose ends are left,
   * just remember that the mutex is static
   */

  return;

at attach:

   sc->mtx = get_static_device_driver_mutex(...);
   if(sc->mtx == NULL) return ENOMEM;

   make_dev(..., sc->mtx);

........

I hope this wasn't too much for you to read :-)

Any comments ?

> >
> > That should do it..  Though you do need to have your own ref count on sc
> > to prevent the entire sc from going away before the first thread has
> > locked...  Anyways, you should be using your own lock that's in sc for
> > this instead of using Giant...

right

> I'd suggest just following a simplier and more deterministic path by
> either pre-allocating your contiguous buffers in a safe context, or
> allocating them on the fly before you depend on state being static.
> Our concept of 'sleep' and 'block' are a bit muddled now that we have
> liberal uses of sleep locks, and we as programmers need to cope and
> adjust.  It's always good form in embedded and kernel programming to
> pre-allocate and closely manage resources when you can; this isn't
> userland where resources are cheap.

Pre-allocating memory is good, but not always the easiest to do ...

--HPS