Re: git: 84f981ba57e7 - main - nullfs: shrink null_node to 32 bytes

From: Mateusz Guzik <mjguzik_at_gmail.com>
Date: Mon, 06 Oct 2025 22:31:37 UTC
On Tue, Oct 7, 2025 at 12:30 AM Mateusz Guzik <mjguzik@gmail.com> wrote:
>
> On Tue, Oct 7, 2025 at 12:25 AM Konstantin Belousov <kostikbel@gmail.com> wrote:
> >
> > On Mon, Oct 06, 2025 at 06:23:07PM +0000, Mateusz Guzik wrote:
> > > The branch main has been updated by mjg:
> > >
> > > URL: https://cgit.FreeBSD.org/src/commit/?id=84f981ba57e77bd3c3d0fbf1469ce51bfd132a6b
> > >
> > > commit 84f981ba57e77bd3c3d0fbf1469ce51bfd132a6b
> > > Author:     Mateusz Guzik <mjg@FreeBSD.org>
> > > AuthorDate: 2025-10-06 17:59:17 +0000
> > > Commit:     Mateusz Guzik <mjg@FreeBSD.org>
> > > CommitDate: 2025-10-06 18:23:01 +0000
> > >
> > >     nullfs: shrink null_node to 32 bytes
> > > ---
> > >  sys/fs/nullfs/null.h      |  2 +-
> > >  sys/fs/nullfs/null_subr.c | 12 +++++++-----
> > >  2 files changed, 8 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
> > > index aa7a689bec34..ad3f7779e108 100644
> > > --- a/sys/fs/nullfs/null.h
> > > +++ b/sys/fs/nullfs/null.h
> > > @@ -53,7 +53,7 @@ struct null_mount {
> > >   * A cache of vnode references
> > >   */
> > >  struct null_node {
> > > -     CK_LIST_ENTRY(null_node) null_hash;     /* Hash list */
> > > +     CK_SLIST_ENTRY(null_node) null_hash;    /* Hash list */
> > >       struct vnode            *null_lowervp;  /* VREFed once */
> > >       struct vnode            *null_vnode;    /* Back pointer */
> > >       u_int                   null_flags;
> > > diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
> > > index ad8cd08279cc..bb0ff9966dfd 100644
> > > --- a/sys/fs/nullfs/null_subr.c
> > > +++ b/sys/fs/nullfs/null_subr.c
> > > @@ -59,7 +59,7 @@ VFS_SMR_DECLARE;
> > >
> > >  #define      NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
> > >
> > > -static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
> > > +static CK_SLIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
> > >  static struct rwlock null_hash_lock;
> > >  static u_long null_hash_mask;
> > >
> > > @@ -116,7 +116,7 @@ null_hashget_locked(struct mount *mp, struct vnode *lowervp)
> > >        * reference count (but NOT the lower vnode's VREF counter).
> > >        */
> > >       hd = NULL_NHASH(lowervp);
> > > -     CK_LIST_FOREACH(a, hd, null_hash) {
> > > +     CK_SLIST_FOREACH(a, hd, null_hash) {
> > >               if (a->null_lowervp != lowervp)
> > >                       continue;
> > >               /*
> > > @@ -148,7 +148,7 @@ null_hashget(struct mount *mp, struct vnode *lowervp)
> > >
> > >       vfs_smr_enter();
> > >       hd = NULL_NHASH(lowervp);
> > > -     CK_LIST_FOREACH(a, hd, null_hash) {
> > > +     CK_SLIST_FOREACH(a, hd, null_hash) {
> > >               if (a->null_lowervp != lowervp)
> > >                       continue;
> > >               /*
> > > @@ -189,7 +189,7 @@ null_hashins(struct mount *mp, struct null_node *xp)
> > >               }
> > >       }
> > >  #endif
> > > -     CK_LIST_INSERT_HEAD(hd, xp, null_hash);
> > > +     CK_SLIST_INSERT_HEAD(hd, xp, null_hash);
> > >  }
> > >
> > >  static void
> > > @@ -305,9 +305,11 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
> > >  void
> > >  null_hashrem(struct null_node *xp)
> > >  {
> > > +     struct null_node_hashhead *hd;
> > >
> > > +     hd = NULL_NHASH(xp->null_lowervp);
> > >       rw_wlock(&null_hash_lock);
> > > -     CK_LIST_REMOVE(xp, null_hash);
> > > +     CK_SLIST_REMOVE(hd, xp, null_node, null_hash);
> > This changes O(1) removal into O(N), for N being the size of the hash
> > chain length for specific hash.  I.e. it is on par with the lookup.
> >
> > Why it is fine?  Why it was not mentioned and explained in the commit
> > message?
> >
>
> For the hash to be at all useful the chains are supposed to be short.
>
> Or to put it differently, a sensibly sized hash with a sensible
> hashing function makes it automatically fine.
>
> This was worthwhile to do because the original size of 40 bytes is a
> very poor fit for the allocator.
>
> fwiw the namecache already works this way for few years.
>
> the regular vnode hash should probably get the same treatment, saving
> 8 bytes off of struct vnode

I can concede it would make sense to include this in the commit message.