git: eca39864f702 - main - Add sysctl KERN_LOCKF

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sat, 09 Apr 2022 21:48:21 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=eca39864f702e577eba3bc7e9992d1e5e56eba58

commit eca39864f702e577eba3bc7e9992d1e5e56eba58
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2022-04-01 23:23:06 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2022-04-09 21:43:53 +0000

    Add sysctl KERN_LOCKF
    
    reporting the snapshot of the active advisory locks.
    
    A new VFS ops method vfs_report_lockf is provided in the mount point
    op table.  If it is NULL, as it is currently for all existing
    filesystems, the vfs_report_lockf() function is used, which gathers
    information from the standard implementation inside kern/kern_lockf.c.
    
    Filesystems implementing their own locking (NFSv4, for example) can
    provide a custom implementation.
    
    Reviewed by:    markj, rmacklem
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D34756
---
 sys/kern/kern_lockf.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sys/kern/vfs_init.c   |  15 +++++-
 sys/sys/mount.h       |   4 ++
 sys/sys/sysctl.h      |   1 +
 4 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/sys/kern/kern_lockf.c b/sys/kern/kern_lockf.c
index 2455c84ad65c..cad208197e76 100644
--- a/sys/kern/kern_lockf.c
+++ b/sys/kern/kern_lockf.c
@@ -68,14 +68,18 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/hash.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/sbuf.h>
+#include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/unistd.h>
+#include <sys/user.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
 #include <sys/fcntl.h>
@@ -2459,6 +2463,139 @@ graph_init(struct owner_graph *g)
 	return (g);
 }
 
+struct kinfo_lockf_linked {	/* one snapshotted lock queued for export */
+	struct kinfo_lockf kl;	/* record that is copied into the sbuf */
+	struct vnode *vp;	/* vnode of the lock; vhold()'ed while queued */
+	STAILQ_ENTRY(kinfo_lockf_linked) link;	/* local list linkage */
+};
+
+int	/* Append a kinfo_lockf record for each active lock on mp to sb. */
+vfs_report_lockf(struct mount *mp, struct sbuf *sb)
+{
+	struct lockf *ls;
+	struct lockf_entry *lf;
+	struct kinfo_lockf_linked *klf;
+	struct vnode *vp;
+	struct ucred *ucred;
+	char *fullpath, *freepath;
+	struct stat stt;
+	fsid_t fsidx;
+	STAILQ_HEAD(, kinfo_lockf_linked) locks;	/* snapshot built in pass 1 */
+	int error, gerror;	/* per-record status; sticky sbuf error */
+
+	STAILQ_INIT(&locks);
+	sx_slock(&lf_lock_states_lock);	/* pass 1: copy lock data under the sx locks */
+	LIST_FOREACH(ls, &lf_lock_states, ls_link) {
+		sx_slock(&ls->ls_lock);
+		LIST_FOREACH(lf, &ls->ls_active, lf_link) {
+			vp = lf->lf_vnode;
+			if (VN_IS_DOOMED(vp) || vp->v_mount != mp)
+				continue;	/* not a live vnode of this mount */
+			vhold(vp);	/* released by vdrop() in pass 2 */
+			klf = malloc(sizeof(struct kinfo_lockf_linked),
+			    M_LOCKF, M_WAITOK | M_ZERO);
+			klf->vp = vp;
+			klf->kl.kl_structsize = sizeof(struct kinfo_lockf);
+			klf->kl.kl_start = lf->lf_start;
+			klf->kl.kl_len = lf->lf_end == OFF_MAX ? 0 :	/* 0: ends at OFF_MAX */
+			    lf->lf_end - lf->lf_start + 1;
+			klf->kl.kl_rw = lf->lf_type == F_RDLCK ?
+			    KLOCKF_RW_READ : KLOCKF_RW_WRITE;
+			if (lf->lf_owner->lo_sysid != 0) {	/* nonzero sysid: remote */
+				klf->kl.kl_pid = lf->lf_owner->lo_pid;
+				klf->kl.kl_sysid = lf->lf_owner->lo_sysid;
+				klf->kl.kl_type = KLOCKF_TYPE_REMOTE;
+			} else if (lf->lf_owner->lo_pid == -1) {	/* pid -1: flock type */
+				klf->kl.kl_pid = -1;
+				klf->kl.kl_sysid = 0;
+				klf->kl.kl_type = KLOCKF_TYPE_FLOCK;
+			} else {	/* local owner identified by pid */
+				klf->kl.kl_pid = lf->lf_owner->lo_pid;
+				klf->kl.kl_sysid = 0;
+				klf->kl.kl_type = KLOCKF_TYPE_PID;
+			}
+			STAILQ_INSERT_TAIL(&locks, klf, link);
+		}
+		sx_sunlock(&ls->ls_lock);
+	}
+	sx_sunlock(&lf_lock_states_lock);
+
+	gerror = 0;	/* pass 2: sx locks dropped; stat, name and emit each record */
+	ucred = curthread->td_ucred;
+	fsidx = mp->mnt_stat.f_fsid;
+	while ((klf = STAILQ_FIRST(&locks)) != NULL) {
+		STAILQ_REMOVE_HEAD(&locks, link);
+		vp = klf->vp;
+		if (gerror == 0 && vn_lock(vp, LK_SHARED) == 0) {	/* else only clean up */
+			error = prison_canseemount(ucred, vp->v_mount);
+			if (error == 0)
+				error = VOP_STAT(vp, &stt, ucred, NOCRED);
+			VOP_UNLOCK(vp);
+			if (error == 0) {	/* otherwise the record is dropped */
+				memcpy(&klf->kl.kl_file_fsid, &fsidx,
+				    sizeof(fsidx));
+				klf->kl.kl_file_rdev = stt.st_rdev;
+				klf->kl.kl_file_fileid = stt.st_ino;
+				freepath = NULL;
+				fullpath = "-";
+				error = vn_fullpath(vp, &fullpath, &freepath);
+				if (error == 0)	/* else kl_path stays empty (M_ZERO) */
+					strlcpy(klf->kl.kl_path, fullpath,
+					    sizeof(klf->kl.kl_path));
+				free(freepath, M_TEMP);
+				if (sbuf_bcat(sb, &klf->kl,
+				    klf->kl.kl_structsize) != 0) {
+					gerror = sbuf_error(sb);	/* no more output */
+				}
+			}
+		}
+		vdrop(vp);	/* pairs with vhold() in pass 1 */
+		free(klf, M_LOCKF);
+	}
+
+	return (gerror);	/* 0, or the sbuf error that stopped output */
+}
+
+static int
+sysctl_kern_lockf_run(struct sbuf *sb)
+{
+	struct mount *mp;
+	int error;
+
+	error = 0;
+	mtx_lock(&mountlist_mtx);
+	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
+		error = vfs_busy(mp, MBF_MNTLSTLOCK);
+		if (error != 0)
+			continue;
+		error = mp->mnt_op->vfs_report_lockf(mp, sb);
+		mtx_lock(&mountlist_mtx);
+		vfs_unbusy(mp);
+		if (error != 0)
+			break;
+	}
+	mtx_unlock(&mountlist_mtx);
+	return (error);
+}
+
+/* kern.lockf handler: return the lock records of all mounts to the caller. */
+static int
+sysctl_kern_lockf(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf out;
+	int finish_error, run_error;
+
+	sbuf_new_for_sysctl(&out, NULL, 5 * sizeof(struct kinfo_lockf), req);
+	sbuf_clear_flags(&out, SBUF_INCLUDENUL);
+	run_error = sysctl_kern_lockf_run(&out);
+	finish_error = sbuf_finish(&out);
+	sbuf_delete(&out);
+	return (run_error == 0 ? finish_error : run_error);	/* run error wins */
+}
+SYSCTL_PROC(_kern, KERN_LOCKF, lockf, CTLTYPE_OPAQUE | CTLFLAG_RD |
+    CTLFLAG_MPSAFE, 0, 0, sysctl_kern_lockf, "S,lockf",
+    "Advisory locks table");
+
 #ifdef LOCKF_DEBUG
 /*
  * Print description of a lock owner
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
index 612cc06a0db7..d6065deb25fe 100644
--- a/sys/kern/vfs_init.c
+++ b/sys/kern/vfs_init.c
@@ -352,6 +352,17 @@ vfs_purge_sigdefer(struct mount *mp)
 	sigallowstop(prev_stops);
 }
 
+static int	/* vfs_report_lockf op run between sigdeferstop/sigallowstop */
+vfs_report_lockf_sigdefer(struct mount *mp, struct sbuf *sb)
+{
+	int error, saved_stops;
+
+	saved_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
+	error = mp->mnt_vfc->vfc_vfsops_sd->vfs_report_lockf(mp, sb);
+	sigallowstop(saved_stops);
+	return (error);
+}
+
 static struct vfsops vfsops_sigdefer = {
 	.vfs_mount =		vfs_mount_sigdefer,
 	.vfs_unmount =		vfs_unmount_sigdefer,
@@ -369,7 +380,7 @@ static struct vfsops vfsops_sigdefer = {
 	.vfs_reclaim_lowervp =	vfs_reclaim_lowervp_sigdefer,
 	.vfs_unlink_lowervp =	vfs_unlink_lowervp_sigdefer,
 	.vfs_purge =		vfs_purge_sigdefer,
-
+	.vfs_report_lockf =	vfs_report_lockf_sigdefer,
 };
 
 /* Register a new filesystem type in the global table */
@@ -483,6 +494,8 @@ vfs_register(struct vfsconf *vfc)
 		vfsops->vfs_extattrctl = vfs_stdextattrctl;
 	if (vfsops->vfs_sysctl == NULL)
 		vfsops->vfs_sysctl = vfs_stdsysctl;
+	if (vfsops->vfs_report_lockf == NULL)
+		vfsops->vfs_report_lockf = vfs_report_lockf;
 
 	if ((vfc->vfc_flags & VFCF_SBDRY) != 0) {
 		vfc->vfc_vfsops_sd = vfc->vfc_vfsops;
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 6941048656d1..3383bfe8f431 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -804,6 +804,8 @@ typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
 typedef void vfs_susp_clean_t(struct mount *mp);
 typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp);
 typedef void vfs_purge_t(struct mount *mp);
+struct sbuf;
+typedef int vfs_report_lockf_t(struct mount *mp, struct sbuf *sb);
 
 struct vfsops {
 	vfs_mount_t		*vfs_mount;
@@ -825,6 +827,7 @@ struct vfsops {
 	vfs_notify_lowervp_t	*vfs_reclaim_lowervp;
 	vfs_notify_lowervp_t	*vfs_unlink_lowervp;
 	vfs_purge_t		*vfs_purge;
+	vfs_report_lockf_t	*vfs_report_lockf;
 	vfs_mount_t		*vfs_spare[6];	/* spares for ABI compat */
 };
 
@@ -1039,6 +1042,7 @@ void	vfs_unregister_for_notification(struct mount *,
 	    struct mount_upper_node *);
 void	vfs_unregister_upper(struct mount *, struct mount_upper_node *);
 int	vfs_remount_ro(struct mount *mp);
+int	vfs_report_lockf(struct mount *mp, struct sbuf *sb);
 
 extern	TAILQ_HEAD(mntlist, mount) mountlist;	/* mounted filesystem list */
 extern	struct mtx_padalign mountlist_mtx;
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
index f25152db8215..451d83bbe125 100644
--- a/sys/sys/sysctl.h
+++ b/sys/sys/sysctl.h
@@ -976,6 +976,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 #define	KERN_HOSTUUID		36	/* string: host UUID identifier */
 #define	KERN_ARND		37	/* int: from arc4rand() */
 #define	KERN_MAXPHYS		38	/* int: MAXPHYS value */
+#define	KERN_LOCKF		39	/* struct: lockf reports */
 /*
  * KERN_PROC subtypes
  */