git: d689fda0703b - stable/13 - shmfd: account for the actually allocated pages

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Fri, 20 Jan 2023 03:23:35 UTC
The branch stable/13 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=d689fda0703bd7bf7320fe6e34f2abf260650f8e

commit d689fda0703bd7bf7320fe6e34f2abf260650f8e
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2022-11-23 21:26:38 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-01-20 03:20:02 +0000

    shmfd: account for the actually allocated pages
    
    Tested by:      pho
    
    (cherry picked from commit 0919f29d91031cf42eca99266bffe7b0faef8f1b)
---
 sys/kern/uipc_shm.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++----
 sys/sys/mman.h      |  1 +
 2 files changed, 79 insertions(+), 5 deletions(-)

diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index a1f26b40f343..f1dc0f828b23 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -343,6 +343,61 @@ shm_largepage(struct shmfd *shmfd)
 	return (shmfd->shm_object->type == OBJT_PHYS);
 }
 
+static void
+shm_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size)
+{
+	struct shmfd *shm;
+	vm_size_t c;
+
+	swap_pager_freespace(obj, start, size, &c);
+	if (c == 0)
+		return;
+
+	shm = obj->un_pager.swp.swp_priv;
+	if (shm == NULL)
+		return;
+	KASSERT(shm->shm_pages >= c,
+	    ("shm %p pages %jd free %jd", shm,
+	    (uintmax_t)shm->shm_pages, (uintmax_t)c));
+	shm->shm_pages -= c;
+}
+
+static void
+shm_page_inserted(vm_object_t obj, vm_page_t m)
+{
+	struct shmfd *shm;
+
+	shm = obj->un_pager.swp.swp_priv;
+	if (shm == NULL)
+		return;
+	if (!vm_pager_has_page(obj, m->pindex, NULL, NULL))
+		shm->shm_pages += 1;
+}
+
+static void
+shm_page_removed(vm_object_t obj, vm_page_t m)
+{
+	struct shmfd *shm;
+
+	shm = obj->un_pager.swp.swp_priv;
+	if (shm == NULL)
+		return;
+	if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
+		KASSERT(shm->shm_pages >= 1,
+		    ("shm %p pages %jd free 1", shm,
+		    (uintmax_t)shm->shm_pages));
+		shm->shm_pages -= 1;
+	}
+}
+
+static struct pagerops shm_swap_pager_ops = {
+	.pgo_kvme_type = KVME_TYPE_SWAP,
+	.pgo_freespace = shm_pager_freespace,
+	.pgo_page_inserted = shm_page_inserted,
+	.pgo_page_removed = shm_page_removed,
+};
+static int shmfd_pager_type = -1;
+
 static int
 shm_seek(struct file *fp, off_t offset, int whence, struct thread *td)
 {
@@ -560,7 +615,6 @@ shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
 	bzero(sb, sizeof(*sb));
 	sb->st_blksize = PAGE_SIZE;
 	sb->st_size = shmfd->shm_size;
-	sb->st_blocks = howmany(sb->st_size, sb->st_blksize);
 	mtx_lock(&shm_timestamp_lock);
 	sb->st_atim = shmfd->shm_atime;
 	sb->st_ctim = shmfd->shm_ctime;
@@ -573,8 +627,12 @@ shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
 	sb->st_dev = shm_dev_ino;
 	sb->st_ino = shmfd->shm_ino;
 	sb->st_nlink = shmfd->shm_object->ref_count;
-	sb->st_blocks = shmfd->shm_object->size /
-	    (pagesizes[shmfd->shm_lp_psind] >> PAGE_SHIFT);
+	if (shm_largepage(shmfd)) {
+		sb->st_blocks = shmfd->shm_object->size /
+		    (pagesizes[shmfd->shm_lp_psind] >> PAGE_SHIFT);
+	} else {
+		sb->st_blocks = shmfd->shm_pages;
+	}
 
 	return (0);
 }
@@ -858,6 +916,7 @@ struct shmfd *
 shm_alloc(struct ucred *ucred, mode_t mode, bool largepage)
 {
 	struct shmfd *shmfd;
+	vm_object_t obj;
 
 	shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO);
 	shmfd->shm_size = 0;
@@ -870,8 +929,12 @@ shm_alloc(struct ucred *ucred, mode_t mode, bool largepage)
 		    VM_PROT_DEFAULT, 0, ucred);
 		shmfd->shm_lp_alloc_policy = SHM_LARGEPAGE_ALLOC_DEFAULT;
 	} else {
-		shmfd->shm_object = vm_pager_allocate(OBJT_SWAP, NULL,
+		obj = vm_pager_allocate(shmfd_pager_type, NULL,
 		    shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred);
+		VM_OBJECT_WLOCK(obj);
+		obj->un_pager.swp.swp_priv = shmfd;
+		VM_OBJECT_WUNLOCK(obj);
+		shmfd->shm_object = obj;
 	}
 	KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate"));
 	vfs_timestamp(&shmfd->shm_birthtime);
@@ -900,6 +963,7 @@ shm_hold(struct shmfd *shmfd)
 void
 shm_drop(struct shmfd *shmfd)
 {
+	vm_object_t obj;
 
 	if (refcount_release(&shmfd->shm_refs)) {
 #ifdef MAC
@@ -907,7 +971,13 @@ shm_drop(struct shmfd *shmfd)
 #endif
 		rangelock_destroy(&shmfd->shm_rl);
 		mtx_destroy(&shmfd->shm_mtx);
-		vm_object_deallocate(shmfd->shm_object);
+		obj = shmfd->shm_object;
+		if (!shm_largepage(shmfd)) {
+			VM_OBJECT_WLOCK(obj);
+			obj->un_pager.swp.swp_priv = NULL;
+			VM_OBJECT_WUNLOCK(obj);
+		}
+		vm_object_deallocate(obj);
 		free(shmfd, M_SHMFD);
 	}
 }
@@ -946,6 +1016,9 @@ shm_init(void *arg)
 	new_unrhdr64(&shm_ino_unr, 1);
 	shm_dev_ino = devfs_alloc_cdp_inode();
 	KASSERT(shm_dev_ino > 0, ("shm dev inode not initialized"));
+	shmfd_pager_type = vm_pager_alloc_dyn_type(&shm_swap_pager_ops,
+	    OBJT_SWAP);
+	MPASS(shmfd_pager_type != -1);
 
 	for (i = 1; i < MAXPAGESIZES; i++) {
 		if (pagesizes[i] == 0)
diff --git a/sys/sys/mman.h b/sys/sys/mman.h
index 71172bfd49a3..0413249dee2c 100644
--- a/sys/sys/mman.h
+++ b/sys/sys/mman.h
@@ -268,6 +268,7 @@ struct file;
 struct shmfd {
 	vm_ooffset_t	shm_size;
 	vm_object_t	shm_object;
+	vm_pindex_t	shm_pages;	/* allocated pages */
 	int		shm_refs;
 	uid_t		shm_uid;
 	gid_t		shm_gid;