git: 454bc887f250 - main - uipc_shm: Implements fspacectl(2) support

Ka Ho Ng khng at FreeBSD.org
Thu Aug 12 15:06:30 UTC 2021


The branch main has been updated by khng:

URL: https://cgit.FreeBSD.org/src/commit/?id=454bc887f250ce0bceaabd0ec624d077269d3220

commit 454bc887f250ce0bceaabd0ec624d077269d3220
Author:     Ka Ho Ng <khng at FreeBSD.org>
AuthorDate: 2021-08-12 15:01:02 +0000
Commit:     Ka Ho Ng <khng at FreeBSD.org>
CommitDate: 2021-08-12 15:04:18 +0000

    uipc_shm: Implements fspacectl(2) support
    
    This implements fspacectl(2) support on shared memory objects. The
    semantics of SPACECTL_DEALLOC are equivalent to clearing the backing
    store and freeing the pages within the affected range. If the call
    succeeds, subsequent reads of the affected range return all zeroes.
    
    tests/sys/posixshm/posixshm_test.c is expanded to include a
    fspacectl(2) functional test.
    
    Sponsored by:   The FreeBSD Foundation
    Reviewed by:    kevans, kib
    Differential Revision:  https://reviews.freebsd.org/D31490
---
 sys/kern/uipc_shm.c                | 197 ++++++++++++++++++++++++++++--------
 tests/sys/posixshm/posixshm_test.c | 199 +++++++++++++++++++++++++++++++++++++
 2 files changed, 354 insertions(+), 42 deletions(-)

diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 58c9f8cec239..16d1e22a898b 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -131,6 +131,8 @@ static int	shm_dotruncate_locked(struct shmfd *shmfd, off_t length,
     void *rl_cookie);
 static int	shm_copyin_path(struct thread *td, const char *userpath_in,
     char **path_out);
+static int	shm_deallocate(struct shmfd *shmfd, off_t *offset,
+    off_t *length, int flags);
 
 static fo_rdwr_t	shm_read;
 static fo_rdwr_t	shm_write;
@@ -146,6 +148,7 @@ static fo_mmap_t	shm_mmap;
 static fo_get_seals_t	shm_get_seals;
 static fo_add_seals_t	shm_add_seals;
 static fo_fallocate_t	shm_fallocate;
+static fo_fspacectl_t	shm_fspacectl;
 
 /* File descriptor operations. */
 struct fileops shm_ops = {
@@ -166,6 +169,7 @@ struct fileops shm_ops = {
 	.fo_get_seals = shm_get_seals,
 	.fo_add_seals = shm_add_seals,
 	.fo_fallocate = shm_fallocate,
+	.fo_fspacectl = shm_fspacectl,
 	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE,
 };
 
@@ -626,14 +630,64 @@ out:
 	return (error);
 }
 
+/*
+ * Zero the byte range [base, end) of the page at index idx in object.
+ *
+ * If the page is not resident but the pager has a copy, it is brought in
+ * first; if neither holds, there is nothing to zero and the call succeeds
+ * without touching anything.  A page that was zeroed is marked dirty and
+ * unbusied before returning.
+ *
+ * Locking: the object must be write-locked on entry.  On success (0) it
+ * is still write-locked on return.  If the pager fails to supply the
+ * page, the page is freed, the object lock is DROPPED, and EIO is
+ * returned; callers must not unlock the object again on that path.
+ */
+static int
+shm_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
+    int end)
+{
+	vm_page_t pg;
+	int pager_rv;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	KASSERT(base >= 0, ("%s: base %d", __func__, base));
+	KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
+	    end));
+
+	for (;;) {
+		pg = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
+		if (pg != NULL) {
+			MPASS(vm_page_all_valid(pg));
+			break;
+		}
+
+		/* Neither resident nor known to the pager: nothing to do. */
+		if (!vm_pager_has_page(object, idx, NULL, NULL))
+			return (0);
+
+		pg = vm_page_alloc(object, idx,
+		    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
+		if (pg == NULL) {
+			/* Allocation failed; start over from the lookup. */
+			continue;
+		}
+
+		/* The pager is called with the object lock released. */
+		vm_object_pip_add(object, 1);
+		VM_OBJECT_WUNLOCK(object);
+		pager_rv = vm_pager_get_pages(object, &pg, 1, NULL, NULL);
+		VM_OBJECT_WLOCK(object);
+		vm_object_pip_wakeup(object);
+
+		if (pager_rv != VM_PAGER_OK) {
+			vm_page_free(pg);
+			VM_OBJECT_WUNLOCK(object);
+			return (EIO);
+		}
+
+		/*
+		 * The page was not resident, hence not recently accessed,
+		 * so queue it for asynchronous laundering right away.  This
+		 * operation does not count as an access.
+		 */
+		vm_page_launder(pg);
+		break;
+	}
+
+	pmap_zero_page_area(pg, base, end - base);
+	KASSERT(vm_page_all_valid(pg), ("%s: page %p is invalid",
+	    __func__, pg));
+	vm_page_set_dirty(pg);
+	vm_page_xunbusy(pg);
+
+	return (0);
+}
+
+
 static int
 shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
 {
 	vm_object_t object;
-	vm_page_t m;
-	vm_pindex_t idx, nobjsize;
+	vm_pindex_t nobjsize;
 	vm_ooffset_t delta;
-	int base, rv;
+	int base, error;
 
 	KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
 	object = shmfd->shm_object;
@@ -660,45 +714,10 @@ shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
 		 */
 		base = length & PAGE_MASK;
 		if (base != 0) {
-			idx = OFF_TO_IDX(length);
-retry:
-			m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
-			if (m != NULL) {
-				MPASS(vm_page_all_valid(m));
-			} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
-				m = vm_page_alloc(object, idx,
-				    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
-				if (m == NULL)
-					goto retry;
-				vm_object_pip_add(object, 1);
-				VM_OBJECT_WUNLOCK(object);
-				rv = vm_pager_get_pages(object, &m, 1, NULL,
-				    NULL);
-				VM_OBJECT_WLOCK(object);
-				vm_object_pip_wakeup(object);
-				if (rv == VM_PAGER_OK) {
-					/*
-					 * Since the page was not resident,
-					 * and therefore not recently
-					 * accessed, immediately enqueue it
-					 * for asynchronous laundering.  The
-					 * current operation is not regarded
-					 * as an access.
-					 */
-					vm_page_launder(m);
-				} else {
-					vm_page_free(m);
-					VM_OBJECT_WUNLOCK(object);
-					return (EIO);
-				}
-			}
-			if (m != NULL) {
-				pmap_zero_page_area(m, base, PAGE_SIZE - base);
-				KASSERT(vm_page_all_valid(m),
-				    ("shm_dotruncate: page %p is invalid", m));
-				vm_page_set_dirty(m);
-				vm_page_xunbusy(m);
-			}
+			error = shm_partial_page_invalidate(object,
+			    OFF_TO_IDX(length), base, PAGE_SIZE);
+			if (error)
+				return (error);
 		}
 		delta = IDX_TO_OFF(object->size - nobjsize);
 
@@ -1874,6 +1893,100 @@ shm_get_seals(struct file *fp, int *seals)
 	return (0);
 }
 
+/*
+ * Deallocate the byte range [*offset, *offset + *length) of a shared
+ * memory object: partially covered pages at either end are zeroed in
+ * place, and fully covered pages are removed from the backing object.
+ *
+ * On return *offset and *length describe the part of the range that was
+ * NOT processed; after a complete run *length is 0.  They are updated on
+ * the error path as well, reflecting the progress made before the failure.
+ *
+ * The caller must guarantee that *offset + *length does not overflow.
+ * The flags argument is currently unused.
+ *
+ * Returns 0 on success or the error from shm_partial_page_invalidate().
+ */
+static int
+shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags)
+{
+	vm_object_t object;
+	vm_pindex_t pistart, pi, piend;
+	vm_ooffset_t off, len;
+	int startofs, endofs;
+	int error;
+
+	off = *offset;
+	len = *length;
+	KASSERT(off + len <= (vm_ooffset_t)OFF_MAX, ("off + len overflows"));
+	object = shmfd->shm_object;
+	startofs = off & PAGE_MASK;		/* offset within first page */
+	endofs = (off + len) & PAGE_MASK;	/* offset within last page */
+	pistart = OFF_TO_IDX(off);		/* page holding the start */
+	piend = OFF_TO_IDX(off + len);		/* page holding the end */
+	pi = OFF_TO_IDX(off + PAGE_MASK);	/* first fully covered page */
+	error = 0;
+
+	VM_OBJECT_WLOCK(object);
+
+	if (pistart == piend) {
+		/*
+		 * The whole range lies inside one page.  This must be
+		 * handled whether or not the start is page-aligned;
+		 * otherwise a request such as off = 0, len < PAGE_SIZE
+		 * would be silently ignored.
+		 */
+		if (len != 0) {
+			error = shm_partial_page_invalidate(object, pistart,
+			    startofs, endofs);
+			if (error)
+				goto out_unlocked;
+			off += endofs - startofs;
+			len -= endofs - startofs;
+		}
+		goto out;
+	}
+
+	/* Leading partial page. */
+	if (startofs != 0) {
+		error = shm_partial_page_invalidate(object, pistart, startofs,
+		    PAGE_SIZE);
+		if (error)
+			goto out_unlocked;
+		off += PAGE_SIZE - startofs;
+		len -= PAGE_SIZE - startofs;
+	}
+
+	/* Fully covered pages. */
+	if (pi < piend) {
+		vm_object_page_remove(object, pi, piend, 0);
+		off += IDX_TO_OFF(piend - pi);
+		len -= IDX_TO_OFF(piend - pi);
+	}
+
+	/* Trailing partial page. */
+	if (endofs != 0) {
+		error = shm_partial_page_invalidate(object, piend, 0, endofs);
+		if (error)
+			goto out_unlocked;
+		off += endofs;
+		len -= endofs;
+	}
+
+out:
+	VM_OBJECT_WUNLOCK(object);
+out_unlocked:
+	/*
+	 * shm_partial_page_invalidate() has already dropped the object lock
+	 * when it returns an error, so the error path must not unlock again.
+	 */
+	*offset = off;
+	*length = len;
+	return (error);
+}
+
+
+/*
+ * fo_fspacectl handler for shared memory objects.
+ *
+ * Only SPACECTL_DEALLOC with flags == 0 is accepted, and the range must
+ * satisfy *offset >= 0, *length > 0 and *offset + *length <= OFF_MAX;
+ * anything else yields EINVAL.  The request is refused with EPERM when
+ * the object carries F_SEAL_WRITE.  The range is write-locked for the
+ * duration of the operation.
+ *
+ * On success *offset and *length are replaced with the values reported
+ * by shm_deallocate(); on failure they are left untouched.
+ */
+static int
+shm_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags,
+    struct ucred *active_cred, struct thread *td)
+{
+	struct shmfd *shmfd;
+	void *cookie;
+	off_t start, resid;
+	int error;
+
+	start = *offset;
+	resid = *length;
+
+	if (cmd != SPACECTL_DEALLOC || flags != 0)
+		return (EINVAL);
+	if (start < 0 || resid <= 0 || resid > OFF_MAX - start)
+		return (EINVAL);
+
+	shmfd = fp->f_data;
+	cookie = rangelock_wlock(&shmfd->shm_rl, start, start + resid,
+	    &shmfd->shm_mtx);
+	if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) {
+		error = EPERM;
+	} else {
+		error = shm_deallocate(shmfd, &start, &resid, flags);
+		if (error == 0) {
+			*offset = start;
+			*length = resid;
+		}
+	}
+	rangelock_unlock(&shmfd->shm_rl, cookie, &shmfd->shm_mtx);
+
+	return (error);
+}
+
+
 static int
 shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td)
 {
diff --git a/tests/sys/posixshm/posixshm_test.c b/tests/sys/posixshm/posixshm_test.c
index d1c1b14aef65..eddb1d2d8250 100644
--- a/tests/sys/posixshm/posixshm_test.c
+++ b/tests/sys/posixshm/posixshm_test.c
@@ -2,6 +2,11 @@
  * Copyright (c) 2006 Robert N. M. Watson
  * All rights reserved.
  *
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Ka Ho Ng
+ * under sponsorship from the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -173,6 +178,126 @@ verify_object(const char *path, char expected_value)
 	close(fd);
 }
 
+static off_t shm_max_pages = 32;
+static const char byte_to_fill = 0x5f;
+
+/*
+ * Write byte_to_fill over [offset, offset + len) of fd, at most one page
+ * per pwrite(2) call.
+ *
+ * Returns 0 on success, or 1 if the scratch buffer cannot be allocated or
+ * a write fails or comes up short.
+ */
+static int
+shm_fill(int fd, off_t offset, off_t len)
+{
+	char *page;
+	size_t chunk;
+	int rc;
+
+	page = malloc(PAGE_SIZE);
+	if (page == NULL)
+		return (1);
+	memset(page, byte_to_fill, PAGE_SIZE);
+
+	rc = 0;
+	for (; len > 0; len -= chunk, offset += chunk) {
+		chunk = len < (off_t)PAGE_SIZE ? len : PAGE_SIZE;
+		if (pwrite(fd, page, chunk, offset) != (ssize_t)chunk) {
+			rc = 1;
+			break;
+		}
+	}
+
+	free(page);
+	return (rc);
+}
+
+/*
+ * Compare [offset, offset + len) of fd against a one-page pattern that
+ * repeats from the start of the range.  buf is a PAGE_SIZE scratch
+ * buffer; pattern points to PAGE_SIZE bytes of expected content.
+ *
+ * Returns 0 if every byte matches (or len <= 0), 1 on a short or failed
+ * read or on the first mismatch.
+ */
+static int
+check_region_content(int fd, off_t offset, off_t len, char *buf,
+    const char *pattern)
+{
+	size_t blen;
+
+	while (len > 0) {
+		blen = len < (off_t)PAGE_SIZE ? len : PAGE_SIZE;
+		if (pread(fd, buf, blen, offset) != (ssize_t)blen)
+			return (1);
+		if (memcmp(buf, pattern, blen) != 0)
+			return (1);
+		len -= blen;
+		offset += blen;
+	}
+	return (0);
+}
+
+/*
+ * Verify the outcome of a deallocation on an object of shm_sz bytes that
+ * had been filled with byte_to_fill: the hole
+ * [hole_start, hole_start + hole_len), clamped to shm_sz, must read back
+ * as zeroes, everything outside it must still hold byte_to_fill, and the
+ * object size must be unchanged.
+ *
+ * Returns 0 when all checks pass, 1 on allocation failure or a content
+ * mismatch, and -1 if fstat(2) fails or the size differs (this takes
+ * precedence over a content mismatch).
+ */
+static int
+check_content_dealloc(int fd, off_t hole_start, off_t hole_len, off_t shm_sz)
+{
+	struct stat statbuf;
+	char *buf, *sblk;
+	off_t hole_end;
+	int error;
+
+	buf = malloc(PAGE_SIZE * 2);
+	if (buf == NULL)
+		return (1);
+	sblk = buf + PAGE_SIZE;
+	error = 0;
+
+	/* A hole extending past the end of the object is clamped to it. */
+	if ((uint64_t)hole_start + hole_len > (uint64_t)shm_sz)
+		hole_len = shm_sz - hole_start;
+	hole_end = hole_start + hole_len;
+
+	/* The hole itself must be zeroed. */
+	memset(sblk, 0, PAGE_SIZE);
+	if (check_region_content(fd, hole_start, hole_len, buf, sblk) != 0)
+		error = 1;
+
+	/* The regions before and after the hole must be left intact. */
+	memset(sblk, byte_to_fill, PAGE_SIZE);
+	if (check_region_content(fd, 0, hole_start, buf, sblk) != 0)
+		error = 1;
+	if (check_region_content(fd, hole_end, shm_sz - hole_end, buf,
+	    sblk) != 0)
+		error = 1;
+
+	/*
+	 * The object size must be unchanged.  statbuf is only examined
+	 * when fstat(2) succeeded.
+	 */
+	if (fstat(fd, &statbuf) == -1 || statbuf.st_size != shm_sz)
+		error = -1;
+
+	free(buf);
+	return (error);
+}
+
+
 ATF_TC_WITHOUT_HEAD(remap_object);
 ATF_TC_BODY(remap_object, tc)
 {
@@ -958,6 +1083,79 @@ ATF_TC_BODY(fallocate, tc)
 	close(fd);
 }
 
+/*
+ * Functional test for fspacectl(2) SPACECTL_DEALLOC on a POSIX shared
+ * memory object.  For each case the object is refilled with the fill
+ * byte, a range is deallocated, and the contents and size are verified
+ * with check_content_dealloc().
+ */
+ATF_TC_WITHOUT_HEAD(fspacectl);
+ATF_TC_BODY(fspacectl, tc)
+{
+	const off_t half_page = (off_t)1 << (PAGE_SHIFT - 1);
+	const off_t shm_sz = shm_max_pages << PAGE_SHIFT;
+	/* Each case deallocates [start, end). */
+	const struct {
+		const char *desc;
+		off_t start;
+		off_t end;
+	} cases[] = {
+		{ "Aligned fspacectl",
+		    PAGE_SIZE, (shm_max_pages - 1) << PAGE_SHIFT },
+		{ "Unaligned fspacectl",
+		    half_page,
+		    ((shm_max_pages - 1) << PAGE_SHIFT) + half_page },
+		{ "Aligned fspacectl to OFF_MAX",
+		    PAGE_SIZE, OFF_MAX },
+		{ "Unaligned fspacectl to OFF_MAX",
+		    half_page, OFF_MAX },
+		{ "Aligned fspacectl past shm_sz",
+		    PAGE_SIZE, (shm_max_pages + 1) << PAGE_SHIFT },
+		{ "Unaligned fspacectl past shm_sz",
+		    half_page, (shm_max_pages + 1) << PAGE_SHIFT },
+	};
+	struct spacectl_range range;
+	off_t offset, length;
+	size_t i;
+	int fd, error;
+
+	fd = shm_open("/testtest", O_RDWR|O_CREAT, 0666);
+	ATF_REQUIRE_MSG(fd >= 0, "shm_open failed; errno:%d", errno);
+	/*
+	 * Drop the name right away so the object does not outlive the test,
+	 * even if a later requirement aborts it; the descriptor stays usable.
+	 */
+	ATF_CHECK_MSG(shm_unlink("/testtest") == 0,
+	    "shm_unlink failed; errno=%d", errno);
+	ATF_REQUIRE_MSG((error = posix_fallocate(fd, 0, shm_sz)) == 0,
+	    "posix_fallocate failed; error=%d", error);
+
+	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
+		offset = cases[i].start;
+		length = cases[i].end - offset;
+
+		ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+		range.r_offset = offset;
+		range.r_len = length;
+		ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0,
+		    &range) == 0,
+		    "%s failed; errno=%d", cases[i].desc, errno);
+		ATF_CHECK_MSG(check_content_dealloc(fd, offset, length,
+		    shm_sz) == 0,
+		    "%s content checking failed", cases[i].desc);
+	}
+
+	ATF_REQUIRE(close(fd) == 0);
+}
+
+
 static int
 shm_open_large(int psind, int policy, size_t sz)
 {
@@ -1704,6 +1902,7 @@ ATF_TP_ADD_TCS(tp)
 	ATF_TP_ADD_TC(tp, cloexec);
 	ATF_TP_ADD_TC(tp, mode);
 	ATF_TP_ADD_TC(tp, fallocate);
+	ATF_TP_ADD_TC(tp, fspacectl);
 	ATF_TP_ADD_TC(tp, largepage_basic);
 	ATF_TP_ADD_TC(tp, largepage_config);
 	ATF_TP_ADD_TC(tp, largepage_mmap);


More information about the dev-commits-src-all mailing list