git: d14b53ee31ca - main - cuse(3): Allow shared memory allocations up to, but excluding 2 GBytes.

From: Hans Petter Selasky <hselasky_at_FreeBSD.org>
Date: Wed, 20 Jul 2022 08:47:10 UTC
The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=d14b53ee31ca06933a4f8ef2e48ce33cf3dd5ec9

commit d14b53ee31ca06933a4f8ef2e48ce33cf3dd5ec9
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-07-13 16:17:40 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-07-20 08:41:11 +0000

    cuse(3): Allow shared memory allocations up to, but excluding 2 GBytes.
    
    Currently the cuse(3) mmap(2) offset is split into 128 banks of 16 MBytes.
    Allow cuse(3) to make allocations that span multiple banks at the expense
    of any fragmentation issues that may arise. Typically mmap(2) buffers are
    well below 16 MBytes. This allows 8K video resolution to work using webcamd.
    
    Reviewed by:    markj@
    Differential Revision:  https://reviews.freebsd.org/D35830
    MFC after:      1 week
    Sponsored by:   NVIDIA Networking
---
 lib/libcuse/cuse_lib.c   |  91 ++++++++++++++++++--------------------
 sys/fs/cuse/cuse.c       | 111 +++++++++++++++++------------------------------
 sys/fs/cuse/cuse_defs.h  |   2 +-
 sys/fs/cuse/cuse_ioctl.h |   6 ++-
 4 files changed, 88 insertions(+), 122 deletions(-)

diff --git a/lib/libcuse/cuse_lib.c b/lib/libcuse/cuse_lib.c
index d241ce1dc4ac..ec300add5903 100644
--- a/lib/libcuse/cuse_lib.c
+++ b/lib/libcuse/cuse_lib.c
@@ -145,7 +145,7 @@ cuse_vmoffset(void *_ptr)
 	unsigned long n;
 
 	CUSE_LOCK();
-	for (n = 0; n != CUSE_ALLOC_UNIT_MAX; n++) {
+	for (n = remainder = 0; n != CUSE_ALLOC_UNIT_MAX; n++) {
 		if (a_cuse[n].ptr == NULL)
 			continue;
 
@@ -153,20 +153,13 @@ cuse_vmoffset(void *_ptr)
 		ptr_max = a_cuse[n].ptr + a_cuse[n].size - 1;
 
 		if ((ptr >= ptr_min) && (ptr <= ptr_max)) {
-
-			CUSE_UNLOCK();
-
 			remainder = (ptr - ptr_min);
-
-			remainder -= remainder %
-			    (unsigned long)getpagesize();
-
-			return ((n * CUSE_ALLOC_BYTES_MAX) + remainder);
+			break;
 		}
 	}
 	CUSE_UNLOCK();
 
-	return (0x80000000UL);		/* failure */
+	return ((n << CUSE_ALLOC_UNIT_SHIFT) + remainder);
 }
 
 void   *
@@ -174,70 +167,70 @@ cuse_vmalloc(int size)
 {
 	struct cuse_alloc_info info;
 	unsigned long pgsize;
+	unsigned long x;
+	unsigned long m;
 	unsigned long n;
 	void *ptr;
 	int error;
 
-	if (f_cuse < 0)
+	/* some sanity checks */
+	if (f_cuse < 0 || size < 1 || (unsigned long)size > CUSE_ALLOC_BYTES_MAX)
 		return (NULL);
 
 	memset(&info, 0, sizeof(info));
 
-	if (size < 1)
-		return (NULL);
-
 	pgsize = getpagesize();
 	info.page_count = howmany(size, pgsize);
 
-	CUSE_LOCK();
-	for (n = 0; n != CUSE_ALLOC_UNIT_MAX; n++) {
+	/* compute how many units the allocation needs */
+	m = howmany(size, 1 << CUSE_ALLOC_UNIT_SHIFT);
+	if (m == 0 || m > CUSE_ALLOC_UNIT_MAX)
+		return (NULL);
 
-		if (a_cuse[n].ptr != NULL)
+	CUSE_LOCK();
+	for (n = 0; n <= CUSE_ALLOC_UNIT_MAX - m; ) {
+		if (a_cuse[n].size != 0) {
+			/* skip to next available unit, depending on allocation size */
+			n += howmany(a_cuse[n].size, 1 << CUSE_ALLOC_UNIT_SHIFT);
 			continue;
-
-		a_cuse[n].ptr = ((uint8_t *)1);	/* reserve */
-		a_cuse[n].size = 0;
-
+		}
+		/* check if there are "m" free units ahead */
+		for (x = 1; x != m; x++) {
+			if (a_cuse[n + x].size != 0)
+				break;
+		}
+		if (x != m) {
+			/* skip to next available unit, if any */
+			n += x + 1;
+			continue;
+		}
+		/* reserve this unit by setting the size to a non-zero value */
+		a_cuse[n].size = size;
 		CUSE_UNLOCK();
 
 		info.alloc_nr = n;
 
 		error = ioctl(f_cuse, CUSE_IOCTL_ALLOC_MEMORY, &info);
 
-		if (error) {
-
-			CUSE_LOCK();
-
-			a_cuse[n].ptr = NULL;
-
-			if (errno == EBUSY)
-				continue;
-			else
-				break;
-		}
-		ptr = mmap(NULL, info.page_count * pgsize,
-		    PROT_READ | PROT_WRITE,
-		    MAP_SHARED, f_cuse, CUSE_ALLOC_BYTES_MAX * n);
-
-		if (ptr == MAP_FAILED) {
+		if (error == 0) {
+			ptr = mmap(NULL, info.page_count * pgsize,
+			    PROT_READ | PROT_WRITE,
+			    MAP_SHARED, f_cuse, n << CUSE_ALLOC_UNIT_SHIFT);
 
-			error = ioctl(f_cuse, CUSE_IOCTL_FREE_MEMORY, &info);
+			if (ptr != MAP_FAILED) {
+				CUSE_LOCK();
+				a_cuse[n].ptr = ptr;
+				CUSE_UNLOCK();
 
-			if (error) {
-				/* ignore */
+				return (ptr);		/* success */
 			}
-			CUSE_LOCK();
 
-			a_cuse[n].ptr = NULL;
-
-			break;
+			(void) ioctl(f_cuse, CUSE_IOCTL_FREE_MEMORY, &info);
 		}
-		CUSE_LOCK();
-		a_cuse[n].ptr = ptr;
-		a_cuse[n].size = size;
-		CUSE_UNLOCK();
 
-		return (ptr);		/* success */
+		CUSE_LOCK();
+		a_cuse[n].size = 0;
+		n++;
 	}
 	CUSE_UNLOCK();
 	return (NULL);			/* failure */
diff --git a/sys/fs/cuse/cuse.c b/sys/fs/cuse/cuse.c
index 5a0d0263738e..62b53d232ee2 100644
--- a/sys/fs/cuse/cuse.c
+++ b/sys/fs/cuse/cuse.c
@@ -1329,50 +1329,57 @@ cuse_server_poll(struct cdev *dev, int events, struct thread *td)
 }
 
 static int
-cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
-    vm_size_t size, struct vm_object **object, int nprot)
+cuse_common_mmap_single(struct cuse_server *pcs,
+    vm_ooffset_t *offset, vm_size_t size, struct vm_object **object)
 {
-	uint32_t page_nr = *offset / PAGE_SIZE;
-	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
-	struct cuse_memory *mem;
-	struct cuse_server *pcs;
+  	struct cuse_memory *mem;
 	int error;
 
-	error = cuse_server_get(&pcs);
-	if (error != 0)
-		return (error);
+	/* verify size */
+	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE))
+		return (EINVAL);
 
 	cuse_server_lock(pcs);
-	/* lookup memory structure */
+	error = ENOMEM;
+
+	/* lookup memory structure, if any */
 	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
-		if (mem->alloc_nr == alloc_nr)
+		vm_ooffset_t min_off;
+		vm_ooffset_t max_off;
+
+		min_off = (mem->alloc_nr << CUSE_ALLOC_UNIT_SHIFT);
+		max_off = min_off + (PAGE_SIZE * mem->page_count);
+
+		if (*offset >= min_off && *offset < max_off) {
+			/* range check size */
+			if (size > (max_off - *offset)) {
+				error = EINVAL;
+			} else {
+				/* get new VM object offset to use */
+				*offset -= min_off;
+				vm_object_reference(mem->object);
+				*object = mem->object;
+				error = 0;
+			}
 			break;
+		}
 	}
-	if (mem == NULL) {
-		cuse_server_unlock(pcs);
-		return (ENOMEM);
-	}
-	/* verify page offset */
-	page_nr %= CUSE_ALLOC_PAGES_MAX;
-	if (page_nr >= mem->page_count) {
-		cuse_server_unlock(pcs);
-		return (ENXIO);
-	}
-	/* verify mmap size */
-	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
-	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
-		cuse_server_unlock(pcs);
-		return (EINVAL);
-	}
-	vm_object_reference(mem->object);
-	*object = mem->object;
 	cuse_server_unlock(pcs);
+	return (error);
+}
+
+static int
+cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
+    vm_size_t size, struct vm_object **object, int nprot)
+{
+	struct cuse_server *pcs;
+	int error;
 
-	/* set new VM object offset to use */
-	*offset = page_nr * PAGE_SIZE;
+	error = cuse_server_get(&pcs);
+	if (error != 0)
+		return (error);
 
-	/* success */
-	return (0);
+	return (cuse_common_mmap_single(pcs, offset, size, object));
 }
 
 /*------------------------------------------------------------------------*
@@ -1811,50 +1818,14 @@ static int
 cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
     vm_size_t size, struct vm_object **object, int nprot)
 {
-	uint32_t page_nr = *offset / PAGE_SIZE;
-	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
-	struct cuse_memory *mem;
 	struct cuse_client *pcc;
-	struct cuse_server *pcs;
 	int error;
 
 	error = cuse_client_get(&pcc);
 	if (error != 0)
 		return (error);
 
-	pcs = pcc->server;
-
-	cuse_server_lock(pcs);
-	/* lookup memory structure */
-	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
-		if (mem->alloc_nr == alloc_nr)
-			break;
-	}
-	if (mem == NULL) {
-		cuse_server_unlock(pcs);
-		return (ENOMEM);
-	}
-	/* verify page offset */
-	page_nr %= CUSE_ALLOC_PAGES_MAX;
-	if (page_nr >= mem->page_count) {
-		cuse_server_unlock(pcs);
-		return (ENXIO);
-	}
-	/* verify mmap size */
-	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
-	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
-		cuse_server_unlock(pcs);
-		return (EINVAL);
-	}
-	vm_object_reference(mem->object);
-	*object = mem->object;
-	cuse_server_unlock(pcs);
-
-	/* set new VM object offset to use */
-	*offset = page_nr * PAGE_SIZE;
-
-	/* success */
-	return (0);
+	return (cuse_common_mmap_single(pcc->server, offset, size, object));
 }
 
 static void
diff --git a/sys/fs/cuse/cuse_defs.h b/sys/fs/cuse/cuse_defs.h
index b5f2eba1b23e..4f962fc95ddb 100644
--- a/sys/fs/cuse/cuse_defs.h
+++ b/sys/fs/cuse/cuse_defs.h
@@ -27,7 +27,7 @@
 #ifndef _CUSE_DEFS_H_
 #define	_CUSE_DEFS_H_
 
-#define	CUSE_VERSION		0x000124
+#define	CUSE_VERSION		0x000125
 
 #define	CUSE_ERR_NONE		0
 #define	CUSE_ERR_BUSY		-1
diff --git a/sys/fs/cuse/cuse_ioctl.h b/sys/fs/cuse/cuse_ioctl.h
index 44e3c122979d..c2dc312beee8 100644
--- a/sys/fs/cuse/cuse_ioctl.h
+++ b/sys/fs/cuse/cuse_ioctl.h
@@ -34,9 +34,11 @@
 #define	CUSE_DEVICES_MAX	64	/* units */
 #define	CUSE_BUF_MIN_PTR	0x10000UL
 #define	CUSE_BUF_MAX_PTR	0x20000UL
-#define	CUSE_ALLOC_UNIT_MAX	128	/* units */
+#define	CUSE_ALLOC_UNIT_MAX	128UL	/* units */
+#define	CUSE_ALLOC_UNIT_SHIFT	24	/* bits */
 /* All memory allocations must be less than the following limit */
-#define	CUSE_ALLOC_BYTES_MAX	(1UL << 24)	/* bytes */
+#define	CUSE_ALLOC_BYTES_MAX \
+    (CUSE_ALLOC_UNIT_MAX << CUSE_ALLOC_UNIT_SHIFT) /* bytes */
 
 struct cuse_dev;