git: 5e59b2734f77 - main - cuse(3): Optimise small reads and writes.

From: Hans Petter Selasky <hselasky_at_FreeBSD.org>
Date: Mon, 03 Oct 2022 15:36:58 UTC
The branch main has been updated by hselasky:

URL: https://cgit.FreeBSD.org/src/commit/?id=5e59b2734f77da24e2bc45154c7db949b9d790c5

commit 5e59b2734f77da24e2bc45154c7db949b9d790c5
Author:     Hans Petter Selasky <hselasky@FreeBSD.org>
AuthorDate: 2022-09-20 13:58:36 +0000
Commit:     Hans Petter Selasky <hselasky@FreeBSD.org>
CommitDate: 2022-10-03 15:35:14 +0000

    cuse(3): Optimise small reads and writes.
    
    When doing small reads and writes use an intermediate buffer to store the
    data to save locking the remote process to access data.
    
    Reviewed by:    imp @
    Differential Revision:  https://reviews.freebsd.org/D36633
    MFC after:      1 week
    Sponsored by:   NVIDIA Networking
---
 sys/fs/cuse/cuse.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 130 insertions(+), 6 deletions(-)

diff --git a/sys/fs/cuse/cuse.c b/sys/fs/cuse/cuse.c
index a870df1b07dc..41f8bdbe2bfe 100644
--- a/sys/fs/cuse/cuse.c
+++ b/sys/fs/cuse/cuse.c
@@ -1,6 +1,6 @@
 /* $FreeBSD$ */
 /*-
- * Copyright (c) 2010-2022 Hans Petter Selasky. All rights reserved.
+ * Copyright (c) 2010-2022 Hans Petter Selasky
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -64,6 +64,10 @@
 #include <fs/cuse/cuse_defs.h>
 #include <fs/cuse/cuse_ioctl.h>
 
+/* set this define to zero to disable this feature */
+#define	CUSE_COPY_BUFFER_MAX \
+	CUSE_BUFFER_MAX
+
 #define	CUSE_ALLOC_PAGES_MAX \
 	(CUSE_ALLOC_BYTES_MAX / PAGE_SIZE)
 
@@ -154,6 +158,12 @@ struct cuse_client {
 	struct cuse_server *server;
 	struct cuse_server_dev *server_dev;
 
+	uintptr_t read_base;
+	uintptr_t write_base;
+	int read_length;
+	int write_length;
+	uint8_t	read_buffer[CUSE_COPY_BUFFER_MAX] __aligned(4);
+	uint8_t	write_buffer[CUSE_COPY_BUFFER_MAX] __aligned(4);
 	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);
 
 	int	fflags;			/* file flags */
@@ -265,6 +275,12 @@ cuse_server_unlock(struct cuse_server *pcs)
 	mtx_unlock(&pcs->mtx);
 }
 
+static bool
+cuse_server_is_locked(struct cuse_server *pcs)
+{
+	return (mtx_owned(&pcs->mtx));
+}
+
 static void
 cuse_cmd_lock(struct cuse_client_command *pccmd)
 {
@@ -967,6 +983,48 @@ cuse_server_data_copy_locked(struct cuse_server *pcs,
 	return (error);
 }
 
+static int
+cuse_server_data_copy_optimized_locked(struct cuse_server *pcs,
+    struct cuse_client_command *pccmd,
+    struct cuse_data_chunk *pchk, bool isread)
+{
+	uintptr_t offset;
+	int error;
+
+	/*
+	 * Check if data is stored locally to avoid accessing
+	 * other process's data space:
+	 */
+	if (isread) {
+		offset = pchk->peer_ptr - pccmd->client->write_base;
+
+		if (offset < (uintptr_t)pccmd->client->write_length &&
+		    pchk->length <= (unsigned long)pccmd->client->write_length &&
+		    offset + pchk->length <= (uintptr_t)pccmd->client->write_length) {
+			cuse_server_unlock(pcs);
+			error = copyout(pccmd->client->write_buffer + offset,
+			    (void *)pchk->local_ptr, pchk->length);
+			goto done;
+		}
+	} else {
+		offset = pchk->peer_ptr - pccmd->client->read_base;
+
+		if (offset < (uintptr_t)pccmd->client->read_length &&
+		    pchk->length <= (unsigned long)pccmd->client->read_length &&
+		    offset + pchk->length <= (uintptr_t)pccmd->client->read_length) {
+			cuse_server_unlock(pcs);
+			error = copyin((void *)pchk->local_ptr,
+			    pccmd->client->read_buffer + offset, pchk->length);
+			goto done;
+		}
+	}
+
+	/* use process to process copy function */
+	error = cuse_server_data_copy_locked(pcs, pccmd, pchk, isread);
+done:
+	return (error);
+}
+
 static int
 cuse_alloc_unit_by_id_locked(struct cuse_server *pcs, int id)
 {
@@ -1294,14 +1352,22 @@ cuse_server_ioctl(struct cdev *dev, unsigned long cmd,
 			error = ENXIO;	/* invalid request */
 		} else if (pchk->peer_ptr < CUSE_BUF_MIN_PTR) {
 			error = EFAULT;	/* NULL pointer */
+		} else if (pchk->length == 0) {
+			/* NOP */
 		} else if (pchk->peer_ptr < CUSE_BUF_MAX_PTR) {
 			error = cuse_server_ioctl_copy_locked(pcs, pccmd,
 			    pchk, cmd == CUSE_IOCTL_READ_DATA);
 		} else {
-			error = cuse_server_data_copy_locked(pcs, pccmd,
-			    pchk, cmd == CUSE_IOCTL_READ_DATA);
+			error = cuse_server_data_copy_optimized_locked(
+			    pcs, pccmd, pchk, cmd == CUSE_IOCTL_READ_DATA);
 		}
-		cuse_server_unlock(pcs);
+
+		/*
+		 * Sometimes the functions above drop the server lock
+		 * early as an optimization:
+		 */
+		if (cuse_server_is_locked(pcs))
+			cuse_server_unlock(pcs);
 		break;
 
 	case CUSE_IOCTL_SELWAKEUP:
@@ -1581,6 +1647,7 @@ cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
 	struct cuse_client *pcc;
 	struct cuse_server *pcs;
 	int error;
+	int temp;
 	int len;
 
 	error = cuse_client_get(&pcc);
@@ -1605,13 +1672,41 @@ cuse_client_read(struct cdev *dev, struct uio *uio, int ioflag)
 		len = uio->uio_iov->iov_len;
 
 		cuse_server_lock(pcs);
+		if (len <= CUSE_COPY_BUFFER_MAX) {
+			/* set read buffer region for small reads */
+			pcc->read_base = (uintptr_t)uio->uio_iov->iov_base;
+			pcc->read_length = len;
+		}
 		cuse_client_send_command_locked(pccmd,
 		    (uintptr_t)uio->uio_iov->iov_base,
 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
 
 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
+		/*
+		 * After finishing reading data, disable the read
+		 * region for the cuse_server_data_copy_optimized_locked()
+		 * function:
+		 */
+		pcc->read_base = 0;
+		pcc->read_length = 0;
 		cuse_server_unlock(pcs);
 
+		/*
+		 * The return value indicates the read length, when
+		 * not negative. Range check it just in case to avoid
+		 * passing invalid length values to uiomove().
+		 */
+		if (error > len) {
+			error = ERANGE;
+			break;
+		} else if (error > 0 && len <= CUSE_COPY_BUFFER_MAX) {
+			temp = copyout(pcc->read_buffer,
+			    uio->uio_iov->iov_base, error);
+			if (temp != 0) {
+				error = temp;
+				break;
+			}
+		}
 		if (error < 0) {
 			error = cuse_convert_error(error);
 			break;
@@ -1664,15 +1759,43 @@ cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
 		}
 		len = uio->uio_iov->iov_len;
 
+		if (len <= CUSE_COPY_BUFFER_MAX) {
+			error = copyin(uio->uio_iov->iov_base,
+			    pcc->write_buffer, len);
+			if (error != 0)
+				break;
+		}
+
 		cuse_server_lock(pcs);
+		if (len <= CUSE_COPY_BUFFER_MAX) {
+			/* set write buffer region for small writes */
+			pcc->write_base = (uintptr_t)uio->uio_iov->iov_base;
+			pcc->write_length = len;
+		}
 		cuse_client_send_command_locked(pccmd,
 		    (uintptr_t)uio->uio_iov->iov_base,
 		    (unsigned long)(unsigned int)len, pcc->fflags, ioflag);
 
 		error = cuse_client_receive_command_locked(pccmd, 0, 0);
+
+		/*
+		 * After finishing writing data, disable the write
+		 * region for the cuse_server_data_copy_optimized_locked()
+		 * function:
+		 */
+		pcc->write_base = 0;
+		pcc->write_length = 0;
 		cuse_server_unlock(pcs);
 
-		if (error < 0) {
+		/*
+		 * The return value indicates the write length, when
+		 * not negative. Range check it just in case to avoid
+		 * passing invalid length values to uiomove().
+		 */
+		if (error > len) {
+			error = ERANGE;
+			break;
+		} else if (error < 0) {
 			error = cuse_convert_error(error);
 			break;
 		} else if (error == len) {
@@ -1686,7 +1809,8 @@ cuse_client_write(struct cdev *dev, struct uio *uio, int ioflag)
 	}
 	cuse_cmd_unlock(pccmd);
 
-	uio->uio_segflg = UIO_USERSPACE;/* restore segment flag */
+	/* restore segment flag */
+	uio->uio_segflg = UIO_USERSPACE;
 
 	if (error == EWOULDBLOCK)
 		cuse_client_kqfilter_poll(dev, pcc);