git: d01183fddf37 - main - geom/zero: Add support for unmapped I/O

From: Mateusz Piotrowski <0mp_at_FreeBSD.org>
Date: Tue, 11 Nov 2025 16:07:41 UTC
The branch main has been updated by 0mp:

URL: https://cgit.FreeBSD.org/src/commit/?id=d01183fddf37b15da7ab3d69039e7759ae144451

commit d01183fddf37b15da7ab3d69039e7759ae144451
Author:     Mateusz Piotrowski <0mp@FreeBSD.org>
AuthorDate: 2025-09-26 07:08:30 +0000
Commit:     Mateusz Piotrowski <0mp@FreeBSD.org>
CommitDate: 2025-11-11 16:06:59 +0000

    geom/zero: Add support for unmapped I/O
    
    This patch adds support for unmapped I/O to gzero(4).
    
    Let's consider the following script to illustrate the change in
    gzero(4)'s behavior:
    
    ```
    dd="dd if=/dev/gzero of=/dev/null bs=512 count=100000"
    dtrace -q -c "$dd" -n '
        fbt::pmap_qenter:entry,
        fbt::uiomove_fromphys:entry,
        fbt::memset:entry
        /execname == "dd"/
        {
            @[probefunc] = count();
        }
    '
    ```
    
    Let's run that script 4 times:
    
    ```
    ==> 1: unmapped I/O not supported (fallback to mapped I/O), kern.geom.zero.clear=1
    51200000 bytes transferred in 1.795809 secs (28510829 bytes/sec)
      pmap_qenter                                                  100000
      memset                                                       400011
    
    ==> 2: unmapped I/O not supported (fallback to mapped I/O), kern.geom.zero.clear=0
    51200000 bytes transferred in 0.701079 secs (73030337 bytes/sec)
      memset                                                       300011
    
    ==> 3: unmapped I/O supported, kern.geom.zero.clear=1
    51200000 bytes transferred in 0.771680 secs (66348750 bytes/sec)
      uiomove_fromphys                                             100000
      memset                                                       300011
    
    ==> 4: unmapped I/O supported, kern.geom.zero.clear=0
    51200000 bytes transferred in 0.621303 secs (82407407 bytes/sec)
      memset                                                       300011
    ```
    
    If kern.geom.zero.clear=0, then nothing really changes as no copying takes
    place. Otherwise, we see that adding unmapped I/O support avoids the calls
    to pmap_qenter(), which GEOM made to turn unmapped I/O requests into
    mapped ones before passing them to gzero(4) for processing.
    
    Reviewed by:    bnovkov, markj
    Approved by:    bnovkov (mentor), markj (mentor)
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D52998
---
 sys/geom/zero/g_zero.c | 82 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 19 deletions(-)

diff --git a/sys/geom/zero/g_zero.c b/sys/geom/zero/g_zero.c
index 7952147d660a..25d462a9f918 100644
--- a/sys/geom/zero/g_zero.c
+++ b/sys/geom/zero/g_zero.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
+ * Copyright (c) 2025 Mateusz Piotrowski <0mp@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,53 +35,97 @@
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
+#include <sys/uio.h>
+#include <sys/types.h>
 
 #include <geom/geom.h>
 
 #define	G_ZERO_CLASS_NAME	"ZERO"
 
-static int	g_zero_clear_sysctl(SYSCTL_HANDLER_ARGS);
+static int	g_zero_byte_sysctl(SYSCTL_HANDLER_ARGS);
 
 SYSCTL_DECL(_kern_geom);
 static SYSCTL_NODE(_kern_geom, OID_AUTO, zero, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "GEOM_ZERO stuff");
 static int g_zero_clear = 1;
-SYSCTL_PROC(_kern_geom_zero, OID_AUTO, clear,
-    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &g_zero_clear, 0,
-    g_zero_clear_sysctl, "I",
+SYSCTL_INT(_kern_geom_zero, OID_AUTO, clear,
+    CTLFLAG_RWTUN, &g_zero_clear, 0,
     "Clear read data buffer");
 static int g_zero_byte = 0;
-SYSCTL_INT(_kern_geom_zero, OID_AUTO, byte, CTLFLAG_RWTUN, &g_zero_byte, 0,
+static uint8_t g_zero_buffer[PAGE_SIZE];
+SYSCTL_PROC(_kern_geom_zero, OID_AUTO, byte,
+    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &g_zero_byte, 0,
+    g_zero_byte_sysctl, "I",
     "Byte (octet) value to clear the buffers with");
 
 static struct g_provider *gpp;
 
 static int
-g_zero_clear_sysctl(SYSCTL_HANDLER_ARGS)
+g_zero_byte_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
-	error = sysctl_handle_int(oidp, &g_zero_clear, 0, req);
+	// XXX: Confirm that this is called on module load as well.
+	// XXX: Shouldn't we lock here to avoid changing the byte value if the
+	// driver is in the process of handling I/O?
+	error = sysctl_handle_int(oidp, &g_zero_byte, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
-	if (gpp == NULL)
-		return (ENXIO);
-	if (g_zero_clear)
-		gpp->flags &= ~G_PF_ACCEPT_UNMAPPED;
-	else
-		gpp->flags |= G_PF_ACCEPT_UNMAPPED;
+	memset(g_zero_buffer, g_zero_byte, PAGE_SIZE);
 	return (0);
 }
 
+static void
+g_zero_fill_pages(struct bio *bp)
+{
+	struct iovec aiovec;
+	struct uio auio;
+	size_t length;
+	vm_offset_t offset;
+
+	auio.uio_offset = 0;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_td = curthread;
+
+	/*
+	 * Fill the bp->bio_ma pages of an unmapped request with g_zero_byte,
+	 * one page at a time, by copying from g_zero_buffer.  The iovec is
+	 * rebuilt on every pass because uiomove_fromphys() consumes it, so a
+	 * stale one would have uiomove_fromphys() walk past the end of aiovec.
+	 */
+	bp->bio_resid = bp->bio_length;
+	offset = bp->bio_ma_offset & PAGE_MASK;
+	for (int i = 0; i < bp->bio_ma_n && bp->bio_resid > 0; i++) {
+		length = MIN(PAGE_SIZE - offset, bp->bio_resid);
+		aiovec.iov_base = g_zero_buffer;
+		aiovec.iov_len = length;
+		auio.uio_iov = &aiovec;
+		auio.uio_iovcnt = 1;
+		auio.uio_resid = length;
+
+		(void)uiomove_fromphys(&bp->bio_ma[i], offset, length, &auio);
+
+		offset = 0;
+		bp->bio_resid -= length;
+	}
+}
+
+
 static void
 g_zero_start(struct bio *bp)
 {
-	int error = ENXIO;
+	int error;
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
-		if (g_zero_clear && (bp->bio_flags & BIO_UNMAPPED) == 0)
-			memset(bp->bio_data, g_zero_byte, bp->bio_length);
+		if (g_zero_clear) {
+			if ((bp->bio_flags & BIO_UNMAPPED) != 0)
+				g_zero_fill_pages(bp);
+			else
+				memset(bp->bio_data, g_zero_byte,
+				    bp->bio_length);
+		}
 		/* FALLTHROUGH */
 	case BIO_DELETE:
 	case BIO_WRITE:
@@ -106,9 +151,8 @@ g_zero_init(struct g_class *mp)
 	gp->start = g_zero_start;
 	gp->access = g_std_access;
 	gpp = pp = g_new_providerf(gp, "%s", gp->name);
-	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
-	if (!g_zero_clear)
-		pp->flags |= G_PF_ACCEPT_UNMAPPED;
+	pp->flags |= G_PF_ACCEPT_UNMAPPED | G_PF_DIRECT_SEND |
+	    G_PF_DIRECT_RECEIVE;
 	pp->mediasize = 1152921504606846976LLU;
 	pp->sectorsize = 512;
 	g_error_provider(pp, 0);