svn commit: r359473 - head/sys/kern

Konstantin Belousov kib at FreeBSD.org
Mon Mar 30 22:22:37 UTC 2020


Author: kib
Date: Mon Mar 30 22:13:32 2020
New Revision: 359473
URL: https://svnweb.freebsd.org/changeset/base/359473

Log:
  kern_sendfile.c: fix bugs with handling of busy page states.
  
  - Do not call into a vnode pager while leaving some pages from the
    same block as the current run, xbusy. This immediately deadlocks if
    pager needs to instantiate the buffer.
  - Only relookup bogus pages after io finished, otherwise we might
    obliterate the valid pages by out of date disk content.  While there,
    expand the comment explaining this pecularity.
  - Do not double-unbusy on error.  Split unbusy for error case, which
    is left in the sendfile_swapin(), from the more properly coded
    normal case in sendfile_iodone().
  - Add an XXXKIB comment explaining the serious bug in the validation
    algorithm, not fixed by this patch series.
  
  PR:	244713
  Reviewed by:	glebius, markj
  Tested by:	pho
  Sponsored by:	The FreeBSD Foundation
  MFC after:	1 week
  Differential revision:	https://reviews.freebsd.org/D24038

Modified:
  head/sys/kern/kern_sendfile.c

Modified: head/sys/kern/kern_sendfile.c
==============================================================================
--- head/sys/kern/kern_sendfile.c	Mon Mar 30 22:07:11 2020	(r359472)
+++ head/sys/kern/kern_sendfile.c	Mon Mar 30 22:13:32 2020	(r359473)
@@ -92,6 +92,7 @@ struct sf_io {
 	struct socket	*so;
 	struct mbuf	*m;
 	vm_object_t	obj;
+	vm_pindex_t	pindex0;
 #ifdef KERN_TLS
 	struct ktls_session *tls;
 #endif
@@ -270,17 +271,42 @@ sendfile_iowait(struct sf_io *sfio, const char *wmesg)
  * I/O completion callback.
  */
 static void
-sendfile_iodone(void *arg, vm_page_t *pg, int count, int error)
+sendfile_iodone(void *arg, vm_page_t *pa, int count, int error)
 {
 	struct sf_io *sfio = arg;
 	struct socket *so;
+	int i;
 
-	for (int i = 0; i < count; i++)
-		if (pg[i] != bogus_page)
-			vm_page_xunbusy_unchecked(pg[i]);
-
-	if (error)
+	if (error != 0) {
 		sfio->error = error;
+		/*
+		 * Restore of the pg[] elements is done by
+		 * sendfile_swapin().
+		 */
+	} else {
+		/*
+		 * Restore the valid page pointers.  They are already
+		 * unbusied, but still wired.  For error != 0 case,
+		 * sendfile_swapin() handles unbusy.
+		 *
+		 * XXXKIB since pages are only wired, and we do not
+		 * own the object lock, other users might have
+		 * invalidated them in meantime.  Similarly, after we
+		 * unbusied the swapped-in pages, they can become
+		 * invalid under us.
+		 */
+		for (i = 0; i < count; i++) {
+			if (pa[i] == bogus_page) {
+				pa[i] = vm_page_relookup(sfio->obj,
+				    sfio->pindex0 + i + (sfio->pa - pa));
+				KASSERT(pa[i] != NULL,
+				    ("%s: page %p[%d] disappeared",
+				    __func__, pa, i));
+			} else {
+				vm_page_xunbusy_unchecked(pa[i]);
+			}
+		}
+	}
 
 	if (!refcount_release(&sfio->nios))
 		return;
@@ -361,11 +387,13 @@ static int
 sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off,
     off_t len, int npages, int rhpages, int flags)
 {
-	vm_page_t *pa = sfio->pa;
-	int grabbed;
+	vm_page_t *pa;
+	int a, count, count1, grabbed, i, j, rv;
 
+	pa = sfio->pa;
 	*nios = 0;
 	flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0;
+	sfio->pindex0 = OFF_TO_IDX(off);
 
 	/*
 	 * First grab all the pages and wire them.  Note that we grab
@@ -380,9 +408,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 		rhpages = 0;
 	}
 
-	for (int i = 0; i < npages;) {
-		int j, a, count, rv;
-
+	for (i = 0; i < npages;) {
 		/* Skip valid pages. */
 		if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK,
 		    xfsize(i, npages, off, len))) {
@@ -422,19 +448,41 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 		count = min(a + 1, npages - i);
 
 		/*
-		 * We should not pagein into a valid page, thus we first trim
-		 * any valid pages off the end of request, and substitute
-		 * to bogus_page those, that are in the middle.
+		 * We should not pagein into a valid page because
+		 * there might be still unfinished write tracked by
+		 * e.g. a buffer, thus we substitute any valid pages
+		 * with the bogus one.
+		 *
+		 * We must not leave around xbusy pages which are not
+		 * part of the run passed to vm_pager_getpages(),
+		 * otherwise pager might deadlock waiting for the busy
+		 * status of the page, e.g. if it constitues the
+		 * buffer needed to validate other page.
+		 *
+		 * First trim the end of the run consisting of the
+		 * valid pages, then replace the rest of the valid
+		 * with bogus.
 		 */
+		count1 = count;
 		for (j = i + count - 1; j > i; j--) {
 			if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
 			    xfsize(j, npages, off, len))) {
+				vm_page_xunbusy(pa[j]);
+				SFSTAT_INC(sf_pages_valid);
 				count--;
-				rhpages = 0;
-			} else
+			} else {
 				break;
+			}
 		}
-		for (j = i + 1; j < i + count - 1; j++)
+
+		/*
+		 * The last page in the run pa[i + count - 1] is
+		 * guaranteed to be invalid by the trim above, so it
+		 * is not replaced with bogus, thus -1 in the loop end
+		 * condition.
+		 */
+		MPASS(pa[i + count - 1]->valid != VM_PAGE_BITS_ALL);
+		for (j = i + 1; j < i + count - 1; j++) {
 			if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
 			    xfsize(j, npages, off, len))) {
 				vm_page_xunbusy(pa[j]);
@@ -442,6 +490,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 				SFSTAT_INC(sf_pages_bogus);
 				pa[j] = bogus_page;
 			}
+		}
 
 		refcount_acquire(&sfio->nios);
 		rv = vm_pager_get_pages_async(obj, pa + i, count, NULL,
@@ -453,12 +502,16 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 			/*
 			 * Perform full pages recovery before returning EIO.
 			 * Pages from 0 to npages are wired.
-			 * Pages from i to npages are also busied.
 			 * Pages from (i + 1) to (i + count - 1) may be
 			 * substituted to bogus page, and not busied.
+			 * Pages from (i + count) to (i + count1 - 1) are
+			 * not busied.
+			 * Rest of the pages from i to npages are busied.
 			 */
 			for (j = 0; j < npages; j++) {
-				if (j > i && j < i + count - 1 &&
+				if (j >= i + count && j < i + count1)
+					;
+				else if (j > i && j < i + count - 1 &&
 				    pa[j] == bogus_page)
 					pa[j] = vm_page_relookup(obj,
 					    OFF_TO_IDX(vmoff(j, off)));
@@ -477,19 +530,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
 		if (i + count == npages)
 			SFSTAT_ADD(sf_rhpages_read, rhpages);
 
-		/*
-		 * Restore the valid page pointers.  They are already
-		 * unbusied, but still wired.
-		 */
-		for (j = i + 1; j < i + count - 1; j++)
-			if (pa[j] == bogus_page) {
-				pa[j] = vm_page_relookup(obj,
-				    OFF_TO_IDX(vmoff(j, off)));
-				KASSERT(pa[j], ("%s: page %p[%d] disappeared",
-				    __func__, pa, j));
-
-			}
-		i += count;
+		i += count1;
 		(*nios)++;
 	}
 


More information about the svn-src-all mailing list