Re: git: 995730a635bc - main - swap_pager: cleanup swapoff_object

From: Doug Moore <unkadoug_at_gmail.com>
Date: Thu, 27 Jun 2024 19:19:15 UTC
Omitted:

Previous version tested by pho.

My apologies.

Doug

On 6/27/24 14:17, Doug Moore wrote:
> The branch main has been updated by dougm:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=995730a635bcb3bf6e5f2ffde950fb930cb1c23b
>
> commit 995730a635bcb3bf6e5f2ffde950fb930cb1c23b
> Author:     Doug Moore <dougm@FreeBSD.org>
> AuthorDate: 2024-06-27 19:06:09 +0000
> Commit:     Doug Moore <dougm@FreeBSD.org>
> CommitDate: 2024-06-27 19:06:09 +0000
>
>      swap_pager: cleanup swapoff_object
>      
>      Function swap_pager_swapoff_object calls vm_pager_unswapped (via
>      swp_pager_force_dirty) for every page that must be unswapped. That
>      means that there's an unneeded check for lock ownership (the caller
>      always owns it), a needless PCTRIE_LOOKUP (the caller has already
>      found it), a call to free one page of swap space only, and a check to
>      see if all blocks are empty, when the caller usually knows that the
>      check is useless.
>      
>      Isolate the essential part, needed however swap_pager_unswapped is
>      invoked, into a smaller function swap_pager_unswapped_acct.  From
>      swapoff_object, invoke swp_pager_update_freerange for each appropriate
>      page, so that there are potentially fewer calls to
>      swp_pager_freeswapspace.  Consider freeing a set of blocks (a struct
>      swblk) only after having invalidated all those blocks.
>      
>      Replace the doubly-nested loops with a single loop, and refetch and
>      rescan a swblk only when the object write lock has been released and
>      reacquired.
>      
>      After getting a page from swap, dirty it immediately to address a race
>      condition observed by @kib.
>      
>      Reviewed by:    kib
>      Differential Revision:  https://reviews.freebsd.org/D45668
> ---
>   sys/vm/swap_pager.c | 184 +++++++++++++++++++++++++++++-----------------------
>   1 file changed, 103 insertions(+), 81 deletions(-)
>
> diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
> index 8d9236a2eb1a..79986842d814 100644
> --- a/sys/vm/swap_pager.c
> +++ b/sys/vm/swap_pager.c
> @@ -1183,6 +1183,21 @@ swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
>   	return (TRUE);
>   }
>   
> +static void
> +swap_pager_unswapped_acct(vm_page_t m)
> +{
> +	KASSERT((m->object->flags & OBJ_SWAP) != 0,
> +	    ("Free object not swappable"));
> +	if ((m->a.flags & PGA_SWAP_FREE) != 0)
> +		counter_u64_add(swap_free_completed, 1);
> +	vm_page_aflag_clear(m, PGA_SWAP_FREE | PGA_SWAP_SPACE);
> +
> +	/*
> +	 * The meta data only exists if the object is OBJT_SWAP
> +	 * and even then might not be allocated yet.
> +	 */
> +}
> +
>   /*
>    * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page
>    *
> @@ -1229,16 +1244,7 @@ swap_pager_unswapped(vm_page_t m)
>   		}
>   		return;
>   	}
> -	if ((m->a.flags & PGA_SWAP_FREE) != 0)
> -		counter_u64_add(swap_free_completed, 1);
> -	vm_page_aflag_clear(m, PGA_SWAP_FREE | PGA_SWAP_SPACE);
> -
> -	/*
> -	 * The meta data only exists if the object is OBJT_SWAP
> -	 * and even then might not be allocated yet.
> -	 */
> -	KASSERT((m->object->flags & OBJ_SWAP) != 0,
> -	    ("Free object not swappable"));
> +	swap_pager_unswapped_acct(m);
>   
>   	sb = SWAP_PCTRIE_LOOKUP(&m->object->un_pager.swp.swp_blks,
>   	    rounddown(m->pindex, SWAP_META_PAGES));
> @@ -1786,11 +1792,12 @@ swap_pager_nswapdev(void)
>   }
>   
>   static void
> -swp_pager_force_dirty(vm_page_t m)
> +swp_pager_force_dirty(struct page_range *range, vm_page_t m, daddr_t *blk)
>   {
> -
>   	vm_page_dirty(m);
> -	swap_pager_unswapped(m);
> +	swap_pager_unswapped_acct(m);
> +	swp_pager_update_freerange(range, *blk);
> +	*blk = SWAPBLK_NONE;
>   	vm_page_launder(m);
>   }
>   
> @@ -1827,18 +1834,23 @@ swap_pager_swapped_pages(vm_object_t object)
>   static void
>   swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object)
>   {
> +	struct page_range range;
>   	struct swblk *sb;
>   	vm_page_t m;
>   	vm_pindex_t pi;
>   	daddr_t blk;
> -	int i, nv, rahead, rv;
> +	int i, rahead, rv;
> +	bool sb_empty;
>   
> +	VM_OBJECT_ASSERT_WLOCKED(object);
>   	KASSERT((object->flags & OBJ_SWAP) != 0,
>   	    ("%s: Object not swappable", __func__));
>   
> -	for (pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE(
> -	    &object->un_pager.swp.swp_blks, pi)) != NULL; ) {
> -		if ((object->flags & OBJ_DEAD) != 0) {
> +	pi = 0;
> +	i = 0;
> +	swp_pager_init_freerange(&range);
> +	for (;;) {
> +		if (i == 0 && (object->flags & OBJ_DEAD) != 0) {
>   			/*
>   			 * Make sure that pending writes finish before
>   			 * returning.
> @@ -1847,76 +1859,86 @@ swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object)
>   			swp_pager_meta_free_all(object);
>   			break;
>   		}
> -		for (i = 0; i < SWAP_META_PAGES; i++) {
> -			/*
> -			 * Count the number of contiguous valid blocks.
> -			 */
> -			for (nv = 0; nv < SWAP_META_PAGES - i; nv++) {
> -				blk = sb->d[i + nv];
> -				if (!swp_pager_isondev(blk, sp) ||
> -				    blk == SWAPBLK_NONE)
> -					break;
> -			}
> -			if (nv == 0)
> -				continue;
>   
> -			/*
> -			 * Look for a page corresponding to the first
> -			 * valid block and ensure that any pending paging
> -			 * operations on it are complete.  If the page is valid,
> -			 * mark it dirty and free the swap block.  Try to batch
> -			 * this operation since it may cause sp to be freed,
> -			 * meaning that we must restart the scan.  Avoid busying
> -			 * valid pages since we may block forever on kernel
> -			 * stack pages.
> -			 */
> -			m = vm_page_lookup(object, sb->p + i);
> -			if (m == NULL) {
> -				m = vm_page_alloc(object, sb->p + i,
> -				    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
> -				if (m == NULL)
> -					break;
> -			} else {
> -				if ((m->oflags & VPO_SWAPINPROG) != 0) {
> -					m->oflags |= VPO_SWAPSLEEP;
> -					VM_OBJECT_SLEEP(object, &object->handle,
> -					    PSWP, "swpoff", 0);
> -					break;
> -				}
> -				if (vm_page_all_valid(m)) {
> -					do {
> -						swp_pager_force_dirty(m);
> -					} while (--nv > 0 &&
> -					    (m = vm_page_next(m)) != NULL &&
> -					    vm_page_all_valid(m) &&
> -					    (m->oflags & VPO_SWAPINPROG) == 0);
> -					break;
> -				}
> -				if (!vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL))
> -					break;
> +		if (i == SWAP_META_PAGES) {
> +			pi = sb->p + SWAP_META_PAGES;
> +			if (sb_empty) {
> +				SWAP_PCTRIE_REMOVE(
> +				    &object->un_pager.swp.swp_blks, sb->p);
> +				uma_zfree(swblk_zone, sb);
>   			}
> +			i = 0;
> +		}
>   
> -			vm_object_pip_add(object, 1);
> -			rahead = SWAP_META_PAGES;
> -			rv = swap_pager_getpages_locked(object, &m, 1, NULL,
> -			    &rahead);
> -			if (rv != VM_PAGER_OK)
> -				panic("%s: read from swap failed: %d",
> -				    __func__, rv);
> -			VM_OBJECT_WLOCK(object);
> -			vm_object_pip_wakeupn(object, 1);
> -			vm_page_xunbusy(m);
> +		if (i == 0) {
> +			sb = SWAP_PCTRIE_LOOKUP_GE(
> +			    &object->un_pager.swp.swp_blks, pi);
> +			if (sb == NULL)
> +				break;
> +			sb_empty = true;
> +			m = NULL;
> +		}
>   
> -			/*
> -			 * The object lock was dropped so we must restart the
> -			 * scan of this swap block.  Pages paged in during this
> -			 * iteration will be marked dirty in a future iteration.
> -			 */
> -			break;
> +		/* Skip an invalid block. */
> +		blk = sb->d[i];
> +		if (blk == SWAPBLK_NONE || !swp_pager_isondev(blk, sp)) {
> +			if (blk != SWAPBLK_NONE)
> +				sb_empty = false;
> +			m = NULL;
> +			i++;
> +			continue;
>   		}
> -		if (i == SWAP_META_PAGES)
> -			pi = sb->p + SWAP_META_PAGES;
> +
> +		/*
> +		 * Look for a page corresponding to this block.  If the found
> +		 * page has pending operations, sleep and restart the scan.
> +		 */
> +		m = m != NULL ? vm_page_next(m) :
> +		    vm_page_lookup(object, sb->p + i);
> +		if (m != NULL && (m->oflags & VPO_SWAPINPROG) != 0) {
> +			m->oflags |= VPO_SWAPSLEEP;
> +			VM_OBJECT_SLEEP(object, &object->handle, PSWP, "swpoff",
> +			    0);
> +			i = 0;	/* Restart scan after object lock dropped. */
> +			continue;
> +		}
> +
> +		/*
> +		 * If the found page is valid, mark it dirty and free the swap
> +		 * block.
> +		 */
> +		if (m != NULL && vm_page_all_valid(m)) {
> +			swp_pager_force_dirty(&range, m, &sb->d[i]);
> +			i++;
> +			continue;
> +		}
> +
> +		/* Is there a page we can acquire or allocate? */
> +		if (m == NULL) {
> +			m = vm_page_alloc(object, sb->p + i,
> +			    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
> +		} else if (!vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL))
> +			m = NULL;
> +
> +		/* If no page available, repeat this iteration. */
> +		if (m == NULL)
> +			continue;
> +
> +		/* Get the page from swap, mark it dirty, restart the scan. */
> +		vm_object_pip_add(object, 1);
> +		rahead = SWAP_META_PAGES;
> +		rv = swap_pager_getpages_locked(object, &m, 1, NULL, &rahead);
> +		if (rv != VM_PAGER_OK)
> +			panic("%s: read from swap failed: %d", __func__, rv);
> +		VM_OBJECT_WLOCK(object);
> +		vm_object_pip_wakeupn(object, 1);
> +		KASSERT(vm_page_all_valid(m),
> +		    ("%s: Page %p not all valid", __func__, m));
> +		swp_pager_force_dirty(&range, m, &sb->d[i]);
> +		vm_page_xunbusy(m);
> +		i = 0;	/* Restart scan after object lock dropped. */
>   	}
> +	swp_pager_freeswapspace(&range);
>   }
>   
>   /*