FreeBSD and memory balloon drivers

Konstantin Belousov kostikbel at gmail.com
Sat Jun 21 05:31:31 UTC 2014


On Fri, Jun 20, 2014 at 06:56:20PM +0200, Roger Pau Monn? wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
> 
> On 20/06/14 17:26, Konstantin Belousov wrote:
> > On Fri, Jun 20, 2014 at 05:15:43PM +0200, Roger Pau Monn? wrote:
> >> -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1
> >> 
> >> On 20/06/14 15:28, Konstantin Belousov wrote:
> >>> On Fri, Jun 20, 2014 at 11:35:53AM +0200, Roger Pau Monn?
> >>> wrote:
> >>>> Hello,
> >>>> 
> >>>> I've been looking into the Xen balloon driver, because I've 
> >>>> experienced problems when ballooning memory down which AFAICT
> >>>> are also present in the VirtIO balloon driver. The problem
> >>>> I've experienced is that when ballooning memory down, we
> >>>> basically allocate a bunch of memory as WIRED, to make sure
> >>>> nobody tries to swap it do disk, since it will crash the
> >>>> kernel because the memory is not populated. Due to this
> >>>> massive amount of memory allocated as WIRED, user-space
> >>>> programs that try to use mlock will fail because we hit the
> >>>> limit in vm.max_wired.
> >>>> 
> >>>> I'm not sure what's the best way to deal with this
> >>>> limitation, should vm.max_wired be changed from the balloon
> >>>> drivers when ballooning down/up? Is there anyway to remove
> >>>> the pages ballooned down from the memory accounting of wired
> >>>> pages?
> >>> 
> >>> You could change the type of pages the ballon driver is 
> >>> allocating. Instead of wired pages, you may request unmanaged,
> >>> by passing NULL object to vm_page_alloc().  This would also
> >>> save on the trie nodes for managing the radix trie for the
> >>> object.  There are still plinks or listq to keep track of the
> >>> allocated pages.
> >> 
> >> Thanks for the info, I have the following patch which fixes the
> >> usage of WIRED for both the Xen and the VirtIO balloon drivers,
> >> could someone please test the VirtIO side?
> > I briefly looked at the xen balloon.  You do not need
> > balloon_append(). Use struct vm_page plinks field to link the
> > pages.
> 
> Sure, thanks for the review, here is an updated version.
It looks fine to me.

> 
> Roger.
> 
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v1.4.12 (Darwin)
> 
> iQEcBAEBAgAGBQJTpGe0AAoJEKXZdqUyumTA5acH/jQ+Iwjw+QTwUtsh9ZLs7Gm0
> h7XaCwa/gASEjzcxK2smIBuKA10LUSoulcYkC3cUWXqaPwFega14JcbySBRI06Z1
> 1bU50DL8TPLuQIzrVWZgtA+QsZkwEu/jhijr0AMxTcnm6Wq1LV5QRDoqhD0kRiHu
> kEW1ez832y/0j+oeQAq0aR67zVw7hoIouH9eLaWXS4Vgxz5YQ2Pal1BMAY/OCgua
> xyF7BHia9KsGTKZ9pPUQfQAW5eJrwAxR0AitQjmOwRKtWyRqymZxhYHjLYOgOKQJ
> w93aLRVeG2oo0NNC6a9JD/c64sGHLABg37mnSTRL/v9gTj9I1DcFoid2q0iDGbM=
> =8nDn
> -----END PGP SIGNATURE-----

> >From e5c898b1b2b734fdc4aa2acf645efd481f09b71d Mon Sep 17 00:00:00 2001
> From: Roger Pau Monne <roger.pau at citrix.com>
> Date: Fri, 20 Jun 2014 16:34:31 +0200
> Subject: [PATCH] xen/virtio: fix balloon drivers to not mark pages as WIRED
> 
> Prevent the Xen and VirtIO balloon drivers from marking pages as
> wired. This prevents them from increasing the system wired page count,
> which can lead to mlock failing because of hitting the limit in
> vm.max_wired.
> 
> In the Xen case make sure pages are zeroed before giving them back to
> the hypervisor, or else we might be leaking data. Also remove the
> balloon_{append/retrieve} and link pages directly into the
> ballooned_pages queue using the plinks.q field in the page struct.
> 
> Sponsored by: Citrix Systems R&D
> Reviewed by: xxx
> Approved by: xxx
> 
> dev/virtio/balloon/virtio_balloon.c:
>  - Don't allocate pages with VM_ALLOC_WIRED.
> 
> dev/xen/balloon/balloon.c:
>  - Don't allocate pages with VM_ALLOC_WIRED.
>  - Make sure pages are zeroed before giving them back to the
>    hypervisor.
>  - Remove the balloon_entry struct and the balloon_{append/retrieve}
>    functions and use the page plinks.q entry to link the pages
>    directly into the ballooned_pages queue.
> ---
>  sys/dev/virtio/balloon/virtio_balloon.c |    4 +-
>  sys/dev/xen/balloon/balloon.c           |   87 ++++++++-----------------------
>  2 files changed, 23 insertions(+), 68 deletions(-)
> 
> diff --git a/sys/dev/virtio/balloon/virtio_balloon.c b/sys/dev/virtio/balloon/virtio_balloon.c
> index d540099..6d00ef3 100644
> --- a/sys/dev/virtio/balloon/virtio_balloon.c
> +++ b/sys/dev/virtio/balloon/virtio_balloon.c
> @@ -438,8 +438,7 @@ vtballoon_alloc_page(struct vtballoon_softc *sc)
>  {
>  	vm_page_t m;
>  
> -	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
> -	    VM_ALLOC_NOOBJ);
> +	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
>  	if (m != NULL)
>  		sc->vtballoon_current_npages++;
>  
> @@ -450,7 +449,6 @@ static void
>  vtballoon_free_page(struct vtballoon_softc *sc, vm_page_t m)
>  {
>  
> -	vm_page_unwire(m, PQ_INACTIVE);
>  	vm_page_free(m);
>  	sc->vtballoon_current_npages--;
>  }
> diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c
> index fa56c86..0d2bba2 100644
> --- a/sys/dev/xen/balloon/balloon.c
> +++ b/sys/dev/xen/balloon/balloon.c
> @@ -94,13 +94,8 @@ SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD,
>  SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
>      &bs.balloon_high, 0, "High-mem balloon");
>  
> -struct balloon_entry {
> -	vm_page_t page;
> -	STAILQ_ENTRY(balloon_entry) list;
> -};
> -
>  /* List of ballooned pages, threaded through the mem_map array. */
> -static STAILQ_HEAD(,balloon_entry) ballooned_pages;
> +static TAILQ_HEAD(,vm_page) ballooned_pages;
>  
>  /* Main work function, always executed in process context. */
>  static void balloon_process(void *unused);
> @@ -110,47 +105,6 @@ static void balloon_process(void *unused);
>  #define WPRINTK(fmt, args...) \
>  	printk(KERN_WARNING "xen_mem: " fmt, ##args)
>  
> -/* balloon_append: add the given page to the balloon. */
> -static int
> -balloon_append(vm_page_t page)
> -{
> -	struct balloon_entry *entry;
> -
> -	mtx_assert(&balloon_mutex, MA_OWNED);
> -
> -	entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_NOWAIT);
> -	if (!entry)
> -		return (ENOMEM);
> -	entry->page = page;
> -	STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
> -	bs.balloon_low++;
> -
> -	return (0);
> -}
> -
> -/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
> -static vm_page_t
> -balloon_retrieve(void)
> -{
> -	vm_page_t page;
> -	struct balloon_entry *entry;
> -
> -	mtx_assert(&balloon_mutex, MA_OWNED);
> -
> -	if (STAILQ_EMPTY(&ballooned_pages))
> -		return (NULL);
> -
> -	entry = STAILQ_FIRST(&ballooned_pages);
> -	STAILQ_REMOVE_HEAD(&ballooned_pages, list);
> -
> -	page = entry->page;
> -	free(entry, M_BALLOON);
> -	
> -	bs.balloon_low--;
> -
> -	return (page);
> -}
> -
>  static unsigned long 
>  current_target(void)
>  {
> @@ -203,7 +157,6 @@ static int
>  increase_reservation(unsigned long nr_pages)
>  {
>  	unsigned long  pfn, i;
> -	struct balloon_entry *entry;
>  	vm_page_t      page;
>  	long           rc;
>  	struct xen_memory_reservation reservation = {
> @@ -217,10 +170,9 @@ increase_reservation(unsigned long nr_pages)
>  	if (nr_pages > nitems(frame_list))
>  		nr_pages = nitems(frame_list);
>  
> -	for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
> -	     i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
> -		KASSERT(entry, ("ballooned_pages list corrupt"));
> -		page = entry->page;
> +	for (page = TAILQ_FIRST(&ballooned_pages), i = 0;
> +	     i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) {
> +		KASSERT(page != NULL, ("ballooned_pages list corrupt"));
>  		frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
>  	}
>  
> @@ -245,8 +197,10 @@ increase_reservation(unsigned long nr_pages)
>  	}
>  
>  	for (i = 0; i < nr_pages; i++) {
> -		page = balloon_retrieve();
> -		KASSERT(page, ("balloon_retrieve failed"));
> +		page = TAILQ_FIRST(&ballooned_pages);
> +		KASSERT(page != NULL, ("Unable to get ballooned page"));
> +		TAILQ_REMOVE(&ballooned_pages, page, plinks.q);
> +		bs.balloon_low--;
>  
>  		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
>  		KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
> @@ -255,7 +209,6 @@ increase_reservation(unsigned long nr_pages)
>  
>  		set_phys_to_machine(pfn, frame_list[i]);
>  
> -		vm_page_unwire(page, PQ_INACTIVE);
>  		vm_page_free(page);
>  	}
>  
> @@ -286,24 +239,27 @@ decrease_reservation(unsigned long nr_pages)
>  	for (i = 0; i < nr_pages; i++) {
>  		if ((page = vm_page_alloc(NULL, 0, 
>  			    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 
> -			    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
> +			    VM_ALLOC_ZERO)) == NULL) {
>  			nr_pages = i;
>  			need_sleep = 1;
>  			break;
>  		}
>  
> +		if ((page->flags & PG_ZERO) == 0) {
> +			/*
> +			 * Zero the page, or else we might be leaking
> +			 * important data to other domains on the same
> +			 * host.
> +			 */
> +			pmap_zero_page(page);
> +		}
> +
>  		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
>  		frame_list[i] = PFNTOMFN(pfn);
>  
>  		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
> -		if (balloon_append(page) != 0) {
> -			vm_page_unwire(page, PQ_INACTIVE);
> -			vm_page_free(page);
> -
> -			nr_pages = i;
> -			need_sleep = 1;
> -			break;
> -		}
> +		TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
> +		bs.balloon_low++;
>  	}
>  
>  	set_xen_guest_handle(reservation.extent_start, frame_list);
> @@ -438,7 +394,8 @@ balloon_init(void *arg)
>  	/* Initialise the balloon with excess memory space. */
>  	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
>  		page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
> -		balloon_append(page);
> +		TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
> +		bs.balloon_low++;
>  	}
>  #undef max_pfn
>  #endif
> -- 
> 1.7.7.5 (Apple Git-26)
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 819 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/freebsd-xen/attachments/20140621/2a3ab2da/attachment.sig>


More information about the freebsd-xen mailing list