CUREENT issue with ballon.c

Roger Pau Monné roger.pau at citrix.com
Mon Oct 28 10:41:25 UTC 2013


On 25/10/13 00:24, Outback Dingo wrote:
> On Thu, Oct 24, 2013 at 6:17 PM, Roger Pau Monné <roger.pau at citrix.com>wrote:
> 
>> On 24/10/13 22:15, Konstantin Belousov wrote:
>>> On Thu, Oct 24, 2013 at 09:45:20PM +0100, Roger Pau Monn? wrote:
>>>> On 24/10/13 13:01, Outback Dingo wrote:
>>>>>
>>>>>
>>>>> On Thu, Oct 24, 2013 at 6:16 AM, Roger Pau Monn? <roger.pau at citrix.com
>>>>> <mailto:roger.pau at citrix.com>> wrote:
>>>>>
>>>>>     On 24/10/13 03:02, Outback Dingo wrote:
>>>>>     > --- trap 0, rip = 0, rsp = 0xfffffe00002c6b70, rbp = 0 ---
>>>>>     > uma_zalloc_arg: zone "16" with the following non-sleepable locks
>> held:
>>>>>     > exclusive sleep mutex balloon_lock (balloon_lock) r = 0
>>>>>     > (0xffffffff816e9c58) locked @
>>>>>     /usr/src/sys/dev/xen/balloon/balloon.c:339
>>>>>     > exclusive sleep mutex balloon_mutex (balloon_mutex) r = 0
>>>>>     > (0xffffffff816e9c38) locked @
>>>>>     /usr/src/sys/dev/xen/balloon/balloon.c:373
>>>>>     > KDB: stack backtrace:
>>>>>     > db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame
>>>>>     > 0xfffffe00002c67c0
>>>>>     > kdb_backtrace() at kdb_backtrace+0x39/frame 0xfffffe00002c6870
>>>>>     > witness_warn() at witness_warn+0x4a8/frame 0xfffffe00002c6930
>>>>>     > uma_zalloc_arg() at uma_zalloc_arg+0x3b/frame 0xfffffe00002c69a0
>>>>>     > malloc() at malloc+0x101/frame 0xfffffe00002c69f0
>>>>>     > balloon_process() at balloon_process+0x44a/frame
>> 0xfffffe00002c6a70
>>>>>     > fork_exit() at fork_exit+0x84/frame 0xfffffe00002c6ab0
>>>>>     > fork_trampoline() at fork_trampoline+0xe/frame 0xfffffe00002c6ab0
>>>>>     > --- trap 0, rip = 0, rsp = 0xfffffe00002c6b70, rbp = 0 ---
>>>>>     > uma_zalloc_arg: zone "16" with the following non-sleepable locks
>> held:
>>>>>     > exclusive sleep mutex balloon_lock (balloon_lock) r = 0
>>>>>     > (0xffffffff816e9c58) locked @
>>>>>     /usr/src/sys/dev/xen/balloon/balloon.c:339
>>>>>     > exclusive sleep mutex balloon_mutex (balloon_mutex) r = 0
>>>>>     > (0xffffffff816e9c38) locked @
>>>>>     /usr/src/sys/dev/xen/balloon/balloon.c:373
>>>>>     > KDB: stack backtrace:
>>>>>     > db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame
>>>>>     > 0xfffffe00002c67c0
>>>>>     > kdb_backtrace() at kdb_backtrace+0x39/frame 0xfffffe00002c6870
>>>>>     > witness_warn() at witness_warn+0x4a8/frame 0xfffffe00002c6930
>>>>>     > uma_zalloc_arg() at uma_zalloc_arg+0x3b/frame 0xfffffe00002c69a0
>>>>>     > malloc() at malloc+0x101/frame 0xfffffe00002c69f0
>>>>>     > balloon_process() at balloon_process+0x44a/frame
>> 0xfffffe00002c6a70
>>>>>     > fork_exit() at fork_exit+0x84/frame 0xfffffe00002c6ab0
>>>>>     > fork_trampoline() at fork_trampoline+0xe/frame 0xfffffe00002c6ab0
>>>>>     > --- trap 0, rip = 0, rsp = 0xfffffe00002c6b70, rbp = 0 ---
>>>>>     > uma_zalloc_arg: zone "16" with the following non-sleepable locks
>> held:
>>>>>
>>>>>     Did you do anything specific to trigger the crash? Can you explain
>> the
>>>>>     steps needed to reproduce it?
>>>>>
>>>>>
>>>>> just recompiled a kernel, and booted it scrolls continuously across the
>>>>> screen
>>>>> doesnt seem to ever stop.
>>>>
>>>> I've tried r257051 and it seems to work fine, could you please post your
>>>> Xen version, the config file used to launch the VM and the toolstack
>> used?
>>>
>>> Do you have witness enabled in your kernel config ?
>>
>> Yes, but I'm not touching balloon memory target.
>>
>>> There is an obvious case of calling malloc(M_WAITOK) while holding both
>>> balloon_lock and balloon_mutex:
>>> ballon_process->decrease_reservation->balloon_append.
>>
>> Yes, I'm aware of that, it's just that it shouldn't happen unless you
>> actually trigger a balloon memory decrease, which should not happen
>> automatically AFAIK, that's why I was asking if this was happening
>> without the user specifically requesting it.
>>
>> Anyway, this should be clearly fixed and pulled into 10 no matter what
>> triggered it. I will send a patch as soon as possible.
>>
>>
> Yes, WITNESS was enabled, im using Kubuntu / XEN kernel / and
> virt-manager.... it was fine running Current, until i ran updates this week
> then encountered this. Ive since disabled WITNESS with a recompile, but the
> VM still apears more sluggish then before.
> 
> root at M14xR2:/home/dingo# xm info
> host                   : M14xR2
> release                : 3.11.0-12-generic
> version                : #19-Ubuntu SMP Wed Oct 9 16:20:46 UTC 2013
> machine                : x86_64
> nr_cpus                : 4
> nr_nodes               : 1
> cores_per_socket       : 2
> threads_per_core       : 2
> cpu_mhz                : 2494
> xen:/// capabilities:
> <capabilities>
> 
>   <host>
>     <cpu>
>       <arch>x86_64</arch>
>       <features>
>         <pae/>
>       </features>
>     </cpu>
>     <power_management>
>       <suspend_mem/>
>       <suspend_disk/>
>       <suspend_hybrid/>
>     </power_management>
>     <migration_features>
>       <live/>
>       <uri_transports>
>         <uri_transport>xenmigr</uri_transport>
>       </uri_transports>
>     </migration_features>
>   </host>
> 
>   <guest>
>     <os_type>xen</os_type>
>     <arch name='x86_64'>
>       <wordsize>64</wordsize>
>       <emulator>qemu-dm</emulator>
>       <machine>xenpv</machine>
>       <domain type='xen'>
>       </domain>
>     </arch>
>   </guest>
> 
>   <guest>
>     <os_type>xen</os_type>
>     <arch name='i686'>
>       <wordsize>32</wordsize>
>       <emulator>qemu-dm</emulator>
>       <machine>xenpv</machine>
>       <domain type='xen'>
>       </domain>
>     </arch>
>     <features>
>       <pae/>
>     </features>
>   </guest>
> 
>   <guest>
>     <os_type>hvm</os_type>
>     <arch name='i686'>
>       <wordsize>32</wordsize>
>       <emulator>qemu-dm</emulator>
>       <loader>hvmloader</loader>
>       <machine>xenfv</machine>
>       <domain type='xen'>
>       </domain>
>     </arch>
>     <features>
>       <pae/>
>       <nonpae/>
>       <acpi default='on' toggle='yes'/>
>       <apic default='on' toggle='no'/>
>       <hap default='off' toggle='yes'/>
>       <viridian default='off' toggle='yes'/>
>     </features>
>   </guest>
> 
>   <guest>
>     <os_type>hvm</os_type>
>     <arch name='x86_64'>
>       <wordsize>64</wordsize>
>       <emulator>qemu-dm</emulator>
>       <loader>hvmloader</loader>
>       <machine>xenfv</machine>
>       <domain type='xen'>
>       </domain>
>     </arch>
>     <features>
>       <acpi default='on' toggle='yes'/>
>       <apic default='on' toggle='no'/>
>       <hap default='off' toggle='yes'/>
>       <viridian default='off' toggle='yes'/>
>     </features>
>   </guest>
> 
> </capabilities>
> 
> 
> 
> hw_caps                :
> bfebfbff:28100800:00000000:00007f00:77bae3bf:00000000:00000001:00000281
> virt_caps              : hvm
> total_memory           : 8074
> free_memory            : 12
> free_cpus              : 0
> xen_major              : 4
> xen_minor              : 3
> xen_extra              : .0
> xen_caps               : xen-3.0-x86_64 xen-3.0-x86_32p hvm-3.0-x86_32
> hvm-3.0-x86_32p hvm-3.0-x86_64
> xen_scheduler          : credit
> xen_pagesize           : 4096
> platform_params        : virt_start=0xffff800000000000
> xen_changeset          :
> xen_commandline        : placeholder
> cc_compiler            : gcc (Ubuntu/Linaro 4.8.1-10ubuntu5) 4.8.1
> cc_compile_by          : stefan.bader
> cc_compile_domain      : canonical.com
> cc_compile_date        : Wed Oct  2 11:17:12 UTC 2013
> xend_config_format     : 4
> root at M14xR2:/home/dingo# uname -a
> Linux M14xR2 3.11.0-12-generic #19-Ubuntu SMP Wed Oct 9 16:20:46 UTC 2013
> x86_64 x86_64 x86_64 GNU/Linux

I've never used libvirt, so I'm unsure why is it triggering a balloon
update without the user requesting it (maybe too many guests running on
the same host?).

Anyway, the attached patch should fix the issues with the balloon
driver, it contains some clean up of the code, the fixes to avoid
calling malloc with M_WAITOK while holding the mutex and also a fix for
properly accounting for memory. Right now if we reduce memory to 900MB
for example FreeBSD keeps using more memory, because physmem doesn't
account for the memory where the kernel is mapped or the MP boot stacks.

Could you please try it and report the results?

-------------- next part --------------
>From 0d37840d4c3b6e838320ac9fb96855bbdf6513fb Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau at citrix.com>
Date: Fri, 25 Oct 2013 09:58:23 +0100
Subject: [PATCH] xen-ballon: fix and clean balloon driver

Remove unused cruft from the Xen balloon driver, fix calling a
sleepable malloc while holding the balloon mutex and do proper
accounting of the memory used by the domain.
---
 sys/dev/xen/balloon/balloon.c |  193 +++++++++++------------------------------
 1 files changed, 50 insertions(+), 143 deletions(-)

diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c
index 9021abb..894aa9a 100644
--- a/sys/dev/xen/balloon/balloon.c
+++ b/sys/dev/xen/balloon/balloon.c
@@ -52,18 +52,13 @@ __FBSDID("$FreeBSD$");
 
 static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
 
-struct mtx balloon_mutex;
+/* Convert from KB (as fetched from xenstore) to number of PAGES */
+#define KB_TO_PAGE_SHIFT	(PAGE_SHIFT - 10)
 
-/*
- * Protects atomic reservation decrease/increase against concurrent increases.
- * Also protects non-atomic updates of current_pages and driver_pages, and
- * balloon lists.
- */
-struct mtx balloon_lock;
+struct mtx balloon_mutex;
 
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
-#define ARRAY_SIZE(A)	(sizeof(A) / sizeof(A[0]))
 
 struct balloon_stats {
 	/* We aim for 'current allocation' == 'target allocation'. */
@@ -116,15 +111,21 @@ static void balloon_process(void *unused);
 	printk(KERN_WARNING "xen_mem: " fmt, ##args)
 
 /* balloon_append: add the given page to the balloon. */
-static void 
+static int
 balloon_append(vm_page_t page)
 {
 	struct balloon_entry *entry;
 
-	entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
+	mtx_assert(&balloon_mutex, MA_OWNED);
+
+	entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_NOWAIT);
+	if (!entry)
+		return (ENOMEM);
 	entry->page = page;
 	STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
 	bs.balloon_low++;
+
+	return 0;
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
@@ -134,6 +135,8 @@ balloon_retrieve(void)
 	vm_page_t page;
 	struct balloon_entry *entry;
 
+	mtx_assert(&balloon_mutex, MA_OWNED);
+
 	if (STAILQ_EMPTY(&ballooned_pages))
 		return NULL;
 
@@ -210,10 +213,10 @@ increase_reservation(unsigned long nr_pages)
 		.domid        = DOMID_SELF
 	};
 
-	if (nr_pages > ARRAY_SIZE(frame_list))
-		nr_pages = ARRAY_SIZE(frame_list);
+	mtx_assert(&balloon_mutex, MA_OWNED);
 
-	mtx_lock(&balloon_lock);
+	if (nr_pages > nitems(frame_list))
+		nr_pages = nitems(frame_list);
 
 	for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
 	     i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
@@ -253,32 +256,13 @@ increase_reservation(unsigned long nr_pages)
 
 		set_phys_to_machine(pfn, frame_list[i]);
 
-#if 0
-#ifndef XENHVM
-		/* Link back into the page tables if not highmem. */
-		if (pfn < max_low_pfn) {
-			int ret;
-			ret = HYPERVISOR_update_va_mapping(
-				(unsigned long)__va(pfn << PAGE_SHIFT),
-				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
-				0);
-			PASSING(ret == 0,
-			    ("HYPERVISOR_update_va_mapping failed"));
-		}
-#endif
-#endif
-
-		/* Relinquish the page back to the allocator. */
 		vm_page_unwire(page, 0);
 		vm_page_free(page);
 	}
 
 	bs.current_pages += nr_pages;
-	//totalram_pages = bs.current_pages;
 
  out:
-	mtx_unlock(&balloon_lock);
-
 	return 0;
 }
 
@@ -295,8 +279,10 @@ decrease_reservation(unsigned long nr_pages)
 		.domid        = DOMID_SELF
 	};
 
-	if (nr_pages > ARRAY_SIZE(frame_list))
-		nr_pages = ARRAY_SIZE(frame_list);
+	mtx_assert(&balloon_mutex, MA_OWNED);
+
+	if (nr_pages > nitems(frame_list))
+		nr_pages = nitems(frame_list);
 
 	for (i = 0; i < nr_pages; i++) {
 		if ((page = vm_page_alloc(NULL, 0, 
@@ -310,39 +296,15 @@ decrease_reservation(unsigned long nr_pages)
 		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
 		frame_list[i] = PFNTOMFN(pfn);
 
-#if 0
-		if (!PageHighMem(page)) {
-			v = phys_to_virt(pfn << PAGE_SHIFT);
-			scrub_pages(v, 1);
-#ifdef CONFIG_XEN
-			ret = HYPERVISOR_update_va_mapping(
-				(unsigned long)v, __pte_ma(0), 0);
-			BUG_ON(ret);
-#endif
-		}
-#endif
-#ifdef CONFIG_XEN_SCRUB_PAGES
-		else {
-			v = kmap(page);
-			scrub_pages(v, 1);
-			kunmap(page);
-		}
-#endif
-	}
-
-#ifdef CONFIG_XEN
-	/* Ensure that ballooned highmem pages don't have kmaps. */
-	kmap_flush_unused();
-	flush_tlb_all();
-#endif
-
-	mtx_lock(&balloon_lock);
-
-	/* No more mappings: invalidate P2M and add to balloon. */
-	for (i = 0; i < nr_pages; i++) {
-		pfn = MFNTOPFN(frame_list[i]);
 		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-		balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
+		if (balloon_append(page)) {
+			vm_page_unwire(page, 0);
+			vm_page_free(page);
+
+			nr_pages = i;
+			need_sleep = 1;
+			break;
+		}
 	}
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
@@ -351,9 +313,6 @@ decrease_reservation(unsigned long nr_pages)
 	KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
 
 	bs.current_pages -= nr_pages;
-	//totalram_pages = bs.current_pages;
-
-	mtx_unlock(&balloon_lock);
 
 	return (need_sleep);
 }
@@ -428,7 +387,7 @@ watch_target(struct xs_watch *watch,
 	/* The given memory/target value is in KiB, so it needs converting to
 	   pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
 	*/
-	set_new_target(new_target >> (PAGE_SHIFT - 10));
+	set_new_target(new_target >> KB_TO_PAGE_SHIFT);
     
 }
 
@@ -456,18 +415,35 @@ balloon_init(void *arg)
 	unsigned long pfn;
 
 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn
+#else
+	unsigned long long static_max, video_ram = 0;
+	int err;
 #endif
 
 	if (!is_running_on_xen())
 		return;
 
-	mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
 	mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
 
 #ifndef XENHVM
 	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
 #else
-	bs.current_pages = physmem;
+	err = xs_scanf(XST_NIL, "memory", "static-max", NULL, "%llu",
+	               &static_max);
+	xs_scanf(XST_NIL, "memory", "videoram", NULL, "%llu",
+	         &video_ram);
+	if (err) {
+		/*
+		 * Using physmem is not accurate, prefer xenstore
+		 * values as given by Xen when possible.
+		 */
+		static_max = physmem;
+	} else {
+		static_max -= video_ram;
+		static_max >>= KB_TO_PAGE_SHIFT;
+	}
+
+	bs.current_pages = static_max;
 #endif
 	bs.target_pages  = bs.current_pages;
 	bs.balloon_low   = 0;
@@ -497,76 +473,7 @@ void balloon_update_driver_allowance(long delta);
 void 
 balloon_update_driver_allowance(long delta)
 {
-	mtx_lock(&balloon_lock);
+	mtx_lock(&balloon_mutex);
 	bs.driver_pages += delta;
-	mtx_unlock(&balloon_lock);
-}
-
-#if 0
-static int dealloc_pte_fn(
-	pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
-{
-	unsigned long mfn = pte_mfn(*pte);
-	int ret;
-	struct xen_memory_reservation reservation = {
-		.extent_start = &mfn,
-		.nr_extents   = 1,
-		.extent_order = 0,
-		.domid        = DOMID_SELF
-	};
-	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
-	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
-	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-	KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
-	return 0;
-}
-
-#endif
-
-#if 0
-vm_page_t
-balloon_alloc_empty_page_range(unsigned long nr_pages)
-{
-	vm_page_t pages;
-	int i, rc;
-	unsigned long *mfn_list;
-	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
-		.extent_order = 0,
-		.domid        = DOMID_SELF
-	};
-
-	pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4)
-	if (pages == NULL)
-		return NULL;
-	
-	mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK);
-	
-	for (i = 0; i < nr_pages; i++) {
-		mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT);
-		PFNTOMFN(i) = INVALID_P2M_ENTRY;
-		reservation.extent_start = mfn_list;
-		reservation.nr_extents = nr_pages;
-		rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-		    &reservation);
-		KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));
-	}
-
-	current_pages -= nr_pages;
-
-	wakeup(balloon_process);
-
-	return pages;
-}
-
-void 
-balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
-{
-	unsigned long i;
-
-	for (i = 0; i < nr_pages; i++)
-		balloon_append(page + i);
-
-	wakeup(balloon_process);
+	mtx_unlock(&balloon_mutex);
 }
-#endif
-- 
1.7.7.5 (Apple Git-26)



More information about the freebsd-current mailing list