PV i386 patch

Alan Cox alc at rice.edu
Tue Dec 20 09:02:55 UTC 2011


On 12/19/2011 18:09, Sean Bruno wrote:
> On Sat, 2011-12-17 at 18:01 -0800, Colin Percival wrote:
>> On 12/17/11 16:56, Sean Bruno wrote:
>>> This seems happy on our ref9 VMs.  I don't suppose this means I can go
>>> above 768M of RAM now?
>> Can't hurt to try... whatever the problem is with our code and large
>> amounts of RAM, the fact that it's an insta-panic during paging setup
>> suggests that it's something at a similar level of fail.
>>
> Nope, insta panic ... early though.  768M works, 1024M panics.
>
>
>
> [root at xen1 sbruno]# /usr/sbin/xm create -c ref9-xen32
> Using config file "/etc/xen/ref9-xen32".
> Started domain ref9-xen32 (id=109)
> WARNING: loader(8) metadata is missing!
> GDB: no debug ports present
> KDB: debugger backends: ddb
> KDB: current backend: ddb
> Copyright (c) 1992-2011 The FreeBSD Project.
> Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994
>          The Regents of the University of California. All rights reserved.
> FreeBSD is a registered trademark of The FreeBSD Foundation.
> FreeBSD 9.0-PRERELEASE #0: Sat Dec 17 16:13:02 PST 2011
>
> sbruno at ref9-xen32.freebsd.org:/dumpster/scratch/sbruno-scratch/9/sys/i386/compile/XEN i386
> WARNING: WITNESS option enabled, expect reduced performance.
> panic: pmap_init: page table page is out of range
> cpuid = 0
> KDB: enter: panic
> [ thread pid 0 tid 0 ]
> Stopped at      0xc0181d7a:     movl    $0,0xc0478174
>
>
>

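The code that panics -- the loop in pmap_init() that initializes the vm_page
array entries for the kernel pmap's page table pages, and whose KASSERT
produces the "page table page is out of range" message -- shouldn't even
exist in the Xen pmap.  Try the attached patch.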

Alan

-------------- next part --------------
Index: vm/vm_map.c
===================================================================
--- vm/vm_map.c	(revision 228570)
+++ vm/vm_map.c	(working copy)
@@ -78,6 +78,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/file.h>
+#include <sys/sbuf.h>	/* For sysctl_kernel_map() */
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/shm.h>
@@ -3890,6 +3891,65 @@ vm_map_lookup_done(vm_map_t map, vm_map_entry_t en
 	vm_map_unlock_read(map);
 }
 
+static int sysctl_kernel_map(SYSCTL_HANDLER_ARGS);
+SYSCTL_OID(_vm, OID_AUTO, kernel_map, CTLTYPE_STRING | CTLFLAG_RD,
+    NULL, 0, sysctl_kernel_map, "A", "Kernel Map Info");
+
+static int
+sysctl_kernel_map(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf sbuf;
+	vm_map_entry_t entry;
+	vm_offset_t va;
+	vm_paddr_t locked_pa;
+	vm_page_t m;
+	char *cbuf;
+	const int cbufsize = kernel_map->nentries * 81;
+	int error, x, y;
+
+	cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
+	sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
+	for (entry = kernel_map->header.next; entry != &kernel_map->header;
+            entry = entry->next) {
+		x = y = 0;
+		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0)
+		for (va = entry->start; va < entry->end; va = 
+#ifdef __amd64__
+		    round_2mpage(va + 1)
+#else
+		    round_4mpage(va + 1)
+#endif
+		    ) {
+			if (va ==
+#ifdef __amd64__
+			    trunc_2mpage(va)
+#else
+			    trunc_4mpage(va)
+#endif
+			    ) {
+				locked_pa = 0;
+				x++;
+				if (pmap_mincore(kernel_pmap, va, &locked_pa) &
+				    MINCORE_SUPER)
+					y++;
+				if (locked_pa != 0) {
+					m = PHYS_TO_VM_PAGE(locked_pa);
+					vm_page_unlock(m);
+				}
+			}
+		}
+		sbuf_printf(&sbuf, "\nstart=%p, end=%p, %s=%p (%d/%d)",
+                    (void *)entry->start, (void *)entry->end,
+                    (entry->eflags & MAP_ENTRY_IS_SUB_MAP) ? "submap" : "object",
+                    entry->object.vm_object, x, y);
+	}
+	sbuf_finish(&sbuf);
+	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
+	sbuf_delete(&sbuf);
+	free(cbuf, M_TEMP);
+	return (error);
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include <sys/kernel.h>
Index: vm/vm_object.c
===================================================================
--- vm/vm_object.c	(revision 228570)
+++ vm/vm_object.c	(working copy)
@@ -1308,6 +1308,14 @@ retry:
 			VM_OBJECT_LOCK(new_object);
 			goto retry;
 		}
+#if 0
+#if VM_NRESERVLEVEL > 0
+		/*
+		 * Rename the reservation.
+		 */
+		vm_reserv_rename(m, new_object, orig_object, offidxstart);
+#endif
+#endif
 		vm_page_lock(m);
 		vm_page_rename(m, new_object, idx);
 		vm_page_unlock(m);
Index: i386/xen/mp_machdep.c
===================================================================
--- i386/xen/mp_machdep.c	(revision 228570)
+++ i386/xen/mp_machdep.c	(working copy)
@@ -810,7 +810,7 @@ cpu_initialize_context(unsigned int cpu)
 {
 	/* vcpu_guest_context_t is too large to allocate on the stack.
 	 * Hence we allocate statically and protect it with a lock */
-	vm_page_t m[4];
+	vm_page_t m[NPGPTD + 2];
 	static vcpu_guest_context_t ctxt;
 	vm_offset_t boot_stack;
 	vm_offset_t newPTD;
@@ -831,8 +831,8 @@ cpu_initialize_context(unsigned int cpu)
 		pmap_zero_page(m[i]);
 
 	}
-	boot_stack = kmem_alloc_nofault(kernel_map, 1);
-	newPTD = kmem_alloc_nofault(kernel_map, NPGPTD);
+	boot_stack = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+	newPTD = kmem_alloc_nofault(kernel_map, NPGPTD * PAGE_SIZE);
 	ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V;
 
 #ifdef PAE	
@@ -854,7 +854,7 @@ cpu_initialize_context(unsigned int cpu)
 	    nkpt*sizeof(vm_paddr_t));
 
 	pmap_qremove(newPTD, 4);
-	kmem_free(kernel_map, newPTD, 4);
+	kmem_free(kernel_map, newPTD, 4 * PAGE_SIZE);
 	/*
 	 * map actual idle stack to boot_stack
 	 */
Index: i386/xen/pmap.c
===================================================================
--- i386/xen/pmap.c	(revision 228570)
+++ i386/xen/pmap.c	(working copy)
@@ -184,9 +184,6 @@ __FBSDID("$FreeBSD$");
 #define PV_STAT(x)	do { } while (0)
 #endif
 
-#define	pa_index(pa)	((pa) >> PDRSHIFT)
-#define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])
-
 /*
  * Get PDEs and PTEs for user/kernel address space
  */
@@ -230,7 +227,6 @@ static int pat_works;			/* Is page attribute table
  * Data for the pv entry allocation mechanism
  */
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
-static struct md_page *pv_table;
 static int shpgperproc = PMAP_SHPGPERPROC;
 
 struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
@@ -636,24 +632,8 @@ pmap_ptelist_init(vm_offset_t *head, void *base, i
 void
 pmap_init(void)
 {
-	vm_page_t mpte;
-	vm_size_t s;
-	int i, pv_npg;
 
 	/*
-	 * Initialize the vm page array entries for the kernel pmap's
-	 * page table pages.
-	 */ 
-	for (i = 0; i < nkpt; i++) {
-		mpte = PHYS_TO_VM_PAGE(xpmap_mtop(PTD[i + KPTDI] & PG_FRAME));
-		KASSERT(mpte >= vm_page_array &&
-		    mpte < &vm_page_array[vm_page_array_size],
-		    ("pmap_init: page table page is out of range"));
-		mpte->pindex = i + KPTDI;
-		mpte->phys_addr = xpmap_mtop(PTD[i + KPTDI] & PG_FRAME);
-	}
-
-        /*
 	 * Initialize the address space (zone) for the pv entries.  Set a
 	 * high water mark so that the system can recover from excessive
 	 * numbers of pv entries.
@@ -664,26 +644,6 @@ pmap_init(void)
 	pv_entry_max = roundup(pv_entry_max, _NPCPV);
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
-	/*
-	 * Are large page mappings enabled?
-	 */
-	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
-
-	/*
-	 * Calculate the size of the pv head table for superpages.
-	 */
-	for (i = 0; phys_avail[i + 1]; i += 2);
-	pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
-
-	/*
-	 * Allocate memory for the pv head table for superpages.
-	 */
-	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
-	s = round_page(s);
-	pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
-	for (i = 0; i < pv_npg; i++)
-		TAILQ_INIT(&pv_table[i].pv_list);
-
 	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
 	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
 	    PAGE_SIZE * pv_maxchunks);
@@ -3448,21 +3408,15 @@ pmap_page_wired_mappings(vm_page_t m)
 }
 
 /*
- * Returns TRUE if the given page is mapped individually or as part of
- * a 4mpage.  Otherwise, returns FALSE.
+ * Returns TRUE if the given page is mapped.  Otherwise, returns FALSE.
  */
 boolean_t
 pmap_page_is_mapped(vm_page_t m)
 {
-	boolean_t rv;
 
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (FALSE);
-	vm_page_lock_queues();
-	rv = !TAILQ_EMPTY(&m->md.pv_list) ||
-	    !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list);
-	vm_page_unlock_queues();
-	return (rv);
+	return (!TAILQ_EMPTY(&m->md.pv_list));
 }
 
 /*


More information about the freebsd-xen mailing list