svn commit: r269134 - head/sys/vm
Alan Cox
alc at rice.edu
Wed Jul 30 19:54:38 UTC 2014
On 07/30/2014 14:46, Alan Cox wrote:
> On 07/30/2014 13:58, Andreas Tobler wrote:
>> Hi Alan,
>>
>> On 26.07.14 20:10, Alan Cox wrote:
>>> Author: alc
>>> Date: Sat Jul 26 18:10:18 2014
>>> New Revision: 269134
>>> URL: http://svnweb.freebsd.org/changeset/base/269134
>>>
>>> Log:
>>> When unwiring a region of an address space, do not assume that the
>>> underlying physical pages are mapped by the pmap. If, for
>>> example, the
>>> application has performed an mprotect(..., PROT_NONE) on any part
>>> of the
>>> wired region, then those pages will no longer be mapped by the pmap.
>>> So, using the pmap to lookup the wired pages in order to unwire them
>>> doesn't always work, and when it doesn't work wired pages are leaked.
>>>
>>> To avoid the leak, introduce and use a new function
>>> vm_object_unwire()
>>> that locates the wired pages by traversing the object and its backing
>>> objects.
>>>
>>> At the same time, switch from using pmap_change_wiring() to the
>>> recently
>>> introduced function pmap_unwire() for unwiring the region's mappings.
>>> pmap_unwire() is faster, because it operates a range of virtual
>>> addresses
>>> rather than a single virtual page at a time. Moreover, by
>>> operating on
>>> a range, it is superpage friendly. It doesn't waste time performing
>>> unnecessary demotions.
>>>
>>> Reported by: markj
>>> Reviewed by: kib
>>> Tested by: pho, jmg (arm)
>>> Sponsored by: EMC / Isilon Storage Division
>> This commit brings my 32- and 64-bit PowerMac's into panic.
>> Unfortunately I'm not able to give you a backtrace in the form of a
>> textdump nor of a core dump.
>>
>> The only thing I have is this picture:
>>
>> http://people.freebsd.org/~andreast/r269134_panic.jpg
>>
>> Exactly this revision gives a panic and breaks the textdump/coredump
>> facility.
>>
>> How can I help debugging?
>>
> It appears to me that moea64_pvo_enter() had a pre-existing bug that got
> tickled by this change. Specifically, moea64_pvo_enter() doesn't set
> the PVO_WIRED flag when an unwired mapping already exists. It just
> returns with the mapping still in an unwired state. Consequently, when
> pmap_unwire() finally runs, it doesn't find a wired mapping.
>
> Try this:
>
> Index: powerpc/aim/mmu_oea64.c
> ===================================================================
> --- powerpc/aim/mmu_oea64.c (revision 269127)
> +++ powerpc/aim/mmu_oea64.c (working copy)
> @@ -2274,7 +2274,8 @@ moea64_pvo_enter(mmu_t mmu, pmap_t pm, uma_zone_t
> if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
> if ((pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) == pa &&
> (pvo->pvo_pte.lpte.pte_lo & (LPTE_NOEXEC |
> LPTE_PP))
> - == (pte_lo & (LPTE_NOEXEC | LPTE_PP))) {
> + == (pte_lo & (LPTE_NOEXEC | LPTE_PP)) &&
> + ((pvo->pvo_vaddr ^ flags) & PVO_WIRED)) {
> if (!(pvo->pvo_pte.lpte.pte_hi &
> LPTE_VALID)) {
> /* Re-insert if spilled */
> i = MOEA64_PTE_INSERT(mmu, ptegidx,
>
The new conditional test needs to be inverted. Try this instead:
Index: powerpc/aim/mmu_oea64.c
===================================================================
--- powerpc/aim/mmu_oea64.c (revision 269127)
+++ powerpc/aim/mmu_oea64.c (working copy)
@@ -2274,7 +2274,8 @@ moea64_pvo_enter(mmu_t mmu, pmap_t pm, uma_zone_t
if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
if ((pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) == pa &&
(pvo->pvo_pte.lpte.pte_lo & (LPTE_NOEXEC |
LPTE_PP))
- == (pte_lo & (LPTE_NOEXEC | LPTE_PP))) {
+ == (pte_lo & (LPTE_NOEXEC | LPTE_PP)) &&
+ ((pvo->pvo_vaddr ^ flags) & PVO_WIRED) == 0) {
if (!(pvo->pvo_pte.lpte.pte_hi &
LPTE_VALID)) {
/* Re-insert if spilled */
i = MOEA64_PTE_INSERT(mmu, ptegidx,
>>> Modified:
>>> head/sys/vm/vm_extern.h
>>> head/sys/vm/vm_fault.c
>>> head/sys/vm/vm_map.c
>>> head/sys/vm/vm_object.c
>>> head/sys/vm/vm_object.h
>>>
>>> Modified: head/sys/vm/vm_extern.h
>>> ==============================================================================
>>>
>>> --- head/sys/vm/vm_extern.h Sat Jul 26 17:59:25 2014 (r269133)
>>> +++ head/sys/vm/vm_extern.h Sat Jul 26 18:10:18 2014 (r269134)
>>> @@ -81,7 +81,6 @@ int vm_fault_hold(vm_map_t map, vm_offse
>>> int fault_flags, vm_page_t *m_hold);
>>> int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr,
>>> vm_size_t len,
>>> vm_prot_t prot, vm_page_t *ma, int max_count);
>>> -void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
>>> int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
>>> int vm_forkproc(struct thread *, struct proc *, struct thread *,
>>> struct vmspace *, int);
>>> void vm_waitproc(struct proc *);
>>>
>>> Modified: head/sys/vm/vm_fault.c
>>> ==============================================================================
>>>
>>> --- head/sys/vm/vm_fault.c Sat Jul 26 17:59:25 2014 (r269133)
>>> +++ head/sys/vm/vm_fault.c Sat Jul 26 18:10:18 2014 (r269134)
>>> @@ -106,6 +106,7 @@ __FBSDID("$FreeBSD$");
>>> #define PFFOR 4
>>>
>>> static int vm_fault_additional_pages(vm_page_t, int, int, vm_page_t
>>> *, int *);
>>> +static void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t,
>>> boolean_t);
>>>
>>> #define VM_FAULT_READ_BEHIND 8
>>> #define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX)
>>> @@ -1186,7 +1187,7 @@ vm_fault_wire(vm_map_t map, vm_offset_t
>>> *
>>> * Unwire a range of virtual addresses in a map.
>>> */
>>> -void
>>> +static void
>>> vm_fault_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
>>> boolean_t fictitious)
>>> {
>>>
>>> Modified: head/sys/vm/vm_map.c
>>> ==============================================================================
>>>
>>> --- head/sys/vm/vm_map.c Sat Jul 26 17:59:25 2014 (r269133)
>>> +++ head/sys/vm/vm_map.c Sat Jul 26 18:10:18 2014 (r269134)
>>> @@ -132,6 +132,7 @@ static void _vm_map_init(vm_map_t map, p
>>> vm_offset_t max);
>>> static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t
>>> system_map);
>>> static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
>>> +static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
>>> #ifdef INVARIANTS
>>> static void vm_map_zdtor(void *mem, int size, void *arg);
>>> static void vmspace_zdtor(void *mem, int size, void *arg);
>>> @@ -2393,16 +2394,10 @@ done:
>>> (entry->eflags & MAP_ENTRY_USER_WIRED))) {
>>> if (user_unwire)
>>> entry->eflags &= ~MAP_ENTRY_USER_WIRED;
>>> - entry->wired_count--;
>>> - if (entry->wired_count == 0) {
>>> - /*
>>> - * Retain the map lock.
>>> - */
>>> - vm_fault_unwire(map, entry->start, entry->end,
>>> - entry->object.vm_object != NULL &&
>>> - (entry->object.vm_object->flags &
>>> - OBJ_FICTITIOUS) != 0);
>>> - }
>>> + if (entry->wired_count == 1)
>>> + vm_map_entry_unwire(map, entry);
>>> + else
>>> + entry->wired_count--;
>>> }
>>> KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
>>> ("vm_map_unwire: in-transition flag missing %p", entry));
>>> @@ -2635,19 +2630,12 @@ done:
>>> * unnecessary.
>>> */
>>> entry->wired_count = 0;
>>> - } else {
>>> - if (!user_wire ||
>>> - (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
>>> + } else if (!user_wire ||
>>> + (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
>>> + if (entry->wired_count == 1)
>>> + vm_map_entry_unwire(map, entry);
>>> + else
>>> entry->wired_count--;
>>> - if (entry->wired_count == 0) {
>>> - /*
>>> - * Retain the map lock.
>>> - */
>>> - vm_fault_unwire(map, entry->start, entry->end,
>>> - entry->object.vm_object != NULL &&
>>> - (entry->object.vm_object->flags &
>>> - OBJ_FICTITIOUS) != 0);
>>> - }
>>> }
>>> next_entry_done:
>>> KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
>>> @@ -2783,9 +2771,13 @@ vm_map_sync(
>>> static void
>>> vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
>>> {
>>> - vm_fault_unwire(map, entry->start, entry->end,
>>> - entry->object.vm_object != NULL &&
>>> - (entry->object.vm_object->flags & OBJ_FICTITIOUS) != 0);
>>> +
>>> + VM_MAP_ASSERT_LOCKED(map);
>>> + KASSERT(entry->wired_count > 0,
>>> + ("vm_map_entry_unwire: entry %p isn't wired", entry));
>>> + pmap_unwire(map->pmap, entry->start, entry->end);
>>> + vm_object_unwire(entry->object.vm_object, entry->offset,
>>> entry->end -
>>> + entry->start, PQ_ACTIVE);
>>> entry->wired_count = 0;
>>> }
>>>
>>>
>>> Modified: head/sys/vm/vm_object.c
>>> ==============================================================================
>>>
>>> --- head/sys/vm/vm_object.c Sat Jul 26 17:59:25 2014 (r269133)
>>> +++ head/sys/vm/vm_object.c Sat Jul 26 18:10:18 2014 (r269134)
>>> @@ -2202,6 +2202,78 @@ vm_object_set_writeable_dirty(vm_object_
>>> vm_object_set_flag(object, OBJ_MIGHTBEDIRTY);
>>> }
>>>
>>> +/*
>>> + * vm_object_unwire:
>>> + *
>>> + * For each page offset within the specified range of the given
>>> object,
>>> + * find the highest-level page in the shadow chain and unwire
>>> it. A page
>>> + * must exist at every page offset, and the highest-level page
>>> must be
>>> + * wired.
>>> + */
>>> +void
>>> +vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t
>>> length,
>>> + uint8_t queue)
>>> +{
>>> + vm_object_t tobject;
>>> + vm_page_t m, tm;
>>> + vm_pindex_t end_pindex, pindex, tpindex;
>>> + int depth, locked_depth;
>>> +
>>> + KASSERT((offset & PAGE_MASK) == 0,
>>> + ("vm_object_unwire: offset is not page aligned"));
>>> + KASSERT((length & PAGE_MASK) == 0,
>>> + ("vm_object_unwire: length is not a multiple of PAGE_SIZE"));
>>> + /* The wired count of a fictitious page never changes. */
>>> + if ((object->flags & OBJ_FICTITIOUS) != 0)
>>> + return;
>>> + pindex = OFF_TO_IDX(offset);
>>> + end_pindex = pindex + atop(length);
>>> + locked_depth = 1;
>>> + VM_OBJECT_RLOCK(object);
>>> + m = vm_page_find_least(object, pindex);
>>> + while (pindex < end_pindex) {
>>> + if (m == NULL || pindex < m->pindex) {
>>> + /*
>>> + * The first object in the shadow chain doesn't
>>> + * contain a page at the current index. Therefore,
>>> + * the page must exist in a backing object.
>>> + */
>>> + tobject = object;
>>> + tpindex = pindex;
>>> + depth = 0;
>>> + do {
>>> + tpindex +=
>>> + OFF_TO_IDX(tobject->backing_object_offset);
>>> + tobject = tobject->backing_object;
>>> + KASSERT(tobject != NULL,
>>> + ("vm_object_unwire: missing page"));
>>> + if ((tobject->flags & OBJ_FICTITIOUS) != 0)
>>> + goto next_page;
>>> + depth++;
>>> + if (depth == locked_depth) {
>>> + locked_depth++;
>>> + VM_OBJECT_RLOCK(tobject);
>>> + }
>>> + } while ((tm = vm_page_lookup(tobject, tpindex)) ==
>>> + NULL);
>>> + } else {
>>> + tm = m;
>>> + m = TAILQ_NEXT(m, listq);
>>> + }
>>> + vm_page_lock(tm);
>>> + vm_page_unwire(tm, queue);
>>> + vm_page_unlock(tm);
>>> +next_page:
>>> + pindex++;
>>> + }
>>> + /* Release the accumulated object locks. */
>>> + for (depth = 0; depth < locked_depth; depth++) {
>>> + tobject = object->backing_object;
>>> + VM_OBJECT_RUNLOCK(object);
>>> + object = tobject;
>>> + }
>>> +}
>>> +
>>> #include "opt_ddb.h"
>>> #ifdef DDB
>>> #include <sys/kernel.h>
>>>
>>> Modified: head/sys/vm/vm_object.h
>>> ==============================================================================
>>>
>>> --- head/sys/vm/vm_object.h Sat Jul 26 17:59:25 2014 (r269133)
>>> +++ head/sys/vm/vm_object.h Sat Jul 26 18:10:18 2014 (r269134)
>>> @@ -291,6 +291,8 @@ void vm_object_shadow (vm_object_t *, vm
>>> void vm_object_split(vm_map_entry_t);
>>> boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t,
>>> boolean_t,
>>> boolean_t);
>>> +void vm_object_unwire(vm_object_t object, vm_ooffset_t offset,
>>> + vm_size_t length, uint8_t queue);
>>> #endif /* _KERNEL */
>>>
>>> #endif /* _VM_OBJECT_ */
>>>
>>>
>>>
>>
>
More information about the svn-src-head
mailing list