PERFORCE change 92714 for review
Kip Macy
kmacy at FreeBSD.org
Sat Mar 4 00:44:27 PST 2006
http://perforce.freebsd.org/chv.cgi?CH=92714
Change 92714 by kmacy at kmacy_storage:sun4v_work on 2006/03/04 08:43:25
Redefine pmap to be hash-table backed for user processes, and make TSBs
per-process.
Affected files ...
.. //depot/projects/kmacy_sun4v/src/sys/conf/files.sun4v#7 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/asi.h#9 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/asmacros.h#4 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/pcb.h#5 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/pmap.h#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tsb.h#7 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte.h#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/locore.S#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#13 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/support.S#7 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tsb.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#1 add
Differences ...
==== //depot/projects/kmacy_sun4v/src/sys/conf/files.sun4v#7 (text+ko) ====
@@ -74,6 +74,7 @@
sun4v/sun4v/swtch.S standard
sun4v/sun4v/tsb.c standard
sun4v/sun4v/tte.c standard
+sun4v/sun4v/tte_hash.c standard
sun4v/sun4v/tick.c standard
sun4v/sun4v/trap.c standard
sun4v/sun4v/uio_machdep.c standard
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/asi.h#9 (text+ko) ====
@@ -124,10 +124,10 @@
#define ASI_SCRATCHPAD_6_REG 0x30
#define ASI_SCRATCHPAD_7_REG 0x38
-/* MMFSA == 0 */
+
+#define SCRATCH_REG_MMFSA ASI_SCRATCHPAD_0_REG
#define SCRATCH_REG_PCPU ASI_SCRATCHPAD_1_REG
-#define SCRATCH_REG_PCB ASI_SCRATCHPAD_2_REG
-#define SCRATCH_REG_PTD ASI_SCRATCHPAD_3_REG
+#define SCRATCH_REG_HASH ASI_SCRATCHPAD_2_REG
#define MMU_CID_P 0x08
#define MMU_CID_S 0x10
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/asmacros.h#4 (text+ko) ====
@@ -135,13 +135,11 @@
mov SCRATCH_REG_PCPU, PCPU_REG; \
ldxa [%g0 + PCPU_REG]ASI_SCRATCHPAD, PCPU_REG;
-#define GET_PTD_SCRATCH(reg) \
- mov SCRATCH_REG_PTD, reg; \
+#define GET_HASH_SCRATCH(reg) \
+ mov SCRATCH_REG_HASH, reg; \
ldxa [%g0 + reg]ASI_SCRATCHPAD, reg;
-#define SET_MMFSA_SCRATCH(reg) stxa reg, [%g0]ASI_SCRATCHPAD
-#define SET_PCPU_SCRATCH stxa PCPU_REG, [%g0 + SCRATCH_REG_PCPU]ASI_SCRATCHPAD
-#define SET_PTD_SCRATCH(reg) stxa reg, [%g0 + SCRATCH_REG_PTD]ASI_SCRATCHPAD
+#define SET_HASH_SCRATCH(reg) stxa reg, [%g0 + SCRATCH_REG_HASH]ASI_SCRATCHPAD
#define GET_PCB(reg) \
GET_PCPU_SCRATCH; \
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/pcb.h#5 (text+ko) ====
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/pmap.h#6 (text+ko) ====
@@ -50,6 +50,7 @@
typedef struct pmap *pmap_t;
struct pv_entry;
+struct tte_hash;
struct md_page {
int pv_list_count;
@@ -58,12 +59,12 @@
struct pmap {
- struct mtx pm_mtx;
- vm_paddr_t *pm_pdir;
+ struct mtx pm_mtx;
+ struct tte_hash *pm_hash;
TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */
- vm_object_t pm_tsb_obj;
- cpumask_t pm_active;
- uint16_t pm_context;
+ struct hv_tsb_info pm_tsb;
+ cpumask_t pm_active;
+ uint16_t pm_context;
struct pmap_statistics pm_stats;
};
@@ -106,6 +107,10 @@
void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
void pmap_invalidate_all(pmap_t pmap);
+void pmap_scrub_pages(vm_paddr_t pa, int64_t size);
+
+
+
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
extern struct pmap kernel_pmap_store;
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tsb.h#7 (text+ko) ====
@@ -1,59 +1,8 @@
-/*-
- * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Berkeley Software Design Inc's name may not be used to endorse or
- * promote products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: BSDI: pmap.v9.h,v 1.10.2.6 1999/08/23 22:18:44 cp Exp
- * $FreeBSD: src/sys/sparc64/include/tsb.h,v 1.18 2003/04/08 06:35:08 jake Exp $
- */
-
#ifndef _MACHINE_TSB_H_
#define _MACHINE_TSB_H_
#define MAX_TSB_INFO 2
-#define TSB_PAGES_SHIFT (4)
-#define TSB_PAGES (1 << TSB_PAGES_SHIFT)
-#define TSB_BSHIFT (TSB_PAGES_SHIFT + PAGE_SHIFT)
-#define TSB_BSIZE (1UL << TSB_BSHIFT)
-#define TSB_SIZE (TSB_BSIZE / sizeof(struct tte))
-#define TSB_BUCKET_SHIFT (2)
-#define TSB_BUCKET_SIZE (1 << TSB_BUCKET_SHIFT)
-#define TSB_BUCKET_ADDRESS_BITS \
- (TSB_BSHIFT - TSB_BUCKET_SHIFT - TTE_SHIFT)
-#define TSB_BUCKET_MASK ((1 << TSB_BUCKET_ADDRESS_BITS) - 1)
-
-
-#define TSB_ENTRY_SHIFT 4 /* each entry = 128 bits = 16 bytes */
-#define TSB_ENTRY_SIZE (1 << 4)
-#define TSB_START_SIZE 9
-#define TSB_ENTRIES(tsbsz) (1 << (TSB_START_SIZE + tsbsz))
-#define TSB_BYTES(tsbsz) (TSB_ENTRIES(tsbsz) << TSB_ENTRY_SHIFT)
-#define TSB_OFFSET_MASK(tsbsz) (TSB_ENTRIES(tsbsz) - 1)
-
-
/*
* Values for "tsb_ttesz_mask" bitmask.
*/
@@ -74,17 +23,19 @@
struct hv_tsb_info;
-typedef uint64_t tte_t;
+
+void tsb_assert_invalid(struct hv_tsb_info *tsb, vm_offset_t va);
-void tsb_assert_invalid(hv_tsb_info_t *tsb, vm_offset_t va);
+void tsb_set_tte(struct hv_tsb_info *tsb, vm_offset_t va, tte_t tte_data, uint64_t ctx);
-void tsb_set_tte(struct hv_tsb_info *tsb, vm_offset_t va, vm_paddr_t pa, uint64_t flags, uint64_t ctx);
+tte_t tsb_get_tte(struct hv_tsb_info *tsb, vm_offset_t va);
-tte_t tsb_get_tte(struct hv_tsb_info *tsb, vm_offset_t va, uint64_t ctx);
+tte_t tsb_lookup_tte(vm_offset_t va, uint64_t context);
-tte_t tsb_lookup_tte(vm_offset_t va, uint64_t ctx);
+void tsb_clear(struct hv_tsb_info *tsb);
-void tsb_clear_tte(struct hv_tsb_info *tsb, vm_offset_t, uint64_t ctx);
+void tsb_clear_tte(struct hv_tsb_info *tsb, vm_offset_t va);
+void tsb_clear_range(struct hv_tsb_info *tsb, vm_offset_t sva, vm_offset_t eva);
#endif /* !_MACHINE_TSB_H_ */
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte.h#6 (text+ko) ====
@@ -111,7 +111,7 @@
#define VTD_W (1UL << 6)
#define VTD_REF (1UL << 5)
-#define VTD_WR_PERM (1UL << 4)
+#define VTD_SW_W (1UL << 4)
#define VTD_MANAGED (1UL << 58)
#define VTD_WIRED (1UL << 57)
#define VTD_LOCK (1UL << 56)
@@ -128,8 +128,9 @@
/*
* default flags for kernel pages
*/
-#define TTE_KERNEL VTD_V | VTD_CP | VTD_CV | VTD_P | VTD_X | VTD_W | VTD_WR_PERM | VTD_REF | VTD_WIRED
+#define TTE_KERNEL VTD_V | VTD_CP | VTD_CV | VTD_P | VTD_X | VTD_W | VTD_SW_W | VTD_REF | VTD_WIRED
#define TTE_KERNEL_MINFLAGS VTD_V | VTD_CP | VTD_CV | VTD_P
+#define TTE_MINFLAGS VTD_V | VTD_CP | VTD_CV
#define VTD_SIZE_BITS (4)
#define VTD_SIZE_MASK ((1 << VTD_SIZE_BITS) - 1)
@@ -227,12 +228,18 @@
struct pmap;
+typedef uint64_t tte_t;
+
void tte_clear_phys_bit(vm_page_t m, uint64_t flags);
+
void tte_set_phys_bit(vm_page_t m, uint64_t flags);
+
boolean_t tte_get_phys_bit(vm_page_t m, uint64_t flags);
void tte_clear_virt_bit(struct pmap *pmap, vm_offset_t va, uint64_t flags);
+
void tte_set_virt_bit(struct pmap *pmap, vm_offset_t va, uint64_t flags);
+
boolean_t tte_get_virt_bit(struct pmap *pmap, vm_offset_t va, uint64_t flags);
#endif /* !_MACHINE_TTE_H_ */
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/locore.S#6 (text+ko) ====
@@ -115,8 +115,6 @@
*/
mov SCRATCH_REG_PCPU, %g1
stxa PCPU_REG, [%g1]ASI_SCRATCHPAD
- mov SCRATCH_REG_PCB, %g1
- stxa %g6, [%g1]ASI_SCRATCHPAD
retl
nop
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#13 (text+ko) ====
@@ -46,6 +46,7 @@
#include <machine/smp.h>
#include <machine/tlb.h>
#include <machine/tte.h>
+#include <machine/tte_hash.h>
#include <machine/pcb.h>
#include <machine/tsb.h>
@@ -143,7 +144,6 @@
static void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap);
-static void pmap_scrub_pages(vm_paddr_t pa, int64_t size);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
@@ -202,11 +202,16 @@
static const struct timeval printinterval = { 60, 0 };
static struct timeval lastprint;
struct vpgqueues *vpq;
+ uint64_t *tte, tte_data;
pmap_t pmap;
pv_entry_t allocated_pv, next_pv, pv;
vm_offset_t va;
vm_page_t m;
+
+ KASSERT(locked_pmap->pm_context != 0,
+ ("context 0 not backed by pv_entry management"));
+
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
@@ -234,7 +239,6 @@
if (m->hold_count || m->busy || (m->flags & PG_BUSY))
continue;
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
- UNIMPLEMENTED;
va = pv->pv_va;
pmap = pv->pv_pmap;
/* Avoid deadlock and lock recursion. */
@@ -243,30 +247,29 @@
else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
continue;
pmap->pm_stats.resident_count--;
-#ifdef notyet
- pte = pmap_pte_quick(pmap, va);
- tpte = pte_load_clear(pte);
- KASSERT((tpte & PG_W) == 0,
- ("get_pv_entry: wired pte %#jx", (uintmax_t)tpte));
- if (tpte & PG_A)
+
+ tte = tte_hash_lookup(pmap->pm_hash, va);
+ tte_data = *tte;
+ tte_hash_delete(pmap->pm_hash, va);
+
+ KASSERT((tte_data & VTD_WIRED) == 0,
+ ("get_pv_entry: wired pte %#jx", (uintmax_t)tte_data));
+ if (tte_data & VTD_REF)
vm_page_flag_set(m, PG_REFERENCED);
- if (tpte & PG_M) {
- KASSERT((tpte & PG_RW),
- ("get_pv_entry: modified page not writable: va: %#x, pte: %#jx",
- va, (uintmax_t)tpte));
- if (pmap_track_modified(va))
+ if (tte_data & VTD_W) {
+ KASSERT((tte_data & VTD_SW_W),
+ ("get_pv_entry: modified page not writable: va: %lx, tte: %lx",
+ va, tte_data));
+ if (pmap_track_modified(locked_pmap, va))
vm_page_dirty(m);
}
-#endif
+
pmap_invalidate_page(pmap, va);
TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
if (TAILQ_EMPTY(&m->md.pv_list))
vm_page_flag_clear(m, PG_WRITEABLE);
m->md.pv_list_count--;
-#ifdef notyet
- pmap_unuse_pt(pmap, va);
-#endif
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
@@ -320,6 +323,7 @@
{
pmap_t pmap, oldpmap;
+
critical_enter();
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
@@ -330,8 +334,7 @@
oldpmap->pm_active &= ~1;
pmap->pm_active |= 1;
#endif
-
- set_pdir_scratchpad(pmap->pm_pdir);
+ tte_hash_set_scratchpad(pmap->pm_hash);
PCPU_SET(curpmap, pmap);
critical_exit();
}
@@ -456,8 +459,8 @@
#ifdef notyet
/* XXX this tries to map at a wacky address */
for (i = 0; i < (MSGBUF_SIZE / PAGE_SIZE); i++)
- tsb_set_tte(&kernel_td[TSB8K_INDEX], ((vm_offset_t)msgbufp) + i*PAGE_SIZE , msgbuf_phys + i*PAGE_SIZE,
- TTE_KERNEL | VTD_8K, 0);
+ tsb_set_tte(&kernel_td[TSB8K_INDEX], ((vm_offset_t)msgbufp) + i*PAGE_SIZE ,
+ msgbuf_phys + i*PAGE_SIZE | TTE_KERNEL | VTD_8K, 0);
#endif
/*
@@ -480,7 +483,7 @@
for (i = 0; i < KSTACK_PAGES; i++) {
pa = kstack0_phys + i * PAGE_SIZE;
va = kstack0 + i * PAGE_SIZE;
- tsb_set_tte(&kernel_td[TSB8K_INDEX], va , pa, TTE_KERNEL | VTD_8K, 0);
+ tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa | TTE_KERNEL | VTD_8K, 0);
}
/*
@@ -521,14 +524,14 @@
if (translations[i].om_size == PAGE_SIZE_4M) {
tsb_assert_invalid(&kernel_td[TSB4M_INDEX], translations[i].om_start);
tsb_set_tte(&kernel_td[TSB4M_INDEX], translations[i].om_start,
- TTE_GET_PA(translations[i].om_tte), TTE_KERNEL | VTD_4M, 0);
+ TTE_GET_PA(translations[i].om_tte) | TTE_KERNEL | VTD_4M, 0);
} else {
for (off = 0; off < translations[i].om_size;
off += PAGE_SIZE) {
va = translations[i].om_start + off;
pa = TTE_GET_PA(translations[i].om_tte) + off;
tsb_assert_invalid(&kernel_td[TSB8K_INDEX], va);
- tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, TTE_KERNEL | VTD_8K, 0);
+ tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa | TTE_KERNEL | VTD_8K, 0);
}
}
}
@@ -623,21 +626,25 @@
boolean_t wired)
{
vm_paddr_t pa, opa;
- uint64_t tte_data, otte_data;
+ uint64_t tte_data, otte_data, *otte;
vm_page_t om;
int invlva;
#if 0
printf("ctx=%d va=%lx prot=%x wired=%x\n", pmap->pm_context,
va, prot, wired);
#endif
+ KASSERT(pmap->pm_context != 0,
+ ("inserting faultable entries into context 0 without backing hash"));
+
+ om = NULL;
vm_page_lock_queues();
- om = NULL;
PMAP_LOCK(pmap);
sched_pin();
tte_data = pa = VM_PAGE_TO_PHYS(m);
- otte_data = tsb_lookup_tte(va, pmap->pm_context);
+ otte = tte_hash_lookup(pmap->pm_hash, va);
+ otte_data = *otte;
opa = TTE_GET_PA(otte_data);
/*
* Mapping has not changed, must be protection or wiring change.
@@ -660,7 +667,7 @@
*/
if (otte_data & VTD_MANAGED) {
om = m;
- pa |= VTD_MANAGED;
+ tte_data |= VTD_MANAGED;
}
goto validate;
@@ -670,7 +677,7 @@
* handle validating new mapping.
*/
if (opa) {
- if (otte_data & VTD_W)
+ if (otte_data & VTD_WIRED)
pmap->pm_stats.wired_count--;
if (otte_data & VTD_MANAGED) {
om = PHYS_TO_VM_PAGE(opa);
@@ -697,19 +704,20 @@
* Now validate mapping with desired protection/wiring.
*/
if ((prot & VM_PROT_WRITE) != 0)
- tte_data |= (VTD_W|VTD_WR_PERM); /* XXX need to handle modify */
+ tte_data |= VTD_SW_W;
if ((prot & VM_PROT_EXECUTE) != 0)
tte_data |= VTD_X;
if (wired)
tte_data |= VTD_WIRED;
if (pmap == kernel_pmap)
tte_data |= TTE_KERNEL_MINFLAGS;
-
+ else
+ tte_data |= TTE_MINFLAGS;
+
if ((otte_data & ~(VTD_W|VTD_REF)) != tte_data) {
if (otte_data & VTD_V) {
invlva = FALSE;
- tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, tte_data,
- pmap->pm_context);
+ *otte = tte_data;
if (otte_data & VTD_REF) {
if (otte_data & VTD_MANAGED)
vm_page_flag_set(om, PG_REFERENCED);
@@ -726,14 +734,14 @@
if (invlva)
pmap_invalidate_page(pmap, va);
} else
- tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, tte_data,
- pmap->pm_context);
+ tte_hash_insert(pmap->pm_hash, va, tte_data);
}
sched_unpin();
+ PMAP_UNLOCK(pmap);
vm_page_unlock_queues();
- PMAP_UNLOCK(pmap);
+
}
@@ -741,7 +749,30 @@
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
vm_page_t mpte)
{
- UNIMPLEMENTED;
+ uint64_t pa;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+ PMAP_LOCK(pmap);
+ /*
+ * Enter on the PV list if part of our managed memory. Note that we
+ * raise IPL while manipulating pv_table since pmap_enter can be
+ * called at interrupt time.
+ */
+ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
+ pmap_insert_entry(pmap, va, m);
+
+ pmap->pm_stats.resident_count++;
+
+ pa = VM_PAGE_TO_PHYS(m);
+
+ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
+ pa |= VTD_MANAGED;
+
+ tte_hash_insert(pmap->pm_hash, va, pa | TTE_MINFLAGS);
+
+ PMAP_UNLOCK(pmap);
+
return (0);
}
@@ -752,13 +783,15 @@
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
- vm_paddr_t rtval;
- UNIMPLEMENTED;
+ vm_paddr_t pa;
+ tte_t *tte;
PMAP_LOCK(pmap);
-
+
+ tte = tte_hash_lookup(pmap->pm_hash, va);
+ pa = TTE_GET_PA(*tte) | (va & TTE_GET_PAGE_MASK(*tte));
PMAP_UNLOCK(pmap);
- return (rtval);
+ return (pa);
}
/*
@@ -769,7 +802,24 @@
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
- UNIMPLEMENTED;
+ tte_t *tte_data;
+ vm_page_t m;
+
+ m = NULL;
+ vm_page_lock_queues();
+ PMAP_LOCK(pmap);
+
+ tte_data = tte_hash_lookup(pmap->pm_hash, va);
+ if (tte_data != 0 &&
+ ((*tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) {
+ m = PHYS_TO_VM_PAGE(TTE_GET_PA(*tte_data));
+ vm_page_hold(m);
+ }
+
+ vm_page_unlock_queues();
+ PMAP_UNLOCK(pmap);
+
+ return (m);
}
void
@@ -797,6 +847,8 @@
pv_entry_high_water = 9 * (pv_entry_max / 10);
uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
+ tte_hash_init();
+
}
/*
@@ -838,8 +890,15 @@
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t tva;
+#if 0
printf("pmap_invalidate_range(sva=%lx, eva=%lx)\n", sva, eva);
+#endif
/* XXX SUN4V_FIXME - oversimplified logic */
+
+ if (pmap->pm_context != 0) {
+ tsb_clear_range(&pmap->pm_tsb, sva, eva);
+ }
+
if (((sva & PAGE_MASK_4M) != 0) || ((eva & PAGE_MASK_4M) != 0)) {
for (tva = sva; tva < eva; tva += PAGE_SIZE_8K)
invlpg(tva, pmap->pm_context);
@@ -854,6 +913,10 @@
#ifdef SMP
#error __FUNCTION_ not implemented
#else
+ if (pmap->pm_context != 0) {
+ tsb_clear(&pmap->pm_tsb);
+ }
+
invlctx(pmap->pm_context);
#endif
}
@@ -870,7 +933,7 @@
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t va)
{
- return (pmap->pm_pdir[va >> (PT_SHIFT + PAGE_SHIFT)] ? TRUE : FALSE);
+ return (TRUE);
}
/*
@@ -879,7 +942,7 @@
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
- tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, TTE_KERNEL | VTD_8K, 0);
+ tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa | TTE_KERNEL | VTD_8K, 0);
}
/*
@@ -909,10 +972,10 @@
{
if ((va & PAGE_MASK_4M) == 0 &&
- tsb_get_tte(&kernel_td[TSB4M_INDEX], va, 0) != 0)
- tsb_set_tte(&kernel_td[TSB4M_INDEX], va, 0, 0, 0);
+ tsb_get_tte(&kernel_td[TSB4M_INDEX], va) != 0)
+ tsb_clear_tte(&kernel_td[TSB4M_INDEX], va);
else
- tsb_set_tte(&kernel_td[TSB8K_INDEX], va, 0, 0, 0);
+ tsb_clear_tte(&kernel_td[TSB8K_INDEX], va);
}
static void
@@ -1008,7 +1071,7 @@
{
if ((prot & VM_PROT_WRITE) == 0) {
if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
- tte_clear_phys_bit(m, VTD_WR_PERM|VTD_W);
+ tte_clear_phys_bit(m, VTD_SW_W | VTD_W);
} else {
pmap_remove_all(m);
}
@@ -1022,11 +1085,9 @@
pmap_pinit0(pmap_t pmap)
{
PMAP_LOCK_INIT(pmap);
- /*
- * The kernel does not use page tables
- */
- pmap->pm_pdir = NULL;
+ pmap->pm_hash = NULL;
pmap->pm_active = 0;
+ pmap->pm_context = 0;
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1039,36 +1100,14 @@
void
pmap_pinit(pmap_t pmap)
{
- vm_page_t m, ptdpg[NPGPTD];
- static int color;
- int i;
-
- PMAP_LOCK_INIT(pmap);
- if (pmap->pm_pdir == NULL)
- pmap->pm_pdir = (vm_offset_t *)kmem_alloc_nofault(kernel_map, NBPTD);
+ static int context = 1; /* XXX */
- /*
- * allocate the page directory page(s)
- */
- for (i = 0; i < NPGPTD;) {
- m = vm_page_alloc(NULL, color++,
- VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- if (m == NULL)
- VM_WAIT;
- else {
- ptdpg[i++] = m;
- }
- }
+ pmap->pm_context = context++;
- pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
+ KASSERT(context < PMAP_CONTEXT_MAX,
+ ("max context limit hit - need to implement context recycling"));
- for (i = 0; i < NPGPTD; i++) {
- if ((ptdpg[i]->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
- }
-
-
+ pmap->pm_hash = tte_hash_create(pmap->pm_context);
pmap->pm_active = 0;
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1081,12 +1120,13 @@
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
- UNIMPLEMENTED;
-#ifdef notyet
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- pt_entry_t *pte;
+
int anychanged;
+ uint64_t *tte;
+
+
+ KASSERT(pmap->pm_context != 0,
+ ("protection downgrades not handled correctly without backing hash"));
if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
@@ -1099,28 +1139,43 @@
anychanged = 0;
vm_page_lock_queues();
+ PMAP_LOCK(pmap);
sched_pin();
- PMAP_LOCK(pmap);
- for (; sva < eva; sva = va_next) {
- unsigned obits, pbits, pdirindex;
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
+ for (; sva < eva; sva += PAGE_SIZE) {
+ uint64_t otte_data, tte_data;
+ vm_page_t m;
+ retry:
+ tte = tte_hash_lookup(pmap->pm_hash, sva);
+ otte_data = tte_data = tte ? *tte : 0;
+ if (tte_data & VTD_MANAGED) {
+ m = NULL;
+ if (tte_data & VTD_REF) {
+ m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
+ vm_page_flag_set(m, PG_REFERENCED);
+ tte_data &= ~VTD_REF;
+ }
+ if ((tte_data & VTD_W) && pmap_track_modified(pmap, sva)) {
+ m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
+ vm_page_dirty(m);
+ }
+ }
+
+ tte_data &= ~(VTD_SW_W | VTD_W);
- if (pdnxt > eva)
- pdnxt = eva;
+ if (tte_data != otte_data) {
+ if (!atomic_cmpset_long(tte, otte_data, tte_data))
+ goto retry;
+ anychanged = 1;
+ }
}
+
sched_unpin();
- vm_page_unlock_queues();
if (anychanged)
pmap_invalidate_all(pmap);
+ vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
-#endif
}
/*
@@ -1169,29 +1224,12 @@
void
pmap_release(pmap_t pmap)
{
- vm_page_t m, ptdpg[NPGPTD];
- int i;
-
KASSERT(pmap->pm_stats.resident_count == 0,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
pmap_lazyfix(pmap);
-
- for (i = 0; i < NPGPTD; i++)
- ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(((vm_offset_t)pmap->pm_pdir) + PAGE_SIZE));
-
- pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
-
- vm_page_lock_queues();
- for (i = 0; i < NPGPTD; i++) {
- m = ptdpg[i];
-
- m->wire_count--;
- atomic_subtract_int(&cnt.v_wire_count, 1);
- vm_page_free_zero(m);
- }
- vm_page_unlock_queues();
+ tte_hash_destroy(pmap->pm_hash);
PMAP_LOCK_DESTROY(pmap);
}
@@ -1201,93 +1239,33 @@
void
pmap_remove(pmap_t pmap, vm_offset_t start, vm_offset_t end)
{
- UNIMPLEMENTED;
-#ifdef notyet
- vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
- pt_entry_t *pte;
- int anyvalid;
-
+ int invlva, tinvlva;
+ vm_offset_t tva;
/*
* Perform an unsynchronized read. This is, however, safe.
*/
if (pmap->pm_stats.resident_count == 0)
return;
- anyvalid = 0;
-
vm_page_lock_queues();
sched_pin();
PMAP_LOCK(pmap);
- if (pmap == kernel_pmap) {
- /* only need to clear page from the appropriate TSB */
-
- }
-
- /*
- * special handling of removing one page. a very
- * common operation and easy to short circuit some
- * code.
- */
- if (sva + PAGE_SIZE == eva) {
- pmap_remove_page(pmap, sva);
- goto out;
- }
-
- for (; sva < eva; sva = pdnxt) {
- unsigned pdirindex;
-
- /*
- * Calculate index for next page table.
- */
- pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pmap->pm_stats.resident_count == 0)
- break;
-
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = pmap->pm_pdir[pdirindex];
-
- /*
- * Weed out invalid mappings. Note: we assume that the page
- * directory table is always allocated, and in kernel virtual.
- */
- if (ptpaddr == 0)
- continue;
-
- /*
- * Limit our scan to either the end of the va represented
- * by the current page table page, or to the end of the
- * range being removed.
- */
- if (pdnxt > eva)
- pdnxt = eva;
-
- /* XXX SUN4V_FIXME
- * Have not yet decided whether or not to use mapped pages for
- * page table pages
- * In any event pmap_pte_quick needs to go
- */
- for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
- sva += PAGE_SIZE) {
- if (*pte == 0)
- continue;
- anyvalid = 1;
- if (pmap_remove_pte(pmap, pte, sva))
- break;
+ if (pmap->pm_context != 0) {
+ invlva = 0;
+ for (tva = start; tva < end; tva += PAGE_SIZE) {
+ tinvlva = tte_hash_delete(pmap->pm_hash, tva);
+ invlva = tinvlva ? tinvlva : invlva;
}
-
-
-
-
-
+ } else {
+ tsb_clear_range(&pmap->pm_tsb, start, end);
+ invlva = 1;
}
-out:
sched_unpin();
vm_page_unlock_queues();
- if (anyvalid)
+ if (invlva)
pmap_invalidate_all(pmap);
PMAP_UNLOCK(pmap);
-#endif
+
}
/*
@@ -1306,7 +1284,44 @@
void
pmap_remove_all(vm_page_t m)
{
- UNIMPLEMENTED;
+ pv_entry_t pv;
+ uint64_t *tte, tte_data;
+
+ mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ sched_pin();
+ while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
+ PMAP_LOCK(pv->pv_pmap);
+ pv->pv_pmap->pm_stats.resident_count--;
+ KASSERT(pv->pv_pmap->pm_context != 0,
+ ("cannot special case absence of backing hash"));
+ tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va);
+ tte_data = *tte;
+ tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va);
+ if (tte_data & VTD_WIRED)
+ pv->pv_pmap->pm_stats.wired_count--;
+ if (tte_data & VTD_REF)
+ vm_page_flag_set(m, PG_REFERENCED);
+
+ /*
+ * Update the vm_page_t clean and reference bits.
+ */
+ if (tte_data & VTD_W) {
+ KASSERT((tte_data & VTD_SW_W),
+ ("pmap_remove_all: modified page not writable: va: %lx, tte: %lx",
+ pv->pv_va, tte_data));
+ if (pmap_track_modified(pv->pv_pmap, pv->pv_va))
+ vm_page_dirty(m);
+ }
+
+ pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+ TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ m->md.pv_list_count--;
+ PMAP_UNLOCK(pv->pv_pmap);
+ free_pv_entry(pv);
+ }
+ vm_page_flag_clear(m, PG_WRITEABLE);
+ sched_unpin();
}
static void
@@ -1338,12 +1353,70 @@
void
-pmap_remove_pages(pmap_t pmap, vm_offset_t start, vm_offset_t end)
+pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- UNIMPLEMENTED;
+
+ vm_page_t m;
+ pv_entry_t pv, npv;
+ tte_t *tte, tte_data;
+
+ KASSERT(pmap->pm_context != 0,
+ ("cannot special case absence of backing hash"));
+
+ vm_page_lock_queues();
+ PMAP_LOCK(pmap);
+ sched_pin();
+ for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
+
+ if (pv->pv_va >= eva || pv->pv_va < sva) {
+ npv = TAILQ_NEXT(pv, pv_plist);
+ continue;
+ }
+ tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va);
+ tte_data = tte ? *tte : 0;
+
+ if (tte_data == 0) {
+ printf("TTE at %p IS ZERO @ VA %016lx\n",
+ tte, pv->pv_va);
+ panic("bad tte");
+ }
+
+ /*
+ * We cannot remove wired pages from a
+ * process' mapping at this time
+ */
+
+ if (tte_data & VTD_WIRED) {
+ npv = TAILQ_NEXT(pv, pv_plist);
>>> TRUNCATED FOR MAIL (1000 lines) <<<
More information about the p4-projects mailing list