PERFORCE change 96341 for review
Kip Macy
kmacy at FreeBSD.org
Sat Apr 29 02:00:48 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=96341
Change 96341 by kmacy at kmacy_storage:sun4v_rwbuf on 2006/04/29 02:00:02
lock hash buckets during updates to prevent races with TSB miss handlers
fix pmap_kextract lookups for second 4MB page of nucleus memory
recycle contexts
Affected files ...
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte_hash.h#9 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#38 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#14 edit
Differences ...
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte_hash.h#9 (text+ko) ====
@@ -16,13 +16,13 @@
void tte_hash_destroy(tte_hash_t th);
-int tte_hash_delete(tte_hash_t hash, vm_offset_t va);
+int tte_hash_delete(tte_hash_t hash, vm_offset_t va, int locked);
void tte_hash_delete_all(tte_hash_t hash);
void tte_hash_insert(tte_hash_t hash, vm_offset_t va, tte_t data);
-tte_t *tte_hash_lookup(tte_hash_t hash, vm_offset_t va);
+tte_t *tte_hash_lookup(tte_hash_t hash, vm_offset_t va, int leave_locked);
uint64_t tte_hash_set_scratchpad_kernel(tte_hash_t th);
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#38 (text+ko) ====
@@ -127,7 +127,11 @@
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
int pmap_debug = 0;
-static int context = 1; /* XXX */
+
+static struct mtx pmap_ctx_lock;
+static uint16_t ctx_stack[PMAP_CONTEXT_MAX];
+static int ctx_stack_top;
+
static int permanent_mappings = 0;
static uint64_t nucleus_memory;
static uint64_t nucleus_mappings[2];
@@ -230,6 +234,31 @@
return (0);
}
+/*
+ * Return a context number to the free stack so that a later pmap can
+ * reuse it.
+ *
+ * The capacity check is made before the store and while pmap_ctx_lock is
+ * held: checking afterwards would let an overrun write past ctx_stack[]
+ * before being noticed, and reading ctx_stack_top outside the lock races
+ * with other CPUs.  The test uses the pre-increment value because the
+ * stack may legitimately become full again (ctx_stack_top ==
+ * PMAP_CONTEXT_MAX, which is the state pmap_init() leaves it in).
+ */
+static __inline void
+free_context(uint16_t ctx)
+{
+	mtx_lock_spin(&pmap_ctx_lock);
+	KASSERT(ctx_stack_top < PMAP_CONTEXT_MAX,
+	    ("context stack overrun - system error"));
+	ctx_stack[ctx_stack_top++] = ctx;
+	mtx_unlock_spin(&pmap_ctx_lock);
+}
+
+/*
+ * Pop an unused context number off the free stack and return it.
+ *
+ * pmap_init() fills the stack starting at index 1, so ctx_stack[1] is the
+ * last entry that may be handed out; slot 0 is never initialized and
+ * therefore holds 0 (presumably the kernel's context - confirm).  The
+ * underrun check is made before the pop and while pmap_ctx_lock is held
+ * so that an exhausted stack is caught before the out-of-range read
+ * rather than after it.
+ */
+static __inline uint16_t
+get_context(void)
+{
+	uint16_t ctx;
+
+	mtx_lock_spin(&pmap_ctx_lock);
+	KASSERT(ctx_stack_top > 1,
+	    ("context stack underrun - need to implement context stealing"));
+	ctx = ctx_stack[--ctx_stack_top];
+	mtx_unlock_spin(&pmap_ctx_lock);
+
+	return (ctx);
+}
static __inline void
free_pv_entry(pv_entry_t pv)
@@ -290,9 +319,9 @@
continue;
pmap->pm_stats.resident_count--;
- tte = tte_hash_lookup(pmap->pm_hash, va);
+ tte = tte_hash_lookup(pmap->pm_hash, va, TRUE);
tte_data = *tte;
- tte_hash_delete(pmap->pm_hash, va);
+ tte_hash_delete(pmap->pm_hash, va, TRUE);
KASSERT((tte_data & VTD_WIRED) == 0,
("get_pv_entry: wired pte %#jx", (uintmax_t)tte_data));
@@ -377,12 +406,9 @@
pmap->pm_active |= 1;
pmap->pm_tlbactive |= 1;
#endif
-#if 0
- tsb_clear(&pmap->pm_tsb);
- tte_hash_clear(pmap->pm_hash);
-#endif
+ /* XXX Is this necessary? */
+ pmap_invalidate_all(pmap);
- pmap->pm_context = context++;
pmap->pm_hashscratch = tte_hash_set_scratchpad_user(pmap->pm_hash, pmap->pm_context);
pmap->pm_tsbscratch = tsb_set_scratchpad_user(&pmap->pm_tsb);
PCPU_SET(curpmap, pmap);
@@ -515,10 +541,6 @@
for (j = 0; phys_avail_tmp[j + 2] != 0; phys_avail[j] = phys_avail_tmp[j],
phys_avail[j + 1] = phys_avail_tmp[j + 1], j += 2)
;
-#if 0
- for (i = 0; phys_avail_tmp[i + 1] != 0; i++)
- phys_avail[i] = phys_avail_tmp[i];
-#endif
}
}
@@ -696,6 +718,7 @@
pm->pm_tlbactive = ~0;
PMAP_LOCK_INIT(kernel_pmap);
+
TAILQ_INIT(&kernel_pmap->pm_pvlist);
error = hv_set_ctx0(MAX_TSB_INFO, vtophys((vm_offset_t)&kernel_td));
@@ -820,10 +843,10 @@
tte_t *src_tte, *dst_tte, tte_data;
vm_page_t m;
- src_tte = tte_hash_lookup(src_pmap->pm_hash, addr);
+ src_tte = tte_hash_lookup(src_pmap->pm_hash, addr, FALSE);
tte_data = src_tte ? *src_tte : 0;
if ((tte_data & VTD_MANAGED) != 0) {
- if ((dst_tte = tte_hash_lookup(dst_pmap->pm_hash, addr)) == NULL) {
+ if ((dst_tte = tte_hash_lookup(dst_pmap->pm_hash, addr, FALSE)) == NULL) {
m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
tte_hash_insert(dst_pmap->pm_hash, addr, tte_data & ~(VTD_W|VTD_REF));
dst_pmap->pm_stats.resident_count++;
@@ -877,7 +900,7 @@
sched_pin();
tte_data = pa = VM_PAGE_TO_PHYS(m);
- otte = tte_hash_lookup(pmap->pm_hash, va);
+ otte = tte_hash_lookup(pmap->pm_hash, va, TRUE);
otte_data = otte ? *otte : 0;
opa = TTE_GET_PA(otte_data);
/*
@@ -1037,12 +1060,10 @@
{
vm_paddr_t pa;
tte_t *tte;
- PMAP_LOCK(pmap);
- tte = tte_hash_lookup(pmap->pm_hash, va);
+ tte = tte_hash_lookup(pmap->pm_hash, va, FALSE);
pa = TTE_GET_PA(*tte) | (va & TTE_GET_PAGE_MASK(*tte));
- PMAP_UNLOCK(pmap);
return (pa);
}
@@ -1061,7 +1082,7 @@
vm_page_lock_queues();
PMAP_LOCK(pmap);
sched_pin();
- tte_data = tte_hash_lookup(pmap->pm_hash, va);
+ tte_data = tte_hash_lookup(pmap->pm_hash, va, FALSE);
if (tte_data != 0 &&
((*tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) {
m = PHYS_TO_VM_PAGE(TTE_GET_PA(*tte_data));
@@ -1083,9 +1104,15 @@
void
pmap_init(void)
{
+
/* allocate pv_entry zones */
int shpgperproc = PMAP_SHPGPERPROC;
+ for (ctx_stack_top = 1; ctx_stack_top < PMAP_CONTEXT_MAX; ctx_stack_top++)
+ ctx_stack[ctx_stack_top] = ctx_stack_top;
+
+ mtx_init(&pmap_ctx_lock, "ctx lock", NULL, MTX_SPIN);
+
/*
* Initialize the address space (zone) for the pv entries. Set a
* high water mark so that the system can recover from excessive
@@ -1155,11 +1182,10 @@
if (cpumask == pmap->pm_tlbactive)
return;
-#if 0
+
if (pmap != kernel_pmap)
active = (pmap->pm_tlbactive & ~cpumask);
else
-#endif
active = PCPU_GET(other_cpus);
#if 1
@@ -1315,7 +1341,7 @@
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t va)
{
- return (tte_hash_lookup(pmap->pm_hash, va) == NULL);
+ return (tte_hash_lookup(pmap->pm_hash, va, FALSE) == NULL);
}
/*
@@ -1342,12 +1368,12 @@
if (va > KERNBASE && va < KERNBASE + nucleus_memory) {
uint64_t offset;
offset = va - KERNBASE;
- pa = nucleus_mappings[offset >> 22] + offset;
+ pa = nucleus_mappings[offset >> 22] | (va & PAGE_MASK_4M);
}
if ((pa == 0) && (tte_data = tsb_lookup_tte(va, 0)) != 0)
pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
- if ((pa == 0) && (tte = tte_hash_lookup(kernel_pmap->pm_hash, va)) != NULL)
+ if ((pa == 0) && (tte = tte_hash_lookup(kernel_pmap->pm_hash, va, FALSE)) != NULL)
pa = TTE_GET_PA(*tte) | (va & TTE_GET_PAGE_MASK(*tte));
return pa;
@@ -1356,7 +1382,7 @@
static void
pmap_kremove(vm_offset_t va)
{
- tte_hash_delete(kernel_pmap->pm_hash, va);
+ tte_hash_delete(kernel_pmap->pm_hash, va, FALSE);
}
static void
@@ -1486,11 +1512,8 @@
pmap_pinit(pmap_t pmap)
{
- pmap->pm_context = context++;
+ pmap->pm_context = get_context();
- KASSERT(context < PMAP_CONTEXT_MAX,
- ("max context limit hit - need to implement context recycling"));
-
pmap->pm_hash = tte_hash_create(pmap->pm_context, &pmap->pm_hashscratch);
pmap->pm_tsb_ra = tsb_init(&pmap->pm_tsb, &pmap->pm_tsbscratch);
pmap->pm_active = 0;
@@ -1532,7 +1555,7 @@
uint64_t otte_data, tte_data;
vm_page_t m;
retry:
- tte = tte_hash_lookup(pmap->pm_hash, tva);
+ tte = tte_hash_lookup(pmap->pm_hash, tva, TRUE);
otte_data = tte_data = tte ? *tte : 0;
if (tte_data & VTD_MANAGED) {
m = NULL;
@@ -1617,6 +1640,7 @@
pmap_lazyfix(pmap);
tsb_deinit(&pmap->pm_tsb);
tte_hash_destroy(pmap->pm_hash);
+ free_context(pmap->pm_context);
PMAP_LOCK_DESTROY(pmap);
}
@@ -1640,10 +1664,10 @@
sched_pin();
PMAP_LOCK(pmap);
for (tva = start; tva < end; tva += PAGE_SIZE) {
- if ((tte = tte_hash_lookup(pmap->pm_hash, tva)) == NULL)
+ if ((tte = tte_hash_lookup(pmap->pm_hash, tva, TRUE)) == NULL)
continue;
pmap_remove_tte(pmap, tte, tva);
- tte_hash_delete(pmap->pm_hash, tva);
+ tte_hash_delete(pmap->pm_hash, tva, TRUE);
invlva = 1;
}
@@ -1684,9 +1708,9 @@
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
PMAP_LOCK(pv->pv_pmap);
pv->pv_pmap->pm_stats.resident_count--;
- tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va);
+ tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va, TRUE);
tte_data = *tte;
- tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va);
+ tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va, TRUE);
if (tte_data & VTD_WIRED)
pv->pv_pmap->pm_stats.wired_count--;
if (tte_data & VTD_REF)
@@ -1755,7 +1779,7 @@
PMAP_LOCK(pmap);
sched_pin();
for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
- tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va);
+ tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va, FALSE);
tte_data = tte ? *tte : 0;
if (tte_data == 0) {
@@ -1780,7 +1804,6 @@
vm_page_dirty(m);
}
-
npv = TAILQ_NEXT(pv, pv_plist);
TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte.c#6 (text+ko) ====
@@ -64,7 +64,7 @@
if ((m->flags & PG_FICTITIOUS) ||
(flags == VTD_SW_W && (m->flags & PG_WRITEABLE) == 0))
return;
- sched_pin();
+ sched_pin();
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
/*
* Loop over all current mappings setting/clearing as appropos If
@@ -79,7 +79,7 @@
continue;
}
PMAP_LOCK(pv->pv_pmap);
- tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va);
+ tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va, FALSE);
retry:
tte_data = *tte;
if (tte_data & flags) {
@@ -120,12 +120,12 @@
{
tte_t *tte;
- tte = tte_hash_lookup(pmap->pm_hash, va);
+ tte = tte_hash_lookup(pmap->pm_hash, va, FALSE);
- if (tte)
+ if (tte) {
atomic_clear_long((u_long *)tte, flags);
-
- pmap_invalidate_page(pmap, va);
+ pmap_invalidate_page(pmap, va);
+ }
}
void
@@ -139,7 +139,7 @@
{
tte_t ttedata, *tte;
- tte = tte_hash_lookup(pmap->pm_hash, va);
+ tte = tte_hash_lookup(pmap->pm_hash, va, FALSE);
ttedata = tte ? *tte : 0;
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#14 (text+ko) ====
@@ -110,13 +110,37 @@
}
-static void
+static __inline void
free_tte_hash(tte_hash_t th)
{
tte_hash_count--;
uma_zfree(thzone, th);
}
+/*
+ * Spin until the bucket lock has been acquired.  The lock is the VTD_LOCK
+ * bit in the data word of the bucket's first field.
+ *
+ * atomic_cmpset_long() returns non-zero when the compare-and-set succeeds,
+ * so the loop has to retry while it returns zero.  Each attempt re-reads
+ * the data word with VTD_LOCK masked off as the expected value; the swap
+ * can therefore only succeed while the bit is clear, i.e. while nobody
+ * else holds the bucket.
+ */
+static void
+hash_bucket_lock(tte_hash_field_t fields)
+{
+	uint64_t data;
+
+	do {
+		/* Expected value: current contents with the lock bit clear. */
+		data = fields[0].tte.data & ~VTD_LOCK;
+	} while (!atomic_cmpset_long(&fields[0].tte.data, data, data | VTD_LOCK));
+
+	membar(StoreLoad);
+}
+
+/*
+ * Release a bucket lock by clearing VTD_LOCK in the data word of the
+ * bucket's first field, then issue a StoreLoad barrier.  DEBUG kernels
+ * panic if the bucket is not locked on entry.
+ */
+static __inline void
+hash_bucket_unlock(tte_hash_field_t fields)
+{
+#ifdef DEBUG
+	if (!(fields[0].tte.data & VTD_LOCK))
+		panic("trying to unlock bucket that isn't locked");
+#endif
+	fields[0].tte.data = fields[0].tte.data & ~VTD_LOCK;
+	membar(StoreLoad);
+}
+
void
tte_hash_init(vm_paddr_t bootmem)
{
@@ -220,8 +244,9 @@
free_tte_hash(th);
}
+
int
-tte_hash_delete(tte_hash_t th, vm_offset_t va)
+tte_hash_delete(tte_hash_t th, vm_offset_t va, int locked)
{
uint64_t hash_shift, hash_index;
tte_hash_field_t fields;
@@ -232,9 +257,12 @@
hash_index = (va >> hash_shift) & HASH_MASK(th);
fields = (th->th_hashtable[hash_index].the_fields);
-#ifdef DEBUG
- KASSERT(tte_hash_lookup(th, va) != 0, ("attempting to delete non-existent entry"));
+#if 0
+ KASSERT(tte_hash_lookup(th, va, FALSE) != 0, ("attempting to delete non-existent entry"));
#endif
+ if (locked == FALSE)
+ hash_bucket_lock(fields);
+
for (i = 0; i <= 3; i++)
if ((fields[i].tte.tag << TTARGET_VA_SHIFT) == (va & ~PAGE_MASK_4M))
break;
@@ -252,6 +280,9 @@
fields[lastindex].tte.tag = 0;
fields[lastindex].tte.data = 0;
+ if (lastindex != 0)
+ hash_bucket_unlock(fields);
+
return (vaindex < 4);
}
@@ -269,21 +300,25 @@
uint64_t hash_shift, hash_index, tte_tag;
tte_hash_field_t fields;
int i;
+
+
+#if 0
tte_t *tte;
-
- tte = tte_hash_lookup(th, va);
+ tte = tte_hash_lookup(th, va, FALSE);
if (tte)
panic("mapping for va=0x%lx already exists tte_data=0x%lx\n", va, *tte);
-
+#endif
/* XXX - only handle 8K pages for now */
hash_shift = PAGE_SHIFT;
hash_index = (va >> hash_shift) & HASH_MASK(th);
fields = (th->th_hashtable[hash_index].the_fields);
+
+ hash_bucket_lock(fields);
tte_tag = (((uint64_t)th->th_context << TTARGET_CTX_SHIFT)|(va >> TTARGET_VA_SHIFT));
for (i = 0; i <= 3; i++) {
if ((fields[i].tte.tag == 0) || (fields[i].tte.tag == tte_tag)) {
- fields[i].tte.data = tte_data;
+ fields[i].tte.data = tte_data | (i ? 0 : VTD_LOCK);
fields[i].tte.tag = tte_tag;
goto done;
}
@@ -292,31 +327,46 @@
panic("collision handling unimplemented - please re-consider");
done:
+ hash_bucket_unlock(fields);
th->th_entries++;
}
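+/*
+ * Look up the tte for va.  Returns a pointer to the matching entry's data
+ * field, or NULL if the bucket holds no valid entry for va.  If
+ * leave_locked is true and an entry is found, the hash bucket is left
+ * locked on return; in every other case the bucket is unlocked before
+ * returning.
+ */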
+
+
tte_t *
-tte_hash_lookup(tte_hash_t th, vm_offset_t va)
+tte_hash_lookup(tte_hash_t th, vm_offset_t va, int leave_locked)
{
uint64_t hash_shift, hash_index;
tte_hash_field_t fields;
int i;
+ tte_t *entry;
/* XXX - only handle 8K pages for now */
hash_shift = PAGE_SHIFT;
hash_index = (va >> hash_shift) & HASH_MASK(th);
fields = (th->th_hashtable[hash_index].the_fields);
-
+ entry = NULL;
+
+ hash_bucket_lock(fields);
for (i = 0; i <= 3; i++) {
if (((fields[i].tte.tag << TTARGET_VA_SHIFT) == (va & ~PAGE_MASK_4M)) &&
- (fields[i].tte.data != 0))
- return &(fields[i].tte.data);
+ (fields[i].tte.data != 0)) {
+ entry = &(fields[i].tte.data);
+ break;
+ }
}
- /*
+ if (entry == NULL || leave_locked == FALSE)
+ hash_bucket_unlock(fields);
+
+ /*
* XXX handle the case of collisions > 3
*
*/
- return (NULL);
+ return (entry);
}
@@ -331,7 +381,7 @@
hash_scratch = ((vm_offset_t)th->th_hashtable) | ((vm_offset_t)th->th_size);
set_hash_kernel_scratchpad(hash_scratch);
- return hash_scratch;
+ return (hash_scratch);
}
uint64_t
More information about the p4-projects
mailing list