PERFORCE change 96341 for review

Kip Macy kmacy at FreeBSD.org
Sat Apr 29 02:00:48 UTC 2006


http://perforce.freebsd.org/chv.cgi?CH=96341

Change 96341 by kmacy at kmacy_storage:sun4v_rwbuf on 2006/04/29 02:00:02

	lock hash buckets during updates to prevent races with TSB miss handlers
	fix pmap_kextract lookups for second 4MB page of nucleus memory
	recycle contexts

Affected files ...

.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte_hash.h#9 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#38 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#14 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte_hash.h#9 (text+ko) ====

@@ -16,13 +16,13 @@
 
 void tte_hash_destroy(tte_hash_t th);
 
-int tte_hash_delete(tte_hash_t hash, vm_offset_t va);
+int tte_hash_delete(tte_hash_t hash, vm_offset_t va, int locked);
 
 void tte_hash_delete_all(tte_hash_t hash);
 
 void tte_hash_insert(tte_hash_t hash, vm_offset_t va, tte_t data);
 
-tte_t *tte_hash_lookup(tte_hash_t hash, vm_offset_t va);
+tte_t *tte_hash_lookup(tte_hash_t hash, vm_offset_t va, int leave_locked);
 
 uint64_t tte_hash_set_scratchpad_kernel(tte_hash_t th);
 

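The extra argument threads bucket-lock ownership through the hash API: a
lookup with leave_locked set returns with the hash bucket still held, and
a delete called with locked set consumes that lock rather than
re-acquiring it. The pmap.c hunks below use the pairing roughly as follows
(a condensed sketch of the calling convention, not a verbatim excerpt):

	/* read and remove a mapping while holding the bucket across both */
	tte = tte_hash_lookup(pmap->pm_hash, va, TRUE);	/* bucket left locked */
	tte_data = *tte;
	tte_hash_delete(pmap->pm_hash, va, TRUE);	/* drops the bucket lock */
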
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#38 (text+ko) ====

@@ -127,7 +127,11 @@
 static struct vm_object pvzone_obj;
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 int pmap_debug = 0;
-static int context = 1; /* XXX */
+
+static struct mtx pmap_ctx_lock;
+static uint16_t ctx_stack[PMAP_CONTEXT_MAX];
+static int ctx_stack_top; 
+
 static int permanent_mappings = 0;
 static uint64_t nucleus_memory;
 static uint64_t nucleus_mappings[2];
@@ -230,6 +234,31 @@
 		return (0);
 }
 
+static __inline void
+free_context(uint16_t ctx)
+{
+	mtx_lock_spin(&pmap_ctx_lock);
+	ctx_stack[ctx_stack_top++] = ctx;
+	KASSERT(ctx_stack_top < PMAP_CONTEXT_MAX,
+		("context stack overrun - system error"));
+	mtx_unlock_spin(&pmap_ctx_lock);
+}
+
+static __inline uint16_t
+get_context(void)
+{
+	uint16_t ctx;
+
+	mtx_lock_spin(&pmap_ctx_lock);
+	ctx = ctx_stack[--ctx_stack_top];
+	KASSERT(ctx_stack_top > 0,
+		("context stack underrun - need to implement context stealing"));
+	mtx_unlock_spin(&pmap_ctx_lock);
+
+	return (ctx);
+}
 
 static __inline void
 free_pv_entry(pv_entry_t pv)
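
Together, get_context()/free_context() replace the old monotonically
increasing counter with a LIFO free list guarded by a spin mutex, so
contexts are recycled across pmap lifetimes instead of eventually tripping
over PMAP_CONTEXT_MAX. Condensed from the pmap_pinit() and pmap_release()
hunks further down, the calls pair up like this:

	pmap->pm_context = get_context();	/* pmap_pinit() */
	...
	free_context(pmap->pm_context);		/* pmap_release() */
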
@@ -290,9 +319,9 @@
 				continue;
 			pmap->pm_stats.resident_count--;
 
-			tte = tte_hash_lookup(pmap->pm_hash, va);
+			tte = tte_hash_lookup(pmap->pm_hash, va, TRUE);
 			tte_data = *tte;
-			tte_hash_delete(pmap->pm_hash, va);
+			tte_hash_delete(pmap->pm_hash, va, TRUE);
 
 			KASSERT((tte_data & VTD_WIRED) == 0,
 			    ("get_pv_entry: wired pte %#jx", (uintmax_t)tte_data));
@@ -377,12 +406,9 @@
 	pmap->pm_active |= 1;
 	pmap->pm_tlbactive |= 1;
 #endif
-#if 0
-	tsb_clear(&pmap->pm_tsb);
-	tte_hash_clear(pmap->pm_hash);
-#endif
+	/* XXX Is this necessary? */
+	pmap_invalidate_all(pmap);
 
-	pmap->pm_context = context++;
 	pmap->pm_hashscratch = tte_hash_set_scratchpad_user(pmap->pm_hash, pmap->pm_context);
 	pmap->pm_tsbscratch = tsb_set_scratchpad_user(&pmap->pm_tsb);
 	PCPU_SET(curpmap, pmap);
@@ -515,10 +541,6 @@
 			for (j = 0; phys_avail_tmp[j + 2] != 0; phys_avail[j] = phys_avail_tmp[j],
 				     phys_avail[j + 1] = phys_avail_tmp[j + 1], j += 2)
 				;
-#if 0
-			for (i = 0; phys_avail_tmp[i + 1] != 0; i++) 
-				phys_avail[i] = phys_avail_tmp[i];
-#endif
 		}  
 	}
 
@@ -696,6 +718,7 @@
 	pm->pm_tlbactive = ~0;
 
 	PMAP_LOCK_INIT(kernel_pmap);
+
 	TAILQ_INIT(&kernel_pmap->pm_pvlist);
 
 	error = hv_set_ctx0(MAX_TSB_INFO, vtophys((vm_offset_t)&kernel_td));
@@ -820,10 +843,10 @@
 		tte_t *src_tte, *dst_tte, tte_data;
 		vm_page_t m;
 
-		src_tte = tte_hash_lookup(src_pmap->pm_hash, addr);
+		src_tte = tte_hash_lookup(src_pmap->pm_hash, addr, FALSE);
 		tte_data = src_tte ? *src_tte : 0;
 		if ((tte_data & VTD_MANAGED) != 0) {
-			if ((dst_tte = tte_hash_lookup(dst_pmap->pm_hash, addr)) == NULL) {
+			if ((dst_tte = tte_hash_lookup(dst_pmap->pm_hash, addr, FALSE)) == NULL) {
 				m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
 				tte_hash_insert(dst_pmap->pm_hash, addr, tte_data & ~(VTD_W|VTD_REF));
 				dst_pmap->pm_stats.resident_count++;
@@ -877,7 +900,7 @@
 	sched_pin();
 
 	tte_data = pa = VM_PAGE_TO_PHYS(m);
-	otte = tte_hash_lookup(pmap->pm_hash, va);
+	otte = tte_hash_lookup(pmap->pm_hash, va, TRUE);
 	otte_data = otte ? *otte : 0;
 	opa = TTE_GET_PA(otte_data);
 	/*
@@ -1037,12 +1060,10 @@
 {
 	vm_paddr_t pa;
 	tte_t *tte;
-	PMAP_LOCK(pmap);
 
-	tte = tte_hash_lookup(pmap->pm_hash, va);
+	tte = tte_hash_lookup(pmap->pm_hash, va, FALSE);
 	pa = TTE_GET_PA(*tte) | (va & TTE_GET_PAGE_MASK(*tte));
 
-	PMAP_UNLOCK(pmap);
 	return (pa);
 }
 
@@ -1061,7 +1082,7 @@
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	sched_pin();
-	tte_data = tte_hash_lookup(pmap->pm_hash, va);
+	tte_data = tte_hash_lookup(pmap->pm_hash, va, FALSE);
 	if (tte_data != 0 && 
 	    ((*tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) {
 		m = PHYS_TO_VM_PAGE(TTE_GET_PA(*tte_data));
@@ -1083,9 +1104,15 @@
 void 
 pmap_init(void)
 {
+
 	/* allocate pv_entry zones */
 	int shpgperproc = PMAP_SHPGPERPROC;
 
+	for (ctx_stack_top = 1; ctx_stack_top < PMAP_CONTEXT_MAX; ctx_stack_top++) 
+		ctx_stack[ctx_stack_top] = ctx_stack_top;
+
+	mtx_init(&pmap_ctx_lock, "ctx lock", NULL, MTX_SPIN);
+
 	/*
 	 * Initialize the address space (zone) for the pv entries.  Set a
 	 * high water mark so that the system can recover from excessive
@@ -1155,11 +1182,10 @@
 
 	if (cpumask == pmap->pm_tlbactive)
 		return;
-#if 0	
+
 	if (pmap != kernel_pmap)
 		active = (pmap->pm_tlbactive & ~cpumask);
 	else 
-#endif
 		active = PCPU_GET(other_cpus);
 
 #if 1
@@ -1315,7 +1341,7 @@
 boolean_t 
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t va)
 {
-	return (tte_hash_lookup(pmap->pm_hash, va) == NULL);
+	return (tte_hash_lookup(pmap->pm_hash, va, FALSE) == NULL);
 }
 
 /*
@@ -1342,12 +1368,12 @@
 	if (va > KERNBASE && va < KERNBASE + nucleus_memory) {
 		uint64_t offset;
 		offset = va - KERNBASE; 
-		pa = nucleus_mappings[offset >> 22] + offset;
+		pa = nucleus_mappings[offset >> 22] | (va & PAGE_MASK_4M);
 	}
 	if ((pa == 0) && (tte_data = tsb_lookup_tte(va, 0)) != 0)
 		pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
 
-	if ((pa == 0) && (tte = tte_hash_lookup(kernel_pmap->pm_hash, va)) != NULL)
+	if ((pa == 0) && (tte = tte_hash_lookup(kernel_pmap->pm_hash, va, FALSE)) != NULL)
 		pa = TTE_GET_PA(*tte) | (va & TTE_GET_PAGE_MASK(*tte));
 
 	return pa;
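
The old expression added the full offset to nucleus_mappings[1], which
already points at the physical base of the second 4MB page, so lookups in
that page came back 4MB too high. A worked example, assuming
PAGE_MASK_4M == (1 << 22) - 1 and a 4MB-aligned KERNBASE:

	offset = va - KERNBASE;		/* say 0x401000: 4MB + 4KB */
	offset >> 22;			/* == 1, selects nucleus_mappings[1] */
	/* old: nucleus_mappings[1] + 0x401000 -> 4MB past the right frame */
	/* new: nucleus_mappings[1] | (va & PAGE_MASK_4M) -> base + 0x1000 */
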
@@ -1356,7 +1382,7 @@
 static void
 pmap_kremove(vm_offset_t va)
 { 
-	tte_hash_delete(kernel_pmap->pm_hash, va);
+	tte_hash_delete(kernel_pmap->pm_hash, va, FALSE);
 }
 
 static void
@@ -1486,11 +1512,8 @@
 pmap_pinit(pmap_t pmap)
 {
 
-	pmap->pm_context = context++;
+	pmap->pm_context = get_context();
 
-	KASSERT(context < PMAP_CONTEXT_MAX, 
-		("max context limit hit - need to implement context recycling"));
-
 	pmap->pm_hash = tte_hash_create(pmap->pm_context, &pmap->pm_hashscratch);
 	pmap->pm_tsb_ra = tsb_init(&pmap->pm_tsb, &pmap->pm_tsbscratch);
 	pmap->pm_active = 0;
@@ -1532,7 +1555,7 @@
 		uint64_t otte_data, tte_data;
 		vm_page_t m;
 	retry:
-		tte = tte_hash_lookup(pmap->pm_hash, tva);
+		tte = tte_hash_lookup(pmap->pm_hash, tva, TRUE);
 		otte_data = tte_data = tte ? *tte : 0;
 		if (tte_data & VTD_MANAGED) {
 			m = NULL;
@@ -1617,6 +1640,7 @@
 	pmap_lazyfix(pmap);
 	tsb_deinit(&pmap->pm_tsb);
 	tte_hash_destroy(pmap->pm_hash);
+	free_context(pmap->pm_context);
 	PMAP_LOCK_DESTROY(pmap);
 }
 
@@ -1640,10 +1664,10 @@
 	sched_pin();
 	PMAP_LOCK(pmap);
 	for (tva = start; tva < end; tva += PAGE_SIZE) {
-		if ((tte = tte_hash_lookup(pmap->pm_hash, tva)) == NULL)
+		if ((tte = tte_hash_lookup(pmap->pm_hash, tva, TRUE)) == NULL)
 			continue;
 		pmap_remove_tte(pmap, tte, tva);
-		tte_hash_delete(pmap->pm_hash, tva);
+		tte_hash_delete(pmap->pm_hash, tva, TRUE);
 
 		invlva = 1;
 	}
@@ -1684,9 +1708,9 @@
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		PMAP_LOCK(pv->pv_pmap);
 		pv->pv_pmap->pm_stats.resident_count--;
-		tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va);
+		tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va, TRUE);
 		tte_data = *tte;
-		tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va);
+		tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va, TRUE);
 		if (tte_data & VTD_WIRED)
 			pv->pv_pmap->pm_stats.wired_count--;
 		if (tte_data & VTD_REF)
@@ -1755,7 +1779,7 @@
 	PMAP_LOCK(pmap);
 	sched_pin();
 	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
-		tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va);
+		tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va, FALSE);
 		tte_data = tte ? *tte : 0;
 		
 		if (tte_data == 0) {
@@ -1780,7 +1804,6 @@
 			vm_page_dirty(m);
 		}
 		
-		
 		npv = TAILQ_NEXT(pv, pv_plist);
 		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte.c#6 (text+ko) ====

@@ -64,7 +64,7 @@
 	if ((m->flags & PG_FICTITIOUS) ||
 	    (flags == VTD_SW_W && (m->flags & PG_WRITEABLE) == 0))
 		return;
-		sched_pin();
+	sched_pin();
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	/*
 	 * Loop over all current mappings setting/clearing as appropos If
@@ -79,7 +79,7 @@
 				continue;
 		}
 		PMAP_LOCK(pv->pv_pmap);
-		tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va);
+		tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va, FALSE);
 	retry:
 		tte_data = *tte;
 		if (tte_data & flags) {
@@ -120,12 +120,12 @@
 {
 	tte_t *tte;
 
-	tte = tte_hash_lookup(pmap->pm_hash, va);
+	tte = tte_hash_lookup(pmap->pm_hash, va, FALSE);
 
-	if (tte)
+	if (tte) {
 		atomic_clear_long((u_long *)tte, flags);
-
-	pmap_invalidate_page(pmap, va);
+		pmap_invalidate_page(pmap, va);
+	}
 }
 
 void 
@@ -139,7 +139,7 @@
 {
 	tte_t ttedata, *tte;
 	
-	tte = tte_hash_lookup(pmap->pm_hash, va);
+	tte = tte_hash_lookup(pmap->pm_hash, va, FALSE);
 	
 	ttedata = tte ? *tte : 0; 
 

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#14 (text+ko) ====

@@ -110,13 +110,37 @@
 
 }
 
-static void
+static __inline void
 free_tte_hash(tte_hash_t th)
 {
 	tte_hash_count--;
 	uma_zfree(thzone, th);
 }
 
+static void
+hash_bucket_lock(tte_hash_field_t fields) 
+{
+	uint64_t data;
+	
+	data = fields[0].tte.data & ~VTD_LOCK;
+	while (atomic_cmpset_long(&fields[0].tte.data, data, data | VTD_LOCK) == 0)
+		data = fields[0].tte.data & ~VTD_LOCK;
+
+	membar(StoreLoad);
+}
+
+static __inline void
+hash_bucket_unlock(tte_hash_field_t fields) 
+{
+#ifdef DEBUG
+	if ((fields[0].tte.data & VTD_LOCK) == 0)
+		panic("trying to unlock bucket that isn't locked");
+#endif
+	fields[0].tte.data &= ~VTD_LOCK;
+	membar(StoreLoad);
+}
+
 void 
 tte_hash_init(vm_paddr_t bootmem)
 {
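
The bucket lock is a spare bit (VTD_LOCK) in the first entry's data word,
taken with a compare-and-swap loop and released by clearing the bit. A
self-contained user-space model of the same idea, using C11 atomics in
place of the kernel's atomic_cmpset_long() and membar() (all names here
are illustrative):

	#include <stdatomic.h>
	#include <stdint.h>

	#define LOCK_BIT	0x1ULL		/* stands in for VTD_LOCK */

	static void
	bit_lock(_Atomic uint64_t *word)
	{
		uint64_t expected;

		do {
			/* expect the lock bit clear; retry while the CAS fails */
			expected = atomic_load(word) & ~LOCK_BIT;
		} while (!atomic_compare_exchange_weak(word, &expected,
		    expected | LOCK_BIT));
	}

	static void
	bit_unlock(_Atomic uint64_t *word)
	{
		atomic_fetch_and(word, ~LOCK_BIT);	/* clear the lock bit */
	}
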
@@ -220,8 +244,9 @@
 	free_tte_hash(th);
 }
 
+
 int
-tte_hash_delete(tte_hash_t th, vm_offset_t va)
+tte_hash_delete(tte_hash_t th, vm_offset_t va, int locked)
 {
 	uint64_t hash_shift, hash_index;
 	tte_hash_field_t fields;
@@ -232,9 +257,12 @@
 	hash_index = (va >> hash_shift) & HASH_MASK(th);
 	fields = (th->th_hashtable[hash_index].the_fields);
 
-#ifdef DEBUG
-	KASSERT(tte_hash_lookup(th, va) != 0, ("attempting to delete non-existent entry"));
+#if 0
+	KASSERT(tte_hash_lookup(th, va, FALSE) != 0, ("attempting to delete non-existent entry"));
 #endif	
+	if (locked == FALSE)
+		hash_bucket_lock(fields);
+
 	for (i = 0; i <= 3; i++) 
 		if ((fields[i].tte.tag << TTARGET_VA_SHIFT) == (va & ~PAGE_MASK_4M)) 
 			break;
@@ -252,6 +280,9 @@
 	fields[lastindex].tte.tag = 0;
 	fields[lastindex].tte.data = 0;
 	
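+	/*
+	 * When lastindex == 0, zeroing fields[0].tte.data above already
+	 * cleared VTD_LOCK, hence the conditional unlock below.
+	 */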
+	if (lastindex != 0)
+		hash_bucket_unlock(fields);
+
 	return (vaindex < 4);
 }
 
@@ -269,21 +300,25 @@
 	uint64_t hash_shift, hash_index, tte_tag;
 	tte_hash_field_t fields;
 	int i;
+
+#if 0
 	tte_t *tte;
-	
-	tte = tte_hash_lookup(th, va);
+	tte = tte_hash_lookup(th, va, FALSE);
 	if (tte)
 		panic("mapping for va=0x%lx already exists tte_data=0x%lx\n", va, *tte);
-	
+#endif
 	/* XXX - only handle 8K pages for now */
 	hash_shift = PAGE_SHIFT;
 	hash_index = (va >> hash_shift) & HASH_MASK(th);
 	fields = (th->th_hashtable[hash_index].the_fields);
+
+	hash_bucket_lock(fields);
 	tte_tag = (((uint64_t)th->th_context << TTARGET_CTX_SHIFT)|(va >> TTARGET_VA_SHIFT));
 
 	for (i = 0; i <= 3; i++) {
 		if ((fields[i].tte.tag == 0) || (fields[i].tte.tag == tte_tag)) {
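+			/* slot 0's data word doubles as the bucket lock: keep VTD_LOCK set */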
-			fields[i].tte.data = tte_data;
+			fields[i].tte.data = tte_data | (i ? 0 : VTD_LOCK);
 			fields[i].tte.tag = tte_tag;
 			goto done;
 		} 
@@ -292,31 +327,46 @@
 	panic("collision handling unimplemented - please re-consider");
 	
 done:
+	hash_bucket_unlock(fields);
 	th->th_entries++;
 }
 
+/*
+ * If leave_locked is true, a pointer to the tte's data field is
+ * returned to the caller with the hash bucket left locked.
+ */
 tte_t *
-tte_hash_lookup(tte_hash_t th, vm_offset_t va)
+tte_hash_lookup(tte_hash_t th, vm_offset_t va, int leave_locked)
 {
 	uint64_t hash_shift, hash_index;
 	tte_hash_field_t fields;
 	int i;
+	tte_t *entry;
 	/* XXX - only handle 8K pages for now */
 
 	hash_shift = PAGE_SHIFT;
 	hash_index = (va >> hash_shift) & HASH_MASK(th);
 	fields = (th->th_hashtable[hash_index].the_fields);
-	
+	entry = NULL;
+
+	hash_bucket_lock(fields);
 	for (i = 0; i <= 3; i++) {
 		if (((fields[i].tte.tag << TTARGET_VA_SHIFT) == (va & ~PAGE_MASK_4M)) &&
-		    (fields[i].tte.data != 0))
-			return &(fields[i].tte.data);
+		    (fields[i].tte.data != 0)) {
+			entry = &(fields[i].tte.data);
+			break;
+		}
 	}
-	/* 
+	if (entry == NULL || leave_locked == FALSE)
+		hash_bucket_unlock(fields);
+	
+        /* 
 	 * XXX handle the case of collisions > 3
 	 *
 	 */
-	return (NULL);
+	return (entry);
 }
 
 
@@ -331,7 +381,7 @@
 	hash_scratch = ((vm_offset_t)th->th_hashtable) | ((vm_offset_t)th->th_size);
 	set_hash_kernel_scratchpad(hash_scratch);
 	
-	return hash_scratch;
+	return (hash_scratch);
 }
 
 uint64_t

