PERFORCE change 96722 for review

Kip Macy kmacy at FreeBSD.org
Sat May 6 00:34:26 UTC 2006


http://perforce.freebsd.org/chv.cgi?CH=96722

Change 96722 by kmacy at kmacy_storage:sun4v_rwbuf on 2006/05/06 00:33:31

	remove the initial hand-rolled bootstrap memory allocation for hash tables
	initial cut at handling arbitrary collision depth
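
	For context, the scheme this introduces: each 64-byte hash bucket
	holds four 16-byte tag/data fields; when a bucket overflows, its
	fourth field is repurposed as a link (flagged by TH_COLLISION) to
	an overflow entry in a fragment page, so chains can grow to
	arbitrary depth.  A minimal C sketch of the walk -- illustrative
	only, the real structures and lookup are in the tte_hash.c diff
	below:

		#include <stdint.h>

		#define TH_COLLISION	(1UL << 47)	/* mirrors tte_hash.h below */

		/* simplified copy of the union added to tte_hash.c */
		typedef union field_ {
			struct { uint64_t tag; uint64_t data; } tte;
			struct { uint64_t flags; union field_ *next; } of;
		} field_t;

		static uint64_t
		sketch_lookup(field_t *fields, uint64_t tag)
		{
			int i;

			for (;;) {
				for (i = 0; i < 4 && fields[i].tte.tag != 0; i++)
					if (fields[i].tte.tag == tag)
						return (fields[i].tte.data);
				if (i < 4 || !(fields[3].of.flags & TH_COLLISION))
					return (0);		/* end of chain: miss */
				fields = fields[3].of.next;	/* follow the link */
			}
		}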

Affected files ...

.. //depot/projects/kmacy_sun4v/src/sys/sparc64/sparc64/genassym.c#19 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte_hash.h#12 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/exception.S#47 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#45 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#20 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/sys/sparc64/sparc64/genassym.c#19 (text+ko) ====

@@ -183,8 +183,10 @@
 ASSYM(VTD_REF, VTD_REF);
 ASSYM(VTD_W, VTD_W);
 ASSYM(VTD_SW_W, VTD_SW_W);
+ASSYM(VTD_LOCK, VTD_LOCK);
 
 ASSYM(THE_SHIFT, THE_SHIFT);
+ASSYM(TH_COLLISION_SHIFT, TH_COLLISION_SHIFT);
 ASSYM(HVTSB_PA, offsetof(struct hv_tsb_info, hvtsb_pa));
 ASSYM(PM_HASHSCRATCH, offsetof(struct pmap, pm_hashscratch));
 ASSYM(PM_TSBSCRATCH, offsetof(struct pmap, pm_tsbscratch));
@@ -351,3 +353,4 @@
 ASSYM(VM_MIN_DIRECT_ADDRESS, VM_MIN_DIRECT_ADDRESS);
 ASSYM(VM_MIN_PROM_ADDRESS, VM_MIN_PROM_ADDRESS);
 ASSYM(VM_MAX_PROM_ADDRESS, VM_MAX_PROM_ADDRESS);
+

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte_hash.h#12 (text+ko) ====

@@ -1,12 +1,14 @@
 #ifndef	_MACHINE_TTE_HASH_H_
 #define	_MACHINE_TTE_HASH_H_
 
-#define THE_SHIFT    6  /* size of hash entry is 64-bytes */
+#define THE_SHIFT    6          /* size of hash entry is 64-bytes */
+#define TH_COLLISION_SHIFT   47 /* bit 47 will never be set for a valid tag */
+#define TH_COLLISION       (1UL << TH_COLLISION_SHIFT)  
 
 struct tte_hash;
 typedef struct tte_hash *tte_hash_t;
 
-void tte_hash_init(vm_paddr_t);
+void tte_hash_init(void);
 
 void tte_hash_clear(tte_hash_t hash);
 

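The sizes here interlock: one hash entry is 1 << THE_SHIFT = 64 bytes,
i.e. four 16-byte tag/data fields, so a full bucket carries three
mappings plus either a fourth mapping or an overflow link, and a link
can never be mistaken for a mapping because bit 47 is clear in every
valid tag.  Restated as compile-time checks (illustrative only; the
analogous CTASSERTs for the fragment structures are in tte_hash.c below):

	#include <sys/param.h>
	#include <sys/systm.h>		/* CTASSERT */
	#include <machine/tte_hash.h>

	CTASSERT(THE_SHIFT == 6);		/* 64-byte hash entry */
	CTASSERT((1 << THE_SHIFT) == 4 * 2 * sizeof(uint64_t)); /* 4 tag/data pairs */
	CTASSERT(TH_COLLISION == (1UL << TH_COLLISION_SHIFT));
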
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/exception.S#47 (text+ko) ====

@@ -491,7 +491,7 @@
 	.macro	spill_mixed_ktt1_sk
 	btst	1, %sp
 	bz,a,pt	%xcc, fault_32bit_sk
-	srl	%sp, 0, %sp
+	  srl	%sp, 0, %sp
 	ba,a,pt	%xcc, fault_64bit_sk
 	  nop
 	.align 128
@@ -1359,13 +1359,13 @@
 	andn	tmp, lock_bit, tmp ; \
 	stxa	tmp, [addr + 8]%asi ; 
 
-#define HASH_LOOKUP(addr, tag, searchtag, endlabel, matchlabel) \
+#define HASH_LOOKUP(addr, tag, searchtag, faillabel, matchlabel) \
 	ldda	[addr]%asi, tag	; \
 	cmp	tag, searchtag	; \
 	be,pn	%xcc, matchlabel ; \
 	  nop			; \
 	cmp	tag, %g0	; \
-	be,pn	%xcc, endlabel	; \
+	be,pn	%xcc, faillabel	; \
 	  nop
 
 #define RESTORE_TRAPWIN(pcpu, cansave, label1, label2) \
@@ -1518,18 +1518,30 @@
 	rdpr	%tt, %g3			! reload trap type
 
 tsb_miss_lookup_0:
-	HASH_LOOKUP(%g2, %l6, %l2, 7f, 8f)
+	HASH_LOOKUP(%g2, %l6, %l2, tsb_miss_not_found, 8f)
 tsb_miss_lookup_1:
 	add	%g2, 16, %g2
-	HASH_LOOKUP(%g2, %l6, %l2, 7f, 8f)
+	HASH_LOOKUP(%g2, %l6, %l2, tsb_miss_not_found, 8f)
 tsb_miss_lookup_2:
 	add	%g2, 16, %g2
-	HASH_LOOKUP(%g2, %l6, %l2, 7f, 8f)
+	HASH_LOOKUP(%g2, %l6, %l2, tsb_miss_not_found, 8f)
 tsb_miss_lookup_3:
 	add	%g2, 16, %g2
-	HASH_LOOKUP(%g2, %l6, %l2, 7f, 8f)
+	HASH_LOOKUP(%g2, %l6, %l2, tsb_miss_not_found, 8f)
+	mov	1, %g2
+	sethi	%uhi(VM_MIN_DIRECT_ADDRESS), %g3
+	cmp	%l3, ASI_LDTD_N
+	sllx	%g2, TH_COLLISION_SHIFT, %g2
+	sllx	%g3, 32, %g3
+	beq,pt	%xcc, 7f
+	  nop
+	andn	%l7, %g3, %l7			! generate real address
+7:
+	andcc	%l6, %g2, %g0
+	rdpr	%tt, %g3
+	bnz,a,pt %xcc, tsb_miss_lookup_0
+	  mov	%l7, %g2
 tsb_miss_not_found:	
-7:	! not found
 	! we need to jump to tl0_trap to drop us back down to tl0
 	! and take us to trap(...) to service the fault
 	wr	%g0, %l3, %asi
@@ -1588,9 +1600,13 @@
 	MAGIC_TRAP_ON;MAGIC_TRAP_ON;MAGIC_EXIT	! die if all we're doing 
 						! is storing same data
 #else
+	mov	%o0, %g1
+	mov	%o5, %g7
 	PUTCHAR(0x5a)
 	PUTCHAR(0x5a)
 	PUTCHAR(0x5a)
+	mov	%g1, %o0
+	mov	%g7, %o5
 	MAGIC_TRAP_ON; MAGIC_TRAP_OFF
 #endif
 12:

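In C terms, the new fall-through after the fourth probe does roughly the
following (a hedged rendering: the names follow the assembly, with %l3
holding the ASI in use and %l6/%l7 the tag/data pair loaded by the last
HASH_LOOKUP, and the two constants are placeholders, not the real
values).  The stored link is a kernel direct-map virtual address, so a
handler probing through a real-address ASI strips the direct-map base
before restarting the probe loop at tsb_miss_lookup_0:

	#include <stdint.h>

	#define TH_COLLISION		(1UL << 47)
	#define VM_MIN_DIRECT_ADDRESS	0xfffff80000000000UL	/* placeholder */
	#define ASI_LDTD_N		0x26			/* placeholder */

	/* Returns the next bucket to probe, or 0 for the not-found path. */
	static uint64_t
	collision_step(uint64_t asi_l3, uint64_t tag_l6, uint64_t data_l7)
	{
		uint64_t next = data_l7;	/* link stored as direct-map VA */

		if (asi_l3 != ASI_LDTD_N)	/* probing by real address */
			next &= ~VM_MIN_DIRECT_ADDRESS;	/* VA -> RA */
		return ((tag_l6 & TH_COLLISION) ? next : 0);
	}
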
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#45 (text+ko) ====

@@ -106,7 +106,6 @@
 int sparc64_nmemreg;
 
 extern vm_paddr_t mmu_fault_status_area;
-vm_paddr_t proc0_mem;
 
 
 /*
@@ -631,7 +630,6 @@
 	 */
 	mmu_fault_status_area = pmap_bootstrap_alloc(MMFSA_SIZE*MAXCPU);
 
-	proc0_mem = pmap_bootstrap_alloc(PAGE_SIZE*4*400);
 	/*
 	 * Allocate and map the message buffer.
 	 */
@@ -924,19 +922,18 @@
 		}
 		goto validate;
 
-	}
-	/*
-	 * Mapping has changed, invalidate old range and fall through to
-	 * handle validating new mapping.
-	 */
-	if (opa) {
+	} else if (opa) {
+		/*
+		 * Mapping has changed, invalidate old range and fall through to
+		 * handle validating new mapping.
+		 */
 		if (otte_data & VTD_WIRED)
 			pmap->pm_stats.wired_count--;
 		if (otte_data & VTD_MANAGED) {
 			om = PHYS_TO_VM_PAGE(opa);
 			pmap_remove_entry(pmap, om, va);
 		}
-	} else
+	} else 
 		pmap->pm_stats.resident_count++;
 
 	/*
@@ -1118,7 +1115,7 @@
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
 
-	tte_hash_init(proc0_mem);
+	tte_hash_init();
 
 }
 

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#20 (text+ko) ====

@@ -59,20 +59,21 @@
 #define HASH_MASK(th) ((th->th_size << (PAGE_SHIFT - THE_SHIFT)) - 1)
 #define HASH_VALID     0x1
 
+
 #define DEBUG
 
 struct tte_hash_entry;
 
-typedef union {
+#define MAX_FRAGMENT_ENTRIES ((PAGE_SIZE / sizeof(struct tte_hash_entry)) - 1)
+
+typedef union tte_hash_field_ {
 	struct {
 		uint64_t tag;
 		uint64_t data;
 	} tte;
 	struct {
-		uint16_t  flags;
-		uint16_t  count;
-		uint32_t  vapad; /* zero if this is a collision entry */
-		struct tte_hash_entry *next;
+		uint64_t  flags;
+		union tte_hash_field_ *next;
 	} of;
 } tte_hash_field, *tte_hash_field_t;
 
@@ -82,16 +83,33 @@
 } *tte_hash_entry_t;
 
 
+struct fragment_header {
+	struct fragment_header *fh_next;
+	uint16_t fh_count;
+	uint16_t fh_free_head;
+	uint16_t pad[26];
+};
+
+CTASSERT(sizeof(struct fragment_header) == sizeof(struct tte_hash_entry));
+
 struct tte_hash {
 	uint16_t th_size;               /* size in pages */
 	uint16_t th_context;            /* TLB context   */
 	uint32_t th_entries;            /* # pages held  */
 	tte_hash_entry_t th_hashtable;   /* hash of TTEs  */
+	struct tte_hash_fragment *th_fhhead;
+	struct tte_hash_fragment *th_fhtail;
+};
+
+struct tte_hash_fragment {
+	struct fragment_header thf_head;
+	struct tte_hash_entry  thf_entries[127];
 };
 
+CTASSERT(sizeof(struct tte_hash_fragment) == PAGE_SIZE);
+
+
 static struct tte_hash kernel_tte_hash;
-static vm_paddr_t proc0_mem;
-
 /*
  * Data for the tte_hash allocation mechanism
  */
@@ -146,13 +164,12 @@
 }
 
 void 
-tte_hash_init(vm_paddr_t bootmem)
+tte_hash_init(void)
 {
 	thzone = uma_zcreate("TTE_HASH", sizeof(struct tte_hash), NULL, NULL, 
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
 	tte_hash_max = maxproc;
 	uma_zone_set_obj(thzone, &thzone_obj, tte_hash_max);
-	proc0_mem = bootmem;
 }
 
 
@@ -178,7 +195,7 @@
 	tte_hash_t th;
 	vm_page_t m, tm;
 	int i;
-	static int proc0_mem_allocated;
+	static int color;
 	
 	th = get_tte_hash();
 	
@@ -187,15 +204,6 @@
 	th->th_context = (uint16_t)context;
 	m = NULL;
 
-
-	if (proc0_mem_allocated < 400) {
-
-		proc0_mem_allocated++;
-		th->th_hashtable = (void *)TLB_PHYS_TO_DIRECT(proc0_mem);
-		proc0_mem += PAGE_SIZE*HASH_SIZE;
-		goto done;
-	}
-
 	while (m == NULL) {
 		m = vm_page_alloc_contig(HASH_SIZE, 2*PAGE_SIZE_4M, 
 					 (1UL<<34), PAGE_SIZE, (1UL<<34));
@@ -208,9 +216,21 @@
 		if (tm->flags & PG_ZERO)
 			pmap_zero_page(tm);
 	}
+	th->th_hashtable = (void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
+	m = NULL;
+	while (m == NULL) {
+		m = vm_page_alloc(NULL, color++,
+		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+		    VM_ALLOC_ZERO);
 
-	th->th_hashtable = (void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
-done:
+		if (m == NULL) 
+			VM_WAIT;
+	}
+	if (m->flags & PG_ZERO)
+		pmap_zero_page(m);	
+
+	th->th_fhtail = th->th_fhhead = (void *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
+
 	*scratchval = (uint64_t)((vm_offset_t)th->th_hashtable) | ((vm_offset_t)th->th_size);
 
 	return (th);
@@ -231,6 +251,26 @@
 	field->tte.data = tte | (field->tte.data & VTD_LOCK);
 }
 
+static tte_hash_field_t 
+tte_hash_allocate_fragment_entry(tte_hash_t th, tte_hash_field_t field)
+{
+	struct tte_hash_fragment *fh;
+	tte_hash_field_t newfield;
+
+	fh = th->th_fhtail;
+	if (fh->thf_head.fh_count == MAX_FRAGMENT_ENTRIES) {
+		/* XXX allocate a new page */
+		panic("new fragment page allocation unimplemented");
+	} 
+	newfield = fh->thf_entries[++fh->thf_head.fh_free_head].the_fields;
+	fh->thf_head.fh_count++; 
+	tte_hash_set_field(&newfield[0], field->tte.tag, field->tte.data); 
+	field->of.flags = TH_COLLISION;
+	field->of.next = newfield;
+
+	return newfield;
+}
+
 static __inline tte_t 
 tte_hash_lookup_inline(tte_hash_t th, vm_offset_t va, tte_hash_field_t *field)
 {
@@ -245,13 +285,28 @@
 	fields = (th->th_hashtable[hash_index].the_fields);
 	entry = 0;
 
+retry:
 	for (i = 0; i < 4 && fields[i].tte.tag != 0; i++) {
 		if (((fields[i].tte.tag << TTARGET_VA_SHIFT) == (va & ~PAGE_MASK_4M))) {
 			entry = (fields[i].tte.data & ~VTD_LOCK);
 			break;
 		}
 	}
-	if (field && i < 4)
+	if (i == 4) {
+		if (fields[3].of.flags & TH_COLLISION) {
+			printf("following next pointer looking up 0x%lx\n", va);
+			fields = fields[3].of.next;
+			goto retry;
+		}
+		printf("allocating fragment entry and shifting entry for tag=0x%lx data=0x%lx\n", 
+		       fields[3].tte.tag, fields[3].tte.data);
+		fields = tte_hash_allocate_fragment_entry(th, &fields[3]);
+		printf("new fragment address is %p\n", fields);
+		/* the entry following the shifted one is the first unallocated */
+		i = 1;
+	}
+
+	if (field)
 		*field = &fields[i];
         /* 
 	 * XXX handle the case of collisions > 3
@@ -260,12 +315,41 @@
 }
 
 
+static __inline void
+tte_hash_lookup_last_inline(tte_hash_t th, vm_offset_t va, tte_hash_field_t *field)
+{
+	uint64_t hash_shift, hash_index;
+	tte_hash_field_t fields;
+	int i;
+	/* XXX - only handle 8K pages for now */
+
+	hash_shift = PAGE_SHIFT;
+	hash_index = (va >> hash_shift) & HASH_MASK(th);
+	fields = (th->th_hashtable[hash_index].the_fields);
+
+retry:
+	for (i = 0; i < 4 && fields[i + 1].tte.tag != 0; i++) 
+		;
+
+	if (i == 4) {
+		if (fields[3].of.flags & TH_COLLISION) {
+			fields = fields[3].of.next; 
+			goto retry;
+		}
+		/* if there is no collision pointer, 3 is the last entry */
+		i = 3;
+	}
+
+	if (field)
+		*field = &fields[i];
+}
+
+
 tte_t
 tte_hash_delete(tte_hash_t th, vm_offset_t va)
 {
 	uint64_t hash_shift, hash_index;
 	tte_hash_field_t fields, lookup_field, last_field;
-	int i;
 	tte_t tte_data;
 	
 	/* XXX - only handle 8K pages for now */
@@ -276,17 +360,14 @@
 
 	hash_bucket_lock(fields);
 	
-	tte_data = tte_hash_lookup_inline(th, va, &lookup_field);
-
-	if (tte_data == 0)
+	if ((tte_data = tte_hash_lookup_inline(th, va, &lookup_field)) == 0) 
 		goto done;
 
 	th->th_entries--;
 
-	for (i = 0; (i < 4) && (fields[i + 1].tte.tag != 0); i++) 
-		;
-	last_field = &fields[i];
+	tte_hash_lookup_last_inline(th, va, &last_field);
 
+	/* move the last field's values into the field we are deleting */
 	if (lookup_field != last_field) 
 		tte_hash_set_field(lookup_field, last_field->tte.tag, last_field->tte.data);
 	
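
The probe loops stop at the first zero tag, so every bucket (and
fragment entry) must keep its occupied fields densely packed.  Hence the
backfill above: the last occupied field -- found by
tte_hash_lookup_last_inline, which follows collision links into fragment
pages -- is copied over the deleted slot, and the vacated tail slot is
then cleared further down, outside the quoted context.  A self-contained
sketch of the idea:

	#include <stdint.h>

	struct f { uint64_t tag, data; };

	static void
	sketch_delete(struct f *fields, int victim, int last)
	{
		if (victim != last)
			fields[victim] = fields[last];	/* backfill the hole */
		fields[last].tag = fields[last].data = 0; /* vacate the tail */
	}
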
@@ -378,7 +459,7 @@
 uint64_t
 tte_hash_set_scratchpad_user(tte_hash_t th, uint64_t context)
 {
-	
+
 	uint64_t hash_scratch;
 	/* This will break if a hash table ever grows above 64MB
 	 * 2^(13+13)

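A note on the 64MB comment above: the scratchpad word set in
tte_hash_create packs the table's address and its size in pages into one
64-bit value.  With 8K pages (PAGE_SHIFT == 13) the page-aligned table
address has its low 13 bits clear, leaving room for a size field of up
to 2^13 pages -- hence the 2^(13+13) = 64MB ceiling.  A sketch of the
packing, with the decode as the consumer presumably performs it:

	#include <stdint.h>

	#define SUN4V_PAGE_SHIFT	13	/* 8K pages */
	#define SCRATCH_SIZE_MASK	((1UL << SUN4V_PAGE_SHIFT) - 1)

	/* pack as tte_hash_create does: page-aligned VA | size in pages */
	static uint64_t
	scratch_pack(uint64_t table_va, uint64_t size_pages)
	{
		return (table_va | size_pages);
	}

	/* presumed decode on the consumer side (e.g. the trap handler) */
	static void
	scratch_unpack(uint64_t s, uint64_t *table_va, uint64_t *size_pages)
	{
		*size_pages = s & SCRATCH_SIZE_MASK;
		*table_va = s & ~SCRATCH_SIZE_MASK;
	}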
