svn commit: r223700 - head/sys/ia64/ia64

Marcel Moolenaar marcel at FreeBSD.org
Thu Jun 30 20:34:55 UTC 2011


Author: marcel
Date: Thu Jun 30 20:34:55 2011
New Revision: 223700
URL: http://svn.freebsd.org/changeset/base/223700

Log:
  Change the management of nested faults by switching to physical
  addressing while reading or writing the trap frame. It's not
  possible to guarantee that the one translation cache entry that
  we depend on is not going to get purged by the CPU. We already
  know that global shootdowns (ptc.g and/or ptc.ga) can (and will)
  cause multiple TC entries to get purged, and we initially tried
  to handle that by serializing kernel entry with these operations.
  However, we need to serialize kernel exit as well.
  
  But even if we can serialize, it appears that CPU threads within
  a core can affect each other's TC entries beyond the global
  shootdown. This would mean serializing any and all translation
  cache updates by the threads in a core against the kernel entry
  and exit of any thread in that core. This is just too painful
  and complicated.
  
  Since we already properly coded for the 2 nested faults that we
  can get, all we need to do is use those to obtain the physical
  address of the trap frame, switch to physical mode and in that
  way eliminate any further faults. The trap frame is already
  aligned to a 1KB boundary to make sure we don't cross a page
  boundary, so this is safe to do.
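  
  In outline, the kernel entry path now does roughly the following (a
  condensed sketch of the exception.S change below, not a complete
  listing; register roles are as documented in the full hunk):
  
  	exception_save_restart:
  		tpa	r24=r30		// physical address of the trap frame;
  					// this can itself raise the expected
  					// nested TLB fault
  		;;
  		rsm	psr.dt		// switch data references to physical mode
  		mov	r30=r24
  		;;
  		srlz.d			// serialize the psr.dt change
  		;;
  		// ... spill the trap frame through r30/r31, now fault-free ...
  		ssm	psr.dt|psr.ic|psr.dfh	// re-enable translation on the way out
  		;;
  		srlz.d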
  
  We still need to serialize ptc.g or ptc.ga across CPUs because
  the platform can only have 1 such operation outstanding at the
  same time. We can now use a regular (spin) lock for this.
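  
  In essence, pmap_invalidate_page() now brackets the broadcast purge
  with a spin mutex (condensed from the pmap.c hunk below; the
  surrounding VHPT invalidation loop is omitted):
  
  	/* Set up once in pmap_bootstrap(). */
  	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
  
  	/* In pmap_invalidate_page(), after tagging the VHPT entries. */
  	mtx_lock_spin(&pmap_ptc_mutex);
  	ia64_ptc_ga(va, PAGE_SHIFT << 2);	/* global purge (ptc.ga) */
  	ia64_mf();
  	ia64_srlz_i();
  	mtx_unlock_spin(&pmap_ptc_mutex);
  	ia64_invala();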
  
  Also, it has been observed that we can get nested TLB faults
  for region 7 virtual addresses. This was unexpected. For now,
  we enhance the nested TLB fault handler to deal with those as
  well, but this still needs to be understood.

Modified:
  head/sys/ia64/ia64/exception.S
  head/sys/ia64/ia64/pmap.c

Modified: head/sys/ia64/ia64/exception.S
==============================================================================
--- head/sys/ia64/ia64/exception.S	Thu Jun 30 19:23:17 2011	(r223699)
+++ head/sys/ia64/ia64/exception.S	Thu Jun 30 20:34:55 2011	(r223700)
@@ -50,9 +50,6 @@ __FBSDID("$FreeBSD$");
 
 	.section .ivt.data, "aw"
 
-	.global pmap_ptc_g_sem
-pmap_ptc_g_sem:	data8	0
-
 	.global ia64_kptdir
 ia64_kptdir:	data8	0
 
@@ -151,58 +148,51 @@ ENTRY_NOPROFILE(exception_save, 0)
 }
 {	.mmi
 	mov		ar.rsc=0
-	sub		r19=r23,r30
-	add		r31=8,r30
-	;;
-}
-{	.mmi
 	mov		r22=cr.iip
-	nop		0
 	addl		r29=NTLBRT_SAVE,r0	// 22-bit restart token.
 	;;
 }
 
 	/*
-	 * We have a 1KB aligned trapframe, pointed to by sp. If we write
-	 * to the trapframe, we may trigger a data nested TLB fault. By
-	 * aligning the trapframe on a 1KB boundary, we guarantee that if
-	 * we get a data nested TLB fault, it will be on the very first
-	 * write. Since the data nested TLB fault does not preserve any
-	 * state, we have to be careful what we clobber. Consequently, we
-	 * have to be careful what we use here. Below a list of registers
-	 * that are currently alive:
+	 * We have a 1KB aligned trapframe, pointed to by r30. We can't
+	 * reliably write to the trapframe using virtual addressing, due
+	 * to the fact that TC entries we depend on can be removed by:
+	 * 1.  ptc.g instructions issued by other threads/cores/CPUs, or
+	 * 2.  TC modifications in another thread on the same core.
+	 * When our TC entry gets removed, we get nested TLB faults and
+	 * since no state is saved, we can only deal with those when
+	 * explicitly coded and expected.
+	 * As such, we switch to physical addressing and account for the
+	 * fact that the tpa instruction can cause a nested TLB fault.
+	 * Since the data nested TLB fault does not preserve any state,
+	 * we have to be careful what we clobber. Consequently, we have
+	 * to be careful what we use here. Below a list of registers that
+	 * are considered alive:
 	 *	r16,r17=arguments
 	 *	r18=pr, r19=length, r20=unat, r21=rsc, r22=iip, r23=TOS
-	 *	r29=restart point
-	 *	r30,r31=trapframe pointers
+	 *	r29=restart token
+	 *	r30=trapframe pointers
 	 *	p14,p15=memory stack switch
 	 */
-
-	/* PTC.G enter non-exclusive */
-	mov	r24 = ar.ccv
-	movl	r25 = pmap_ptc_g_sem
-	;;
-.ptc_g_0:
-	ld8.acq	r26 = [r25]
-	;;
-	tbit.nz	p12, p0 = r26, 63
-(p12)	br.cond.spnt.few .ptc_g_0
-	;;
-	mov	ar.ccv = r26
-	adds	r27 = 1, r26
+exception_save_restart:
+	tpa		r24=r30			// Nested TLB fault possible
+	sub		r19=r23,r30
+	nop		0
 	;;
-	cmpxchg8.rel	r27 = [r25], r27, ar.ccv
+
+	rsm		psr.dt
+	add		r29=16,r19		// Clobber restart token
+	mov		r30=r24
 	;;
-	cmp.ne	p12, p0 = r26, r27
-(p12)	br.cond.spnt.few .ptc_g_0
+	srlz.d
+	add		r31=8,r24
 	;;
-	mov	ar.ccv = r24
 
-exception_save_restart:
+	// r18=pr, r19=length, r20=unat, r21=rsc, r22=iip, r23=TOS
+	// r29=delta
 {	.mmi
 	st8		[r30]=r19,16		// length
 	st8		[r31]=r0,16		// flags
-	add		r29=16,r19		// Clobber restart token
 	;;
 }
 {	.mmi
@@ -218,6 +208,7 @@ exception_save_restart:
 	;;
 }
 	// r18=pr, r19=rnat, r20=bspstore, r21=rsc, r22=iip, r23=rp
+	// r24=pfs
 {	.mmi
 	st8		[r30]=r23,16		// rp
 	st8		[r31]=r18,16		// pr
@@ -275,7 +266,7 @@ exception_save_restart:
 	sub		r18=r18,r20
 	;;
 }
-	// r19=ifs, r22=iip
+	// r18=ndirty, r19=ifs, r22=iip
 {	.mmi
 	st8		[r31]=r18,16		// ndirty
 	st8		[r30]=r19,16		// cfm
@@ -431,27 +422,10 @@ exception_save_restart:
 	;;
 }
 {	.mlx
-	ssm		psr.ic|psr.dfh
+	ssm		psr.dt|psr.ic|psr.dfh
 	movl		gp=__gp
 	;;
 }
-
-	/* PTC.G leave non-exclusive */
-	srlz.d
-	movl	r25 = pmap_ptc_g_sem
-	;;
-.ptc_g_1:
-	ld8.acq r26 = [r25]
-	;;
-	mov	ar.ccv = r26
-	adds	r27 = -1, r26
-	;;
-	cmpxchg8.rel	r27 = [r25], r27, ar.ccv
-	;;
-	cmp.ne	p12, p0 = r26, r27
-(p12)	br.cond.spnt.few .ptc_g_1
-	;;
-
 {	.mib
 	srlz.d
 	nop		0
@@ -469,34 +443,52 @@ END(exception_save)
 ENTRY_NOPROFILE(exception_restore, 0)
 {	.mmi
 	rsm		psr.i
-	add		r3=SIZEOF_TRAPFRAME-16,sp
-	add		r2=SIZEOF_TRAPFRAME,sp
+	add		sp=16,sp
+	nop		0
 	;;
 }
-{	.mmi
+
+	// The next instruction can fault. Let it be...
+	tpa		r9=sp
+	;;
+	rsm		psr.dt|psr.ic
+	add		r8=SIZEOF_SPECIAL+16,r9
+	;;
 	srlz.d
-	add		r8=SIZEOF_SPECIAL+32,sp
-	nop		0
+	add		r2=SIZEOF_TRAPFRAME-16,r9
+	add		r3=SIZEOF_TRAPFRAME-32,r9
 	;;
-}
-	// The next load can trap. Let it be...
+
+{	.mmi
 	ldf.fill	f15=[r2],-32		// f15
 	ldf.fill	f14=[r3],-32		// f14
-	add		sp=16,sp
+	nop		0
 	;;
+}
+{	.mmi
 	ldf.fill	f13=[r2],-32		// f13
 	ldf.fill	f12=[r3],-32		// f12
+	nop		0
 	;;
+}
+{	.mmi
 	ldf.fill	f11=[r2],-32		// f11
 	ldf.fill	f10=[r3],-32		// f10
+	nop		0
 	;;
+}
+{	.mmi
 	ldf.fill	f9=[r2],-32		// f9
 	ldf.fill	f8=[r3],-32		// f8
+	nop		0
 	;;
+}
+{	.mmi
 	ldf.fill	f7=[r2],-24		// f7
 	ldf.fill	f6=[r3],-16		// f6
+	nop		0
 	;;
-
+}
 {	.mmi
 	ld8		r8=[r8]			// unat (after)
 	;;
@@ -553,53 +545,53 @@ ENTRY_NOPROFILE(exception_restore, 0)
 	bsw.0
 	;;
 }
+{	.mii
+	ld8		r16=[r9]		// tf_length
+	add		r31=16,r9
+	add		r30=24,r9
+}
 {	.mmi
 	ld8.fill	r15=[r3],-16		// r15
 	ld8.fill	r14=[r2],-16		// r14
-	add		r31=16,sp
+	nop		0
 	;;
 }
 {	.mmi
-	ld8		r16=[sp]		// tf_length
 	ld8.fill	r11=[r3],-16		// r11
-	add		r30=24,sp
-	;;
-}
-{	.mmi
 	ld8.fill	r10=[r2],-16		// r10
-	ld8.fill	r9=[r3],-16		// r9
 	add		r16=r16,sp		// ar.k7
 	;;
 }
 {	.mmi
+	ld8.fill	r9=[r3],-16		// r9
 	ld8.fill	r8=[r2],-16		// r8
-	ld8.fill	r3=[r3]			// r3
+	nop		0
 	;;
 }
-	// We want nested TLB faults from here on...
-	rsm		psr.ic|psr.i
+{	.mmi
+	ld8.fill	r3=[r3]			// r3
 	ld8.fill	r2=[r2]			// r2
 	nop		0
 	;;
-	srlz.d
-	ld8.fill	sp=[r31],16		// sp
-	nop		0
-	;;
+}
 
+	ld8.fill	sp=[r31],16		// sp
 	ld8		r17=[r30],16		// unat
-	ld8		r29=[r31],16		// rp
 	;;
+	ld8		r29=[r31],16		// rp
 	ld8		r18=[r30],16		// pr
+	;;
 	ld8		r28=[r31],16		// pfs
+	ld8		r20=[r30],24		// bspstore
 	mov		rp=r29
 	;;
-	ld8		r20=[r30],24		// bspstore
 	ld8		r21=[r31],24		// rnat
 	mov		ar.pfs=r28
 	;;
 	ld8.fill	r26=[r30],16		// tp
 	ld8		r22=[r31],16		// rsc
 	;;
+
 {	.mmi
 	ld8		r23=[r30],16		// fpsr
 	ld8		r24=[r31],16		// psr
@@ -636,6 +628,11 @@ ENTRY_NOPROFILE(exception_restore, 0)
 	addl		r29=NTLBRT_RESTORE,r0	// 22-bit restart token 
 	;;
 }
+
+	ssm		psr.dt
+	;;
+	srlz.d
+
 exception_restore_restart:
 {	.mmi
 	mov		r30=ar.bspstore
@@ -1015,15 +1012,33 @@ IVT_ENTRY(Data_Nested_TLB, 0x1400)
 	// here are direct mapped region 7 addresses, we have no problem
 	// constructing physical addresses.
 
-{	.mlx
+{	.mmi
+	mov		cr.ifa=r30
+	mov		r26=rr[r30]
+	extr.u		r27=r30,61,3
+	;;
+}
+{	.mii
 	nop		0
-	movl		r27=ia64_kptdir
+	dep		r26=0,r26,0,2
+	cmp.eq		p12,p13=7,r27
 	;;
 }
 {	.mii
-	ld8		r27=[r27]
-	extr.u		r28=r30,3*PAGE_SHIFT-8, PAGE_SHIFT-3	// dir L0 index
-	extr.u		r26=r30,2*PAGE_SHIFT-5, PAGE_SHIFT-3	// dir L1 index
+	mov		cr.itir=r26
+(p12)	dep		r28=0,r30,61,3
+(p13)	extr.u		r28=r30,3*PAGE_SHIFT-8, PAGE_SHIFT-3	// dir L0 index
+	;;
+}
+{	.mlx
+(p12)	add		r28=PTE_PRESENT+PTE_ACCESSED+PTE_DIRTY+PTE_PL_KERN+PTE_AR_RWX+PTE_MA_WB,r28
+(p13)	movl		r27=ia64_kptdir
+	;;
+}
+{	.mib
+(p13)	ld8		r27=[r27]
+(p13)	extr.u		r26=r30,2*PAGE_SHIFT-5, PAGE_SHIFT-3	// dir L1 index
+(p12)	br.cond.spnt.few 1f
 	;;
 }
 {	.mmi
@@ -1040,58 +1055,48 @@ IVT_ENTRY(Data_Nested_TLB, 0x1400)
 	extr.u		r28=r30,PAGE_SHIFT,PAGE_SHIFT-5		// pte index
 	;;
 }
-{	.mmi
+{	.mii
 	shladd		r27=r26,3,r27
+	shl		r28=r28,5
 	;;
-	mov		r26=rr[r30]
 	dep		r27=0,r27,61,3
 	;;
 }
-{	.mii
 	ld8		r27=[r27]				// pte page
-	shl		r28=r28,5
-	dep		r26=0,r26,0,2
 	;;
-}
-{	.mmi
 	add		r27=r28,r27
 	;;
-	mov		cr.ifa=r30
 	dep		r27=0,r27,61,3
 	;;
-}
-{	.mmi
-	ld8		r28=[r27]		// pte
+	ld8		r28=[r27]				// pte
 	;;
-	mov		cr.itir=r26
 	or		r28=PTE_DIRTY+PTE_ACCESSED,r28
 	;;
-}
-{	.mmi
 	st8		[r27]=r28
 	;;
-	addl		r26=NTLBRT_SAVE,r0
-	addl		r27=NTLBRT_RESTORE,r0
-}
+	ssm		psr.dt
+	;;
+1:
 {	.mmi
 	itc.d		r28
 	;;
-	ssm		psr.dt
-	cmp.eq		p12,p0=r29,r26
+	addl		r26=NTLBRT_SAVE,r0
+	addl		r27=NTLBRT_RESTORE,r0
 	;;
 }
-{	.mib
+{	.mmi
 	srlz.d
+	cmp.eq		p12,p0=r29,r26
 	cmp.eq		p13,p0=r29,r27
-(p12)	br.cond.sptk.few	exception_save_restart
 	;;
 }
-{	.mib
-	nop		0
+{	.mbb
 	nop		0
+(p12)	br.cond.sptk.few	exception_save_restart
 (p13)	br.cond.sptk.few	exception_restore_restart
 	;;
 }
+
 {	.mlx
 	mov		r26=ar.bsp
 	movl		r29=kstack

Modified: head/sys/ia64/ia64/pmap.c
==============================================================================
--- head/sys/ia64/ia64/pmap.c	Thu Jun 30 19:23:17 2011	(r223699)
+++ head/sys/ia64/ia64/pmap.c	Thu Jun 30 20:34:55 2011	(r223700)
@@ -179,7 +179,7 @@ static uint64_t pmap_ptc_e_count2 = 2;
 static uint64_t pmap_ptc_e_stride1 = 0x2000;
 static uint64_t pmap_ptc_e_stride2 = 0x100000000;
 
-extern volatile u_long pmap_ptc_g_sem;
+struct mtx pmap_ptc_mutex;
 
 /*
  * Data for the RID allocator
@@ -338,6 +338,8 @@ pmap_bootstrap()
 		       pmap_ptc_e_stride1,
 		       pmap_ptc_e_stride2);
 
+	mtx_init(&pmap_ptc_mutex, "PTC.G mutex", NULL, MTX_SPIN);
+
 	/*
 	 * Setup RIDs. RIDs 0..7 are reserved for the kernel.
 	 *
@@ -528,11 +530,11 @@ pmap_invalidate_page(vm_offset_t va)
 {
 	struct ia64_lpte *pte;
 	struct pcpu *pc;
-	uint64_t tag, sem;
-	register_t is;
+	uint64_t tag;
 	u_int vhpt_ofs;
 
 	critical_enter();
+
 	vhpt_ofs = ia64_thash(va) - PCPU_GET(md.vhpt);
 	tag = ia64_ttag(va);
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
@@ -540,34 +542,16 @@ pmap_invalidate_page(vm_offset_t va)
 		atomic_cmpset_64(&pte->tag, tag, 1UL << 63);
 	}
 
-	/* PTC.G enter exclusive */
-	is = intr_disable();
-
-	/* Atomically assert writer after all writers have gone. */
-	do {
-		/* Wait until there's no more writer. */
-		do {
-			sem = atomic_load_acq_long(&pmap_ptc_g_sem);
-			tag = sem | (1ul << 63);
-		} while (sem == tag);
-	} while (!atomic_cmpset_rel_long(&pmap_ptc_g_sem, sem, tag));
-
-	/* Wait until all readers are gone. */
-	tag = (1ul << 63);
-	do {
-		sem = atomic_load_acq_long(&pmap_ptc_g_sem);
-	} while (sem != tag);
+	mtx_lock_spin(&pmap_ptc_mutex);
 
 	ia64_ptc_ga(va, PAGE_SHIFT << 2);
 	ia64_mf();
 	ia64_srlz_i();
 
-	/* PTC.G leave exclusive */
-	atomic_store_rel_long(&pmap_ptc_g_sem, 0);
+	mtx_unlock_spin(&pmap_ptc_mutex);
 
 	ia64_invala();
 
-	intr_restore(is);
 	critical_exit();
 }
 

