svn commit: r332489 - in head: gnu/usr.bin/gdb/kgdb sys/conf sys/dev/dcons sys/dev/hyperv/vmbus/i386 sys/dev/ppc sys/dev/syscons sys/i386/conf sys/i386/i386 sys/i386/include sys/i386/include/pc sys...

Konstantin Belousov kib at FreeBSD.org
Fri Apr 13 20:30:52 UTC 2018


Author: kib
Date: Fri Apr 13 20:30:49 2018
New Revision: 332489
URL: https://svnweb.freebsd.org/changeset/base/332489

Log:
  i386 4/4G split.
  
  The change makes the user and kernel address spaces on i386
  independent, giving each almost the full 4G of usable virtual addresses
  except for one PDE at the top used for the trampoline and per-CPU
  trampoline stacks, and system structures that must always be mapped,
  namely IDT, GDT, common TSS and LDT, and process-private TSS and LDT
  if allocated.
  
  By using 1:1 mapping for the kernel text and data, it turned out to be
  possible to eliminate the assembler part of locore.s which bootstraps
  the initial page table and KPTmap.  The code was rewritten in C and
  moved into pmap_cold().  The comment in vmparam.h explains the KVA
  layout.
  
  There is no PCID mechanism available in protected mode, so each
  kernel/user switch back and forth completely flushes the TLB, except
  for the trampoline PTD region.  TLB invalidations for userspace
  become trivial, because IPI handlers switch page tables.  On the other
  hand, context switches no longer need to reload %cr3.
  
  copyout(9) was rewritten to use vm_fault_quick_hold_pages().  An issue
  for the new copyout(9) is compatibility with wiring user buffers around
  sysctl handlers.  This explains the two kinds of locks for copyout PTEs
  and the accounting of the vslock() calls.  The
  vm_fault_quick_hold_pages() path, AKA the slow path, is only tried
  after the 'fast path' has failed.  The fast path temporarily switches
  the mapping to userspace and copies the data to/from a small per-CPU
  buffer in the trampoline.  If a page fault occurs during the copy, it
  is short-circuited by exception.s so that it does not even reach C
  code.
  
  The change was motivated by the need to implement the Meltdown
  mitigation, but instead of KPTI the full split is done.  The i386
  architecture already shows sizing problems; in particular, it is
  impossible to link clang and lld with debugging.  I expect that the
  issues due to the virtual address space limits will only get worse,
  and the split gives more life to the platform.
  
  Tested by: pho
  Discussed with:	bde
  Sponsored by:	The FreeBSD Foundation
  MFC after:	1 month
  Differential revision:	https://reviews.freebsd.org/D14633

Added:
  head/sys/i386/i386/copyout.c   (contents, props changed)
  head/sys/i386/i386/copyout_fast.s
     - copied, changed from r332488, head/sys/i386/i386/support.s
Modified:
  head/gnu/usr.bin/gdb/kgdb/trgt_i386.c
  head/sys/conf/files.i386
  head/sys/conf/ldscript.i386
  head/sys/dev/dcons/dcons_crom.c
  head/sys/dev/dcons/dcons_os.c
  head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S
  head/sys/dev/ppc/ppc.c
  head/sys/dev/syscons/syscons.c
  head/sys/i386/conf/NOTES
  head/sys/i386/i386/apic_vector.s
  head/sys/i386/i386/atpic_vector.s
  head/sys/i386/i386/bios.c
  head/sys/i386/i386/db_interface.c
  head/sys/i386/i386/db_trace.c
  head/sys/i386/i386/elf_machdep.c
  head/sys/i386/i386/exception.s
  head/sys/i386/i386/genassym.c
  head/sys/i386/i386/locore.s
  head/sys/i386/i386/machdep.c
  head/sys/i386/i386/mem.c
  head/sys/i386/i386/minidump_machdep.c
  head/sys/i386/i386/mp_machdep.c
  head/sys/i386/i386/mpboot.s
  head/sys/i386/i386/pmap.c
  head/sys/i386/i386/sigtramp.s
  head/sys/i386/i386/support.s
  head/sys/i386/i386/swtch.s
  head/sys/i386/i386/sys_machdep.c
  head/sys/i386/i386/trap.c
  head/sys/i386/i386/vm86.c
  head/sys/i386/i386/vm86bios.s
  head/sys/i386/i386/vm_machdep.c
  head/sys/i386/include/asmacros.h
  head/sys/i386/include/frame.h
  head/sys/i386/include/md_var.h
  head/sys/i386/include/param.h
  head/sys/i386/include/pc/bios.h
  head/sys/i386/include/pcpu.h
  head/sys/i386/include/pmap.h
  head/sys/i386/include/segments.h
  head/sys/i386/include/vmparam.h
  head/sys/kern/imgact_aout.c
  head/sys/kern/subr_witness.c
  head/sys/x86/acpica/acpi_wakeup.c
  head/sys/x86/x86/local_apic.c
  head/sys/x86/x86/mp_x86.c
  head/sys/x86/x86/mptable.c

Modified: head/gnu/usr.bin/gdb/kgdb/trgt_i386.c
==============================================================================
--- head/gnu/usr.bin/gdb/kgdb/trgt_i386.c	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/gnu/usr.bin/gdb/kgdb/trgt_i386.c	Fri Apr 13 20:30:49 2018	(r332489)
@@ -29,6 +29,8 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/proc.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
 #include <machine/pcb.h>
 #include <machine/frame.h>
 #include <machine/segments.h>
@@ -279,12 +281,26 @@ kgdb_trgt_frame_cache(struct frame_info *next_frame, v
 	char buf[MAX_REGISTER_SIZE];
 	struct kgdb_frame_cache *cache;
 	char *pname;
+	CORE_ADDR pcx;
+	uintptr_t addr, setidt_disp;
 
 	cache = *this_cache;
 	if (cache == NULL) {
 		cache = FRAME_OBSTACK_ZALLOC(struct kgdb_frame_cache);
 		*this_cache = cache;
-		cache->pc = frame_func_unwind(next_frame);
+		pcx = frame_pc_unwind(next_frame);
+		if (pcx >= PMAP_TRM_MIN_ADDRESS) {
+			addr = kgdb_lookup("setidt_disp");
+			if (addr != 0) {
+				if (kvm_read(kvm, addr, &setidt_disp,
+				    sizeof(setidt_disp)) !=
+				    sizeof(setidt_disp))
+					warnx("kvm_read: %s", kvm_geterr(kvm));
+				else
+					pcx -= setidt_disp;
+			}
+		}
+		cache->pc = pcx;
 		find_pc_partial_function(cache->pc, &pname, NULL, NULL);
 		if (pname[0] != 'X')
 			cache->frame_type = FT_NORMAL;
@@ -373,6 +389,8 @@ kgdb_trgt_trapframe_sniffer(struct frame_info *next_fr
 	CORE_ADDR pc;
 
 	pc = frame_pc_unwind(next_frame);
+	if (pc >= PMAP_TRM_MIN_ADDRESS)
+		return (&kgdb_trgt_trapframe_unwind);
 	pname = NULL;
 	find_pc_partial_function(pc, &pname, NULL, NULL);
 	if (pname == NULL)

Modified: head/sys/conf/files.i386
==============================================================================
--- head/sys/conf/files.i386	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/conf/files.i386	Fri Apr 13 20:30:49 2018	(r332489)
@@ -483,6 +483,7 @@ i386/i386/atomic.c		standard		\
 i386/i386/bios.c		standard
 i386/i386/bioscall.s		standard
 i386/i386/bpf_jit_machdep.c	optional bpf_jitter
+i386/i386/copyout.c		standard
 i386/i386/db_disasm.c		optional ddb
 i386/i386/db_interface.c	optional ddb
 i386/i386/db_trace.c		optional ddb

Modified: head/sys/conf/ldscript.i386
==============================================================================
--- head/sys/conf/ldscript.i386	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/conf/ldscript.i386	Fri Apr 13 20:30:49 2018	(r332489)
@@ -6,7 +6,7 @@ SEARCH_DIR(/usr/lib);
 SECTIONS
 {
   /* Read-only sections, merged into text segment: */
-  . = kernbase + kernload + SIZEOF_HEADERS;
+  . = kernbase + SIZEOF_HEADERS;
   .interp         : { *(.interp) }
   .hash           : { *(.hash) }
   .gnu.hash       : { *(.gnu.hash) }

Modified: head/sys/dev/dcons/dcons_crom.c
==============================================================================
--- head/sys/dev/dcons/dcons_crom.c	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/dev/dcons/dcons_crom.c	Fri Apr 13 20:30:49 2018	(r332489)
@@ -109,7 +109,11 @@ dcons_crom_expose_idt(struct dcons_crom_softc *sc)
 	static off_t idt_paddr;
 
 	/* XXX */
+#ifdef __amd64__
 	idt_paddr = (char *)idt - (char *)KERNBASE;
+#else /* __i386__ */
+	idt_paddr = (off_t)pmap_kextract((vm_offset_t)idt);
+#endif
 
 	crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_HI, ADDR_HI(idt_paddr));
 	crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_LO, ADDR_LO(idt_paddr));

Modified: head/sys/dev/dcons/dcons_os.c
==============================================================================
--- head/sys/dev/dcons/dcons_os.c	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/dev/dcons/dcons_os.c	Fri Apr 13 20:30:49 2018	(r332489)
@@ -309,11 +309,16 @@ dcons_drv_init(int stage)
 		 * Allow read/write access to dcons buffer.
 		 */
 		for (pa = trunc_page(addr); pa < addr + size; pa += PAGE_SIZE)
-			*vtopte(KERNBASE + pa) |= PG_RW;
+			*vtopte(PMAP_MAP_LOW + pa) |= PG_RW;
 		invltlb();
 #endif
 		/* XXX P to V */
+#ifdef __amd64__
 		dg.buf = (struct dcons_buf *)(vm_offset_t)(KERNBASE + addr);
+#else /* __i386__ */
+		dg.buf = (struct dcons_buf *)((vm_offset_t)PMAP_MAP_LOW +
+		    addr);
+#endif
 		dg.size = size;
 		if (dcons_load_buffer(dg.buf, dg.size, sc) < 0)
 			dg.buf = NULL;

Modified: head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S
==============================================================================
--- head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S	Fri Apr 13 20:30:49 2018	(r332489)
@@ -26,11 +26,12 @@
  * $FreeBSD$
  */
 
+#include "assym.inc"
+
+#include <machine/psl.h>
 #include <machine/asmacros.h>
 #include <machine/specialreg.h>
 
-#include "assym.inc"
-
 /*
  * This is the Hyper-V vmbus channel direct callback interrupt.
  * Only used when it is running on Hyper-V.
@@ -42,6 +43,7 @@ IDTVEC(vmbus_isr)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
+	KENTER
 	FAKE_MCOUNT(TF_EIP(%esp))
 	pushl	%esp
 	call	vmbus_handle_intr

Modified: head/sys/dev/ppc/ppc.c
==============================================================================
--- head/sys/dev/ppc/ppc.c	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/dev/ppc/ppc.c	Fri Apr 13 20:30:49 2018	(r332489)
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <machine/vmparam.h>
+#include <machine/pc/bios.h>
 #endif
 
 #include <dev/ppbus/ppbconf.h>
@@ -121,7 +122,7 @@ static char *ppc_epp_protocol[] = { " (EPP 1.9)", " (E
  * BIOS printer list - used by BIOS probe.
  */
 #define	BIOS_PPC_PORTS	0x408
-#define	BIOS_PORTS	(short *)(KERNBASE+BIOS_PPC_PORTS)
+#define	BIOS_PORTS	((short *)BIOS_PADDRTOVADDR(BIOS_PPC_PORTS))
 #define	BIOS_MAX_PPC	4
 #endif
 

Modified: head/sys/dev/syscons/syscons.c
==============================================================================
--- head/sys/dev/syscons/syscons.c	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/dev/syscons/syscons.c	Fri Apr 13 20:30:49 2018	(r332489)
@@ -288,7 +288,11 @@ ec_putc(int c)
 		 * This is enough for ec_putc() to work very early on x86
 		 * if the kernel starts in normal color text mode.
 		 */
+#ifdef __amd64__
 		fb = KERNBASE + 0xb8000;
+#else /* __i386__ */
+		fb = PMAP_MAP_LOW + 0xb8000;
+#endif
 		xsize = 80;
 		ysize = 25;
 #endif

Modified: head/sys/i386/conf/NOTES
==============================================================================
--- head/sys/i386/conf/NOTES	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/i386/conf/NOTES	Fri Apr 13 20:30:49 2018	(r332489)
@@ -895,19 +895,6 @@ options 	ENABLE_ALART		# Control alarm on Intel intpm 
 options 	PMAP_SHPGPERPROC=201
 
 #
-# Change the size of the kernel virtual address space.  Due to
-# constraints in loader(8) on i386, this must be a multiple of 4.
-# 256 = 1 GB of kernel address space.  Increasing this also causes
-# a reduction of the address space in user processes.  512 splits
-# the 4GB cpu address space in half (2GB user, 2GB kernel).  For PAE
-# kernels, the value will need to be double non-PAE.  A value of 1024
-# for PAE kernels is necessary to split the address space in half.
-# This will likely need to be increased to handle memory sizes >4GB.
-# PAE kernels default to a value of 512.
-#
-options 	KVA_PAGES=260
-
-#
 # Number of initial kernel page table pages used for early bootstrap.
 # This number should include enough pages to map the kernel, any
 # modules or other data loaded with the kernel by the loader, and data
@@ -950,22 +937,6 @@ device		ndis
 

 #####################################################################
 # VM OPTIONS
-
-# Disable the 4 MByte page PSE CPU feature.  The PSE feature allows the
-# kernel to use 4 MByte pages to map the kernel instead of 4k pages.
-# This saves on the amount of memory needed for page tables needed to
-# map the kernel.  You should only disable this feature as a temporary
-# workaround if you are having problems with it enabled.
-#
-#options 	DISABLE_PSE
-
-# Disable the global pages PGE CPU feature.  The PGE feature allows pages
-# to be marked with the PG_G bit.  TLB entries for these pages are not
-# flushed from the cache when %cr3 is reloaded.  This can make context
-# switches less expensive.  You should only disable this feature as a
-# temporary workaround if you are having problems with it enabled.
-#
-#options 	DISABLE_PG_G
 
 # KSTACK_PAGES is the number of memory pages to assign to the kernel
 # stack of each thread.

Modified: head/sys/i386/i386/apic_vector.s
==============================================================================
--- head/sys/i386/i386/apic_vector.s	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/i386/i386/apic_vector.s	Fri Apr 13 20:30:49 2018	(r332489)
@@ -39,6 +39,7 @@
 #include "opt_smp.h"
 
 #include <machine/asmacros.h>
+#include <machine/psl.h>
 #include <machine/specialreg.h>
 #include <x86/apicreg.h>
 
@@ -67,34 +68,39 @@ as_lapic_eoi:
  * translates that into a vector, and passes the vector to the
  * lapic_handle_intr() function.
  */
-#define	ISR_VEC(index, vec_name)					\
-	.text ;								\
-	SUPERALIGN_TEXT ;						\
-IDTVEC(vec_name ## _pti) ;						\
-IDTVEC(vec_name) ;							\
-	PUSH_FRAME ;							\
-	SET_KERNEL_SREGS ;						\
-	cld ;								\
-	FAKE_MCOUNT(TF_EIP(%esp)) ;					\
-	cmpl	$0,x2apic_mode ;					\
-	je	1f ;							\
-	movl	$(MSR_APIC_ISR0 + index),%ecx ;				\
-	rdmsr ;								\
-	jmp	2f ;							\
-1: ;									\
-	movl	lapic_map, %edx ;/* pointer to local APIC */		\
-	movl	LA_ISR + 16 * (index)(%edx), %eax ;	/* load ISR */	\
-2: ;									\
-	bsrl	%eax, %eax ;	/* index of highest set bit in ISR */	\
-	jz	3f ;							\
-	addl	$(32 * index),%eax ;					\
-	pushl	%esp		;                                       \
-	pushl	%eax ;		/* pass the IRQ */			\
-	call	lapic_handle_intr ;					\
-	addl	$8, %esp ;	/* discard parameter */			\
-3: ;									\
-	MEXITCOUNT ;							\
+	.macro	ISR_VEC	index, vec_name
+	.text
+	SUPERALIGN_TEXT
+	.globl	X\()\vec_name\()_pti, X\()\vec_name
+
+X\()\vec_name\()_pti:
+X\()\vec_name:
+	PUSH_FRAME
+	SET_KERNEL_SREGS
+	cld
+	KENTER
+	FAKE_MCOUNT(TF_EIP(%esp))
+	cmpl	$0,x2apic_mode
+	je	2f
+	movl	$(MSR_APIC_ISR0 + \index),%ecx
+	rdmsr
+	jmp	3f
+2:
+	movl	lapic_map, %edx		/* pointer to local APIC */
+	movl	LA_ISR + 16 * \index(%edx), %eax	/* load ISR */
+3:
+	bsrl	%eax, %eax	/* index of highest set bit in ISR */
+	jz	4f
+	addl	$(32 * \index),%eax
+	pushl	%esp
+	pushl	%eax		/* pass the IRQ */
+	movl	$lapic_handle_intr, %eax
+	call	*%eax
+	addl	$8, %esp	/* discard parameter */
+4:
+	MEXITCOUNT
 	jmp	doreti
+	.endm
 
 /*
  * Handle "spurious INTerrupts".
@@ -111,13 +117,13 @@ IDTVEC(spuriousint)
 
 	iret
 
-	ISR_VEC(1, apic_isr1)
-	ISR_VEC(2, apic_isr2)
-	ISR_VEC(3, apic_isr3)
-	ISR_VEC(4, apic_isr4)
-	ISR_VEC(5, apic_isr5)
-	ISR_VEC(6, apic_isr6)
-	ISR_VEC(7, apic_isr7)
+	ISR_VEC	1, apic_isr1
+	ISR_VEC	2, apic_isr2
+	ISR_VEC	3, apic_isr3
+	ISR_VEC	4, apic_isr4
+	ISR_VEC	5, apic_isr5
+	ISR_VEC	6, apic_isr6
+	ISR_VEC	7, apic_isr7
 
 /*
  * Local APIC periodic timer handler.
@@ -129,9 +135,11 @@ IDTVEC(timerint)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
+	KENTER
 	FAKE_MCOUNT(TF_EIP(%esp))
 	pushl	%esp
-	call	lapic_handle_timer
+	movl	$lapic_handle_timer, %eax
+	call	*%eax
 	add	$4, %esp
 	MEXITCOUNT
 	jmp	doreti
@@ -146,8 +154,10 @@ IDTVEC(cmcint)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
+	KENTER
 	FAKE_MCOUNT(TF_EIP(%esp))
-	call	lapic_handle_cmc
+	movl	$lapic_handle_cmc, %eax
+	call	*%eax
 	MEXITCOUNT
 	jmp	doreti
 
@@ -161,8 +171,10 @@ IDTVEC(errorint)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
+	KENTER
 	FAKE_MCOUNT(TF_EIP(%esp))
-	call	lapic_handle_error
+	movl	$lapic_handle_error, %eax
+	call	*%eax
 	MEXITCOUNT
 	jmp	doreti
 
@@ -177,9 +189,11 @@ IDTVEC(xen_intr_upcall)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
+	KENTER
 	FAKE_MCOUNT(TF_EIP(%esp))
 	pushl	%esp
-	call	xen_intr_handle_upcall
+	movl	$xen_intr_handle_upcall, %eax
+	call	*%eax
 	add	$4, %esp
 	MEXITCOUNT
 	jmp	doreti
@@ -200,9 +214,9 @@ IDTVEC(invltlb)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
-
-	call	invltlb_handler
-
+	KENTER
+	movl	$invltlb_handler, %eax
+	call	*%eax
 	jmp	invltlb_ret
 
 /*
@@ -214,9 +228,9 @@ IDTVEC(invlpg)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
-
-	call	invlpg_handler
-
+	KENTER
+	movl	$invlpg_handler, %eax
+	call	*%eax
 	jmp	invltlb_ret
 
 /*
@@ -228,9 +242,9 @@ IDTVEC(invlrng)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
-
-	call	invlrng_handler
-
+	KENTER
+	movl	$invlrng_handler, %eax
+	call	*%eax
 	jmp	invltlb_ret
 
 /*
@@ -242,9 +256,9 @@ IDTVEC(invlcache)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
-
-	call	invlcache_handler
-
+	KENTER
+	movl	$invlcache_handler, %eax
+	call	*%eax
 	jmp	invltlb_ret
 
 /*
@@ -256,12 +270,11 @@ IDTVEC(ipi_intr_bitmap_handler)	
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
-
+	KENTER
 	call	as_lapic_eoi
-	
 	FAKE_MCOUNT(TF_EIP(%esp))
-
-	call	ipi_bitmap_handler
+	movl	$ipi_bitmap_handler, %eax
+	call	*%eax
 	MEXITCOUNT
 	jmp	doreti
 
@@ -274,9 +287,10 @@ IDTVEC(cpustop)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
-
+	KENTER
 	call	as_lapic_eoi
-	call	cpustop_handler
+	movl	$cpustop_handler, %eax
+	call	*%eax
 	jmp	doreti
 
 /*
@@ -288,9 +302,10 @@ IDTVEC(cpususpend)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
-
+	KENTER
 	call	as_lapic_eoi
-	call	cpususpend_handler
+	movl	$cpususpend_handler, %eax
+	call	*%eax
 	jmp	doreti
 
 /*
@@ -304,14 +319,14 @@ IDTVEC(rendezvous)
 	PUSH_FRAME
 	SET_KERNEL_SREGS
 	cld
-
+	KENTER
 #ifdef COUNT_IPIS
 	movl	PCPU(CPUID), %eax
 	movl	ipi_rendezvous_counts(,%eax,4), %eax
 	incl	(%eax)
 #endif
-	call	smp_rendezvous_action
-
+	movl	$smp_rendezvous_action, %eax
+	call	*%eax
 	call	as_lapic_eoi
 	jmp	doreti
 	

Modified: head/sys/i386/i386/atpic_vector.s
==============================================================================
--- head/sys/i386/i386/atpic_vector.s	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/i386/i386/atpic_vector.s	Fri Apr 13 20:30:49 2018	(r332489)
@@ -36,6 +36,7 @@
  * master and slave interrupt controllers.
  */
 
+#include <machine/psl.h>
 #include <machine/asmacros.h>
 
 #include "assym.inc"
@@ -43,37 +44,41 @@
 /*
  * Macros for interrupt entry, call to handler, and exit.
  */
-#define	INTR(irq_num, vec_name) \
-	.text ;								\
-	SUPERALIGN_TEXT ;						\
-IDTVEC(vec_name ##_pti) ;						\
-IDTVEC(vec_name) ;							\
-	PUSH_FRAME ;							\
-	SET_KERNEL_SREGS ;						\
-	cld ;								\
-;									\
-	FAKE_MCOUNT(TF_EIP(%esp)) ;					\
-	pushl	%esp		;                                       \
-	pushl	$irq_num; 	/* pass the IRQ */			\
-	call	atpic_handle_intr ;					\
-	addl	$8, %esp ;	/* discard the parameters */		\
-;									\
-	MEXITCOUNT ;							\
+	.macro	INTR	irq_num, vec_name
+	.text
+	SUPERALIGN_TEXT
+	.globl	X\()\vec_name\()_pti, X\()\vec_name
+
+X\()\vec_name\()_pti:
+X\()\vec_name:
+	PUSH_FRAME
+	SET_KERNEL_SREGS
+	cld
+	KENTER
+	FAKE_MCOUNT(TF_EIP(%esp))
+	pushl	%esp
+	pushl	$\irq_num 	/* pass the IRQ */
+	movl	$atpic_handle_intr, %eax
+	call	*%eax
+	addl	$8, %esp	/* discard the parameters */
+
+	MEXITCOUNT
 	jmp	doreti
+	.endm
 
-	INTR(0, atpic_intr0)
-	INTR(1, atpic_intr1)
-	INTR(2, atpic_intr2)
-	INTR(3, atpic_intr3)
-	INTR(4, atpic_intr4)
-	INTR(5, atpic_intr5)
-	INTR(6, atpic_intr6)
-	INTR(7, atpic_intr7)
-	INTR(8, atpic_intr8)
-	INTR(9, atpic_intr9)
-	INTR(10, atpic_intr10)
-	INTR(11, atpic_intr11)
-	INTR(12, atpic_intr12)
-	INTR(13, atpic_intr13)
-	INTR(14, atpic_intr14)
-	INTR(15, atpic_intr15)
+	INTR	0, atpic_intr0
+	INTR	1, atpic_intr1
+	INTR	2, atpic_intr2
+	INTR	3, atpic_intr3
+	INTR	4, atpic_intr4
+	INTR	5, atpic_intr5
+	INTR	6, atpic_intr6
+	INTR	7, atpic_intr7
+	INTR	8, atpic_intr8
+	INTR	9, atpic_intr9
+	INTR	10, atpic_intr10
+	INTR	11, atpic_intr11
+	INTR	12, atpic_intr12
+	INTR	13, atpic_intr13
+	INTR	14, atpic_intr14
+	INTR	15, atpic_intr15

Modified: head/sys/i386/i386/bios.c
==============================================================================
--- head/sys/i386/i386/bios.c	Fri Apr 13 19:43:23 2018	(r332488)
+++ head/sys/i386/i386/bios.c	Fri Apr 13 20:30:49 2018	(r332489)
@@ -305,6 +305,7 @@ set_bios_selectors(struct bios_segments *seg, int flag
 }
 
 extern int vm86pa;
+extern u_long vm86phystk;
 extern void bios16_jmp(void);
 
 /*
@@ -329,7 +330,7 @@ bios16(struct bios_args *args, char *fmt, ...)
     int 	flags = BIOSCODE_FLAG | BIOSDATA_FLAG;
     u_int 	i, arg_start, arg_end;
     pt_entry_t	*pte;
-    pd_entry_t	*ptd;
+    pd_entry_t	*ptd, orig_ptd;
 
     arg_start = 0xffffffff;
     arg_end = 0;
@@ -390,27 +391,14 @@ bios16(struct bios_args *args, char *fmt, ...)
     args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME;
     args->seg.code32.limit = 0xffff;	
 
-    ptd = (pd_entry_t *)rcr3();
-#if defined(PAE) || defined(PAE_TABLES)
-    if (ptd == IdlePDPT)
-#else
-    if (ptd == IdlePTD)
-#endif
-    {
-	/*
-	 * no page table, so create one and install it.
-	 */
-	pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
-	ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE);
-	*pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V;
-	*ptd = vtophys(pte) | PG_RW | PG_V;
-    } else {
-	/*
-	 * this is a user-level page table 
-	 */
-	pte = PTmap;
-	*pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V;
-    }
+    /*
+     * no page table, so create one and install it.
+     */
+    pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
+    ptd = IdlePTD;
+    *pte = vm86phystk | PG_RW | PG_V;
+    orig_ptd = *ptd;
+    *ptd = vtophys(pte) | PG_RW | PG_V;
     pmap_invalidate_all(kernel_pmap);	/* XXX insurance for now */
 
     stack_top = stack;
@@ -464,20 +452,12 @@ bios16(struct bios_args *args, char *fmt, ...)
 
     i = bios16_call(&args->r, stack_top);
 
-    if (pte == PTmap) {
-	*pte = 0;			/* remove entry */
-	/*
-	 * XXX only needs to be invlpg(0) but that doesn't work on the 386 
-	 */
-	pmap_invalidate_all(kernel_pmap);
-    } else {
-	*ptd = 0;			/* remove page table */
-	/*
-	 * XXX only needs to be invlpg(0) but that doesn't work on the 386 
-	 */
-	pmap_invalidate_all(kernel_pmap);
-	free(pte, M_TEMP);		/* ... and free it */
-    }
+    *ptd = orig_ptd;		/* remove page table */
+    /*
+     * XXX only needs to be invlpg(0) but that doesn't work on the 386
+     */
+    pmap_invalidate_all(kernel_pmap);
+    free(pte, M_TEMP);		/* ... and free it */
     return (i);
 }
 

Added: head/sys/i386/i386/copyout.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/i386/i386/copyout.c	Fri Apr 13 20:30:49 2018	(r332489)
@@ -0,0 +1,489 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2018 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib at FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+
+#if defined(PAE) || defined(PAE_TABLES)
+#define	KCR3	((u_int)IdlePDPT)
+#else
+#define	KCR3	((u_int)IdlePTD)
+#endif
+
+int copyin_fast(const void *udaddr, void *kaddr, size_t len, u_int);
+static int (*copyin_fast_tramp)(const void *, void *, size_t, u_int);
+int copyout_fast(const void *kaddr, void *udaddr, size_t len, u_int);
+static int (*copyout_fast_tramp)(const void *, void *, size_t, u_int);
+int fubyte_fast(volatile const void *base, u_int kcr3);
+static int (*fubyte_fast_tramp)(volatile const void *, u_int);
+int fuword16_fast(volatile const void *base, u_int kcr3);
+static int (*fuword16_fast_tramp)(volatile const void *, u_int);
+int fueword_fast(volatile const void *base, long *val, u_int kcr3);
+static int (*fueword_fast_tramp)(volatile const void *, long *, u_int);
+int subyte_fast(volatile void *base, int val, u_int kcr3);
+static int (*subyte_fast_tramp)(volatile void *, int, u_int);
+int suword16_fast(volatile void *base, int val, u_int kcr3);
+static int (*suword16_fast_tramp)(volatile void *, int, u_int);
+int suword_fast(volatile void *base, long val, u_int kcr3);
+static int (*suword_fast_tramp)(volatile void *, long, u_int);
+
+static int fast_copyout = 1;
+SYSCTL_INT(_machdep, OID_AUTO, fast_copyout, CTLFLAG_RWTUN,
+    &fast_copyout, 0,
+    "");
+
+void
+copyout_init_tramp(void)
+{
+
+	copyin_fast_tramp = (int (*)(const void *, void *, size_t, u_int))(
+	    (uintptr_t)copyin_fast + setidt_disp);
+	copyout_fast_tramp = (int (*)(const void *, void *, size_t, u_int))(
+	    (uintptr_t)copyout_fast + setidt_disp);
+	fubyte_fast_tramp = (int (*)(volatile const void *, u_int))(
+	    (uintptr_t)fubyte_fast + setidt_disp);
+	fuword16_fast_tramp = (int (*)(volatile const void *, u_int))(
+	    (uintptr_t)fuword16_fast + setidt_disp);
+	fueword_fast_tramp = (int (*)(volatile const void *, long *, u_int))(
+	    (uintptr_t)fueword_fast + setidt_disp);
+	subyte_fast_tramp = (int (*)(volatile void *, int, u_int))(
+	    (uintptr_t)subyte_fast + setidt_disp);
+	suword16_fast_tramp = (int (*)(volatile void *, int, u_int))(
+	    (uintptr_t)suword16_fast + setidt_disp);
+	suword_fast_tramp = (int (*)(volatile void *, long, u_int))(
+	    (uintptr_t)suword_fast + setidt_disp);
+}
+
+static int
+cp_slow0(vm_offset_t uva, size_t len, bool write,
+    void (*f)(vm_offset_t, void *), void *arg)
+{
+	struct pcpu *pc;
+	vm_page_t m[2];
+	pt_entry_t *pte;
+	vm_offset_t kaddr;
+	int error, i, plen;
+	bool sleepable;
+
+	plen = howmany(uva - trunc_page(uva) + len, PAGE_SIZE);
+	MPASS(plen <= nitems(m));
+	error = 0;
+	i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, uva, len,
+	    (write ? VM_PROT_WRITE : VM_PROT_READ) | VM_PROT_QUICK_NOFAULT,
+	    m, nitems(m));
+	if (i != plen)
+		return (EFAULT);
+	sched_pin();
+	pc = get_pcpu();
+	if (!THREAD_CAN_SLEEP() || curthread->td_vslock_sz > 0 ||
+	    (curthread->td_pflags & TDP_NOFAULTING) != 0) {
+		sleepable = false;
+		mtx_lock(&pc->pc_copyout_mlock);
+		kaddr = pc->pc_copyout_maddr;
+	} else {
+		sleepable = true;
+		sx_xlock(&pc->pc_copyout_slock);
+		kaddr = pc->pc_copyout_saddr;
+	}
+	for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) {
+		*pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(m[i]) |
+		    pmap_cache_bits(pmap_page_get_memattr(m[i]), FALSE);
+		invlpg(kaddr + ptoa(i));
+	}
+	kaddr += uva - trunc_page(uva);
+	f(kaddr, arg);
+	sched_unpin();
+	if (sleepable)
+		sx_xunlock(&pc->pc_copyout_slock);
+	else
+		mtx_unlock(&pc->pc_copyout_mlock);
+	for (i = 0; i < plen; i++) {
+		vm_page_lock(m[i]);
+		vm_page_unhold(m[i]);
+		vm_page_unlock(m[i]);
+	}
+	return (error);
+}
+
+struct copyinstr_arg0 {
+	vm_offset_t kc;
+	size_t len;
+	size_t alen;
+	bool end;
+};
+
+static void
+copyinstr_slow0(vm_offset_t kva, void *arg)
+{
+	struct copyinstr_arg0 *ca;
+	char c;
+
+	ca = arg;
+	MPASS(ca->alen == 0 && ca->len > 0 && !ca->end);
+	while (ca->alen < ca->len && !ca->end) {
+		c = *(char *)(kva + ca->alen);
+		*(char *)ca->kc = c;
+		ca->alen++;
+		ca->kc++;
+		if (c == '\0')
+			ca->end = true;
+	}
+}
+
+int
+copyinstr(const void *udaddr, void *kaddr, size_t maxlen, size_t *lencopied)
+{
+	struct copyinstr_arg0 ca;
+	vm_offset_t uc;
+	size_t plen;
+	int error;
+
+	error = 0;
+	ca.end = false;
+	for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
+	    plen < maxlen && !ca.end; uc += ca.alen, plen += ca.alen) {
+		ca.len = round_page(uc) - uc;
+		if (ca.len == 0)
+			ca.len = PAGE_SIZE;
+		if (plen + ca.len > maxlen)
+			ca.len = maxlen - plen;
+		ca.alen = 0;
+		if (cp_slow0(uc, ca.len, false, copyinstr_slow0, &ca) != 0) {
+			error = EFAULT;
+			break;
+		}
+	}
+	if (!ca.end && plen == maxlen && error == 0)
+		error = ENAMETOOLONG;
+	if (lencopied != NULL)
+		*lencopied = plen;
+	return (error);
+}
+
+struct copyin_arg0 {
+	vm_offset_t kc;
+	size_t len;
+};
+
+static void
+copyin_slow0(vm_offset_t kva, void *arg)
+{
+	struct copyin_arg0 *ca;
+
+	ca = arg;
+	bcopy((void *)kva, (void *)ca->kc, ca->len);
+}
+
+int
+copyin(const void *udaddr, void *kaddr, size_t len)
+{
+	struct copyin_arg0 ca;
+	vm_offset_t uc;
+	size_t plen;
+
+	if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||
+	    (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
+		return (-1);
+	if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
+	    copyin_fast_tramp(udaddr, kaddr, len, KCR3) == 0))
+		return (0);
+	for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
+	    plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
+		ca.len = round_page(uc) - uc;
+		if (ca.len == 0)
+			ca.len = PAGE_SIZE;
+		if (plen + ca.len > len)
+			ca.len = len - plen;
+		if (cp_slow0(uc, ca.len, false, copyin_slow0, &ca) != 0)
+			return (EFAULT);
+	}
+	return (0);
+}
+
+static void
+copyout_slow0(vm_offset_t kva, void *arg)
+{
+	struct copyin_arg0 *ca;
+
+	ca = arg;
+	bcopy((void *)ca->kc, (void *)kva, ca->len);
+}
+
+int
+copyout(const void *kaddr, void *udaddr, size_t len)
+{
+	struct copyin_arg0 ca;
+	vm_offset_t uc;
+	size_t plen;
+
+	if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||
+	    (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
+		return (-1);
+	if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
+	    copyout_fast_tramp(kaddr, udaddr, len, KCR3) == 0))
+		return (0);
+	for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
+	    plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
+		ca.len = round_page(uc) - uc;
+		if (ca.len == 0)
+			ca.len = PAGE_SIZE;
+		if (plen + ca.len > len)
+			ca.len = len - plen;
+		if (cp_slow0(uc, ca.len, true, copyout_slow0, &ca) != 0)
+			return (EFAULT);
+	}
+	return (0);
+}
+
+/*
+ * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
+ * memory.
+ */
+
+static void
+fubyte_slow0(vm_offset_t kva, void *arg)
+{
+
+	*(int *)arg = *(u_char *)kva;
+}
+
+int
+fubyte(volatile const void *base)
+{
+	int res;
+
+	if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base ||
+	    (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS)
+		return (-1);
+	if (fast_copyout) {
+		res = fubyte_fast_tramp(base, KCR3);
+		if (res != -1)
+			return (res);
+	}
+	if (cp_slow0((vm_offset_t)base, sizeof(char), false, fubyte_slow0,
+	    &res) != 0)
+		return (-1);
+	return (res);
+}
+
+static void
+fuword16_slow0(vm_offset_t kva, void *arg)
+{
+
+	*(int *)arg = *(uint16_t *)kva;
+}
+
+int
+fuword16(volatile const void *base)
+{
+	int res;
+
+	if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base ||
+	    (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS)
+		return (-1);
+	if (fast_copyout) {
+		res = fuword16_fast_tramp(base, KCR3);
+		if (res != -1)
+			return (res);
+	}
+	if (cp_slow0((vm_offset_t)base, sizeof(uint16_t), false,
+	    fuword16_slow0, &res) != 0)
+		return (-1);
+	return (res);
+}
+
+static void
+fueword_slow0(vm_offset_t kva, void *arg)
+{
+
+	*(uint32_t *)arg = *(uint32_t *)kva;
+}
+
+int
+fueword(volatile const void *base, long *val)
+{
+	uint32_t res;
+
+	if ((uintptr_t)base + sizeof(*val) < (uintptr_t)base ||
+	    (uintptr_t)base + sizeof(*val) > VM_MAXUSER_ADDRESS)
+		return (-1);
+	if (fast_copyout) {
+		if (fueword_fast_tramp(base, val, KCR3) == 0)
+			return (0);
+	}
+	if (cp_slow0((vm_offset_t)base, sizeof(long), false, fueword_slow0,
+	    &res) != 0)
+		return (-1);
+	*val = res;
+	return (0);
+}
+
+int
+fueword32(volatile const void *base, int32_t *val)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list