PERFORCE change 42553 for review
Peter Wemm
peter at FreeBSD.org
Sun Nov 16 09:16:11 PST 2003
http://perforce.freebsd.org/chv.cgi?CH=42553
Change 42553 by peter at peter_overcee on 2003/11/16 09:15:56
Checkpoint before I lose it.
Affected files ...
.. //depot/projects/hammer/sys/amd64/amd64/mpboot.S#5 edit
Differences ...
==== //depot/projects/hammer/sys/amd64/amd64/mpboot.S#5 (text+ko) ====
@@ -47,13 +47,14 @@
mov %ax, %ss
/*
- * Patch the descriptor table
+ * Find relocation base and patch the gdt descriptor and ljmp targets
*/
- xorl %eax,%eax
- mov %cs, %ax
- sall $4, %eax
- orl %eax, bootcode-gdt+2
- orl %eax, bootdata-gdt+2
+ xorl %ebx,%ebx
+ mov %cs, %bx
+ sall $4, %ebx /* %ebx is now our relocation base */
+ orl %ebx, lgdt_desc-mptramp_start+2
+ orl %ebx, jmp_32-mptramp_start+2
+ orl %ebx, jmp_64-mptramp_start+1
/*
* Load the descriptor table pointer. We'll need it when running
@@ -63,65 +64,101 @@
/* Enable protected mode */
movl $CR0_PE, %eax
- movl %eax, %cr0
+ mov %eax, %cr0
/*
* Now execute a far jump to turn on protected mode. This
* causes the segment registers to turn into selectors and causes
* %cs to be loaded from the gdt.
+ *
+ * The following instruction is:
+ * ljmpl $bootcode-gdt, $protmode-mptramp_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and it's a little clearer what we are patching.
*/
+jmp_32:
+ .byte 0x66 /* size override to 32 bits */
.byte 0xea /* opcode for far jump */
- .word protmode-mptramp_start /* offset in segment */
- .word bootcode-gdt /* index in gdt for 16 bit code */
+ .long protmode-mptramp_start /* offset in segment */
+ .word bootcode-gdt /* index in gdt for 32 bit code */
/*
* At this point, we are running in 32 bit legacy protected mode.
- * However, we have a non-zero base address in our segment registers
- * so that we can remain relocatable.
*/
.code32
protmode:
mov $bootdata-gdt, %eax
mov %ax, %ds
- mov %ax, %ss
/* Turn on the PAE, PSE and PGE bits for when paging is enabled */
mov %cr4, %eax
- orl $(CR4_PAE | CR4_PSE | CR4_PGE), %eax
+ orl $(CR4_PAE | CR4_PSE), %eax
mov %eax, %cr4
/*
+ * Enable EFER.LME so that we get long mode when all the prereqs are
+ * in place. In this case, it turns on when CR0_PG is finally enabled.
+ * Pick up a few other EFER bits that we'll need while we're here.
+ */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ orl $EFER_LME | EFER_SCE | EFER_NXE, %eax
+ wrmsr
+
+ /*
* Point to the embedded page tables for startup. Note that this
* only gets accessed after we're actually in 64 bit mode, however
* we can only set the bottom 32 bits of %cr3 in this state. This
* means we are required to use a temporary page table that is below
- * the 4GB limit.
+ * the 4GB limit. %ebx is still our relocation base. We could just
+ * subtract 3 * PAGE_SIZE, but that would be too easy.
*/
- movl $(mptramp_pagetables-mptramp_start), %eax
+ leal mptramp_pagetables-mptramp_start(%ebx),%eax
+ movl (%eax), %eax
mov %eax, %cr3
/*
- * Enable EFER.LME so that we get long mode when all the prereqs are
- * in place. In this case, it turns on when CR0_PG is finally enabled.
- */
- movl $MSR_EFER, %ecx
- rdmsr
- orl $EFER_LME, %eax
- wrmsr
-
- /*
- * Finally, switch to 64 bit mode by enabling paging. We have
+ * Finally, switch to long mode by enabling paging. We have
* to be very careful here because all the segmentation disappears
* out from underneath us. The spec says we can depend on the
- * subsequent pipelined long jump to execute. This is Magic.
+ * subsequent pipelined branch to execute, but *only if* everything
+ * is still identity mapped. If any mappings change, the pipeline
+ * will flush.
*/
mov %cr0, %eax
orl $CR0_PG, %eax
- mov %eax, %cr3
+ mov %eax, %cr0
+
+ /*
+ * At this point paging is enabled, and we are in "compatibility" mode.
+ * We do another far jump to reload %cs with the 64 bit selector.
+ * %cr3 points to a 4-level page table page.
+ * We cannot yet jump all the way to the kernel because we can only
+ * specify a 32 bit linear address. So, yet another trampoline.
+ *
+ * The following instruction is:
+ * ljmp $kernelcode-gdt, $tramp_64-mptramp_start
+ * but gas cannot assemble that. And besides, we patch the targets
+ * in early startup and it's a little clearer what we are patching.
+ */
+jmp_64:
+ .byte 0xea /* opcode for far jump */
+ .long tramp_64-mptramp_start /* offset in segment */
+ .word kernelcode-gdt /* index in gdt for 64 bit code */
- .byte 0xea /* opcode for far jump */
- .quad entry_64 /* 64 bit flat address */
- .word kernelcode-gdt /* selector offset */
+ /*
+ * Yeehar! We're running in 64 bit mode! We can mostly ignore our
+ * segment registers, and get on with it.
+ * Note that we are running at the correct virtual address, but with
+ * a 1:1 1GB mirrored mapping over entire address space. We had better
+ * switch to a real %cr3 promptly so that we can get to the direct map
+ * space. Remember that jmp is relative and that we've been relocated,
+ * so use an indirect jump.
+ */
+ .code64
+tramp_64:
+ movabsq $entry_64,%rax /* 64 bit immediate load */
+ jmp *%rax
.p2align 4,0
gdt:
@@ -143,7 +180,6 @@
* This is the descriptor for the 32 bit boot code.
* %cs: +A, +R, -C, DPL=0, +P, +D, +G
* Accessed, Readable, Present, 32 bit, 4G granularity
- * Note that the base address is patched.
*/
bootcode:
.long 0x0000ffff
@@ -158,7 +194,6 @@
* Accessed, Writeable, Expand up, Present, 32 bit, 4GB
* For %ds, +D means 'default operand size is 32 bit'.
* For %ss, +B means the stack register is %esp rather than %sp.
- * Note that the base address is patched.
*/
bootdata:
.long 0x0000ffff
@@ -166,9 +201,14 @@
gdtend:
+ /*
+ * The address of our page table pages that the boot code
+ * uses to trampoline up to kernel address space.
+ */
.globl mptramp_pagetables
mptramp_pagetables:
.long 0
+
/*
* The pseudo descriptor for lgdt to use.
*/
@@ -179,19 +219,18 @@
.globl mptramp_end
mptramp_end:
-
/*
- * Yeehar! We're running in 64 bit mode! We can mostly ignore our
- * segment registers, and get on with it.
- * Load a basic stack pointer and jump into the kernel.
- * Note that we are running at the correct virtual address, but with
- * a 1:1 1GB mirrored mapping over entire address space. We had better
- * switch to a real %cr3 promptly.
+ * From here on down is executed in the kernel .text section.
+ *
+ * Load a real %cr3 that has all the direct map stuff and switches
+ * off the 1GB replicated mirror. Load a stack pointer and jump
+ * into AP startup code in C.
*/
.text
.code64
.p2align 4,0
entry_64:
- movq $bootSTK, %rsp
- pushq $init_secondary
- ret
+ movq KPML4phys, %rax
+ movq %rax, %cr3
+ movq bootSTK, %rsp
+ jmp init_secondary
More information about the p4-projects
mailing list