PERFORCE change 137480 for review
Peter Wemm
peter at FreeBSD.org
Wed Mar 12 09:16:14 UTC 2008
http://perforce.freebsd.org/chv.cgi?CH=137480
Change 137480 by peter at peter_melody on 2008/03/12 09:15:42
More speedups: the elapsed-time reduction from baseline was 10% and is now 23%.
Affected files ...
.. //depot/projects/hammer/sys/amd64/amd64/cpu_switch.S#45 edit
.. //depot/projects/hammer/sys/amd64/amd64/genassym.c#50 edit
.. //depot/projects/hammer/sys/amd64/amd64/machdep.c#165 edit
.. //depot/projects/hammer/sys/amd64/amd64/sys_machdep.c#27 edit
.. //depot/projects/hammer/sys/amd64/amd64/vm_machdep.c#91 edit
.. //depot/projects/hammer/sys/amd64/ia32/ia32_signal.c#22 edit
.. //depot/projects/hammer/sys/amd64/include/pcpu.h#28 edit
.. //depot/projects/hammer/sys/amd64/linux32/linux32_machdep.c#21 edit
.. //depot/projects/hammer/sys/amd64/linux32/linux32_sysvec.c#19 edit
Differences ...
==== //depot/projects/hammer/sys/amd64/amd64/cpu_switch.S#45 (text+ko) ====
@@ -80,7 +80,6 @@
movq TD_PCB(%rsi),%rdx /* newtd->td_proc */
movq PCB_CR3(%rdx),%rdx
movq %rdx,%cr3 /* new address space */
- xorl %r9d,%r9d /* Old pcb pointer */
movq TD_PCB(%rsi),%r8
jmp swact
END(cpu_throw)
@@ -108,9 +107,6 @@
movq %rbx,PCB_RBX(%r8)
movq %rax,PCB_RIP(%r8)
- /* Save copy of pcb pointer */
- movq %r8,%r9
-
testl $PCB_32BIT,PCB_FLAGS(%r8)
jnz store_gs /* static predict not taken */
done_store_gs:
@@ -175,32 +171,29 @@
testl $TDP_KTHREAD,TD_PFLAGS(%rsi)
jnz do_tss
- testq %r9,%r9
- jz no_pcb
- cmpq PCB_FSBASE(%r9),%r10
- cmpq PCB_GSBASE(%r9),%r11
- jmp check_bases
-no_pcb:
- movq $-1,%r10 /* Illegal value - force reload on cpu_throw */
- movq %r10,%r11
+ movq PCPU(FSBASE),%r10
+ movq PCPU(GSBASE),%r11
-check_bases:
- movq PCB_FSBASE(%r8),%r10
+ cmpq PCB_FSBASE(%r8),%r10
jz 1f
/* Restore userland %fs */
movl $MSR_FSBASE,%ecx
movl PCB_FSBASE(%r8),%eax
movl PCB_FSBASE+4(%r8),%edx
wrmsr
+ movq PCB_FSBASE(%r8),%rax
+ movq %rax,PCPU(FSBASE)
1:
- movq PCB_GSBASE(%r8),%r11
+ cmpq PCB_GSBASE(%r8),%r11
jz 2f
/* Restore userland %gs */
movl $MSR_KGSBASE,%ecx
movl PCB_GSBASE(%r8),%eax
movl PCB_GSBASE+4(%r8),%edx
wrmsr
+ movq PCB_GSBASE(%r8),%rax
+ movq %rax,PCPU(GSBASE)
2:
do_tss:
@@ -262,38 +255,38 @@
store_dr:
movq %dr7,%rax /* yes, do the save */
+ movq %dr0,%r15
+ movq %dr1,%r14
+ movq %dr2,%r13
+ movq %dr3,%r12
+ movq %dr6,%r11
+ andq $0x0000fc00, %rax /* disable all watchpoints */
+ movq %r15,PCB_DR0(%r8)
+ movq %r14,PCB_DR1(%r8)
+ movq %r13,PCB_DR2(%r8)
+ movq %r12,PCB_DR3(%r8)
+ movq %r11,PCB_DR6(%r8)
movq %rax,PCB_DR7(%r8)
- andq $0x0000fc00, %rax /* disable all watchpoints */
movq %rax,%dr7
- movq %dr6,%r11
- movq %dr3,%r12
- movq %dr2,%r13
- movq %dr1,%r14
- movq %dr0,%r15
- movq %r11,PCB_DR6(%r8)
- movq %r12,PCB_DR3(%r8)
- movq %r13,PCB_DR2(%r8)
- movq %r14,PCB_DR1(%r8)
- movq %r15,PCB_DR0(%r8)
jmp done_store_dr
load_dr:
+ movq %dr7,%rax
+ movq PCB_DR0(%r8),%r15
+ movq PCB_DR1(%r8),%r14
+ movq PCB_DR2(%r8),%r13
+ movq PCB_DR3(%r8),%r12
movq PCB_DR6(%r8),%r11
- movq PCB_DR3(%r8),%r12
- movq PCB_DR2(%r8),%r13
- movq PCB_DR1(%r8),%r14
- movq PCB_DR0(%r8),%r15
- movq %r11,%dr6
- movq %r12,%dr3
- movq %r13,%dr2
+ movq PCB_DR7(%r8),%rcx
+ movq %r15,%dr0
movq %r14,%dr1
- movq %r15,%dr0
- /* But preserve reserved bits in %dr7 */
- movq %dr7,%rax
- movq PCB_DR7(%r8),%rcx
+ /* Preserve reserved bits in %dr7 */
andq $0x0000fc00,%rax
andq $~0x0000fc00,%rcx
+ movq %r13,%dr2
+ movq %r12,%dr3
orq %rcx,%rax
+ movq %r11,%dr6
movq %rax,%dr7
jmp done_load_dr
==== //depot/projects/hammer/sys/amd64/amd64/genassym.c#50 (text+ko) ====
@@ -198,6 +198,8 @@
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp));
ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0));
+ASSYM(PC_FSBASE, offsetof(struct pcpu, pc_fsbase));
+ASSYM(PC_GSBASE, offsetof(struct pcpu, pc_gsbase));
ASSYM(LA_VER, offsetof(struct LAPIC, version));
ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
==== //depot/projects/hammer/sys/amd64/amd64/machdep.c#165 (text+ko) ====
@@ -601,6 +601,8 @@
critical_enter();
wrmsr(MSR_FSBASE, 0);
wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
+ PCPU_SET(fsbase, 0);
+ PCPU_SET(gsbase, 0);
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
critical_exit();
==== //depot/projects/hammer/sys/amd64/amd64/sys_machdep.c#27 (text+ko) ====
@@ -73,6 +73,7 @@
if (!error) {
critical_enter();
wrmsr(MSR_FSBASE, i386base);
+ PCPU_SET(fsbase, i386base);
pcb->pcb_fsbase = i386base;
critical_exit();
}
@@ -86,6 +87,7 @@
if (!error) {
critical_enter();
wrmsr(MSR_KGSBASE, i386base);
+ PCPU_SET(gsbase, i386base);
pcb->pcb_gsbase = i386base;
critical_exit();
}
@@ -100,6 +102,7 @@
if (a64base < VM_MAXUSER_ADDRESS) {
critical_enter();
wrmsr(MSR_FSBASE, a64base);
+ PCPU_SET(fsbase, a64base);
pcb->pcb_fsbase = a64base;
critical_exit();
} else {
@@ -118,6 +121,7 @@
if (a64base < VM_MAXUSER_ADDRESS) {
critical_enter();
wrmsr(MSR_KGSBASE, a64base);
+ PCPU_SET(gsbase, a64base);
pcb->pcb_gsbase = a64base;
critical_exit();
} else {
==== //depot/projects/hammer/sys/amd64/amd64/vm_machdep.c#91 (text+ko) ====
@@ -385,6 +385,7 @@
critical_enter();
td->td_pcb->pcb_gsbase = (register_t)tls_base;
wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase);
+ PCPU_SET(gsbase, td->td_pcb->pcb_gsbase);
critical_exit();
} else {
td->td_pcb->pcb_gsbase = (register_t)tls_base;
@@ -396,6 +397,7 @@
critical_enter();
td->td_pcb->pcb_fsbase = (register_t)tls_base;
wrmsr(MSR_FSBASE, td->td_pcb->pcb_fsbase);
+ PCPU_SET(fsbase, td->td_pcb->pcb_fsbase);
critical_exit();
} else {
td->td_pcb->pcb_fsbase = (register_t)tls_base;
==== //depot/projects/hammer/sys/amd64/ia32/ia32_signal.c#22 (text+ko) ====
@@ -715,10 +715,14 @@
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
+ critical_enter();
wrmsr(MSR_FSBASE, 0);
wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
+ PCPU_SET(fsbase, 0);
+ PCPU_SET(gsbase, 0);
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
+ critical_exit();
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_udatasel);
==== //depot/projects/hammer/sys/amd64/include/pcpu.h#28 (text+ko) ====
@@ -49,7 +49,9 @@
register_t pc_rsp0; \
register_t pc_scratch_rsp; /* User %rsp in syscall */ \
u_int pc_apic_id; \
- u_int pc_acpi_id /* ACPI CPU id */
+ u_int pc_acpi_id; /* ACPI CPU id */ \
+ register_t pc_fsbase; /* User values of fsbase */ \
+ register_t pc_gsbase /* User values of gsbase */
#ifdef lint
==== //depot/projects/hammer/sys/amd64/linux32/linux32_machdep.c#21 (text+ko) ====
@@ -1356,6 +1356,7 @@
td->td_pcb->pcb_gs32p = &gdt[GUGS32_SEL];
td->td_pcb->pcb_flags |= PCB_32BIT;
wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase);
+ PCPU_SET(gsbase, td->td_pcb->pcb_gsbase);
critical_exit();
return (0);
==== //depot/projects/hammer/sys/amd64/linux32/linux32_sysvec.c#19 (text+ko) ====
@@ -820,6 +820,8 @@
critical_enter();
wrmsr(MSR_FSBASE, 0);
wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
+ PCPU_SET(fsbase, 0);
+ PCPU_SET(gsbase, 0);
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
critical_exit();
More information about the p4-projects mailing list