Anybody willing to test out kload?
Russell Cattelan
cattelan at thebarn.com
Thu Nov 15 04:30:07 UTC 2012
A few people have pointed out I sent out the patch in reverse. :-(
I messed up and reversed the tags to git diff.
Here is the corrected patch, along with a few other fixes and cleanups.
-Russell
-------------- next part --------------
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c609633
diff --git a/sys/amd64/amd64/kload.c b/sys/amd64/amd64/kload.c
new file mode 100644
index 0000000..ed203ae
--- /dev/null
+++ b/sys/amd64/amd64/kload.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2011 - 2012
+ * Russell Cattelan Digital Elves Inc
+ * Copyright (c) 2011 - 2012
+ * Isilon Systems, LLC. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/kload.h>
+#include <sys/malloc.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+
+#include <vm/vm_param.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
+#define GUEST_NULL_SEL 0
+#define GUEST_CODE_SEL 1
+#define GUEST_DATA_SEL 2
+
+/*
+ * Fill in the three-entry GDT used while switching to the new kernel.
+ *
+ * gdtr must point at storage for at least three 8-byte descriptors.  The
+ * slots are indexed by the GUEST_*_SEL constants: the null descriptor, a
+ * present ring-0 code descriptor with the long-mode (L) bit set, and a
+ * present ring-0 writable data descriptor.
+ */
+void
+setup_freebsd_gdt(uint64_t *gdtr)
+{
+	static const uint64_t boot_gdt[] = {
+		[GUEST_NULL_SEL] = 0x0000000000000000,
+		[GUEST_CODE_SEL] = 0x0020980000000000,
+		[GUEST_DATA_SEL] = 0x0000920000000000,
+	};
+	unsigned int slot;
+
+	for (slot = 0; slot < sizeof(boot_gdt) / sizeof(boot_gdt[0]); slot++)
+		gdtr[slot] = boot_gdt[slot];
+}
+
+/*
+ * Build the temporary page tables that relocate_kernel switches to.
+ *
+ * Three contiguous pages of kernel virtual memory are allocated: one
+ * level 4 table, one level 3 table and one level 2 table.  Every level 4
+ * slot points at the single level 3 page, every level 3 slot points at
+ * the single level 2 page, and the level 2 page maps the first 1GB of
+ * physical memory with 2MB pages.  Every 1GB virtual window therefore
+ * aliases the same first 1GB of physical memory.
+ *
+ * Returns the physical address of the level 4 table, or NULL if the
+ * pages could not be allocated.
+ */
+pt_entry_t *
+kload_build_page_table(void)
+{
+	pt_entry_t *PT4;
+	pt_entry_t *PT3;
+	pt_entry_t *PT2;
+	int i;
+	unsigned long va;
+
+	va = (unsigned long)kmem_alloc(kernel_map, PAGE_SIZE * 3);
+	/*
+	 * kmem_alloc() returns 0 on failure.  Bail out before touching the
+	 * pages so that callers testing the result against NULL actually
+	 * see the failure instead of a table built at address 0.
+	 */
+	if (va == 0)
+		return (NULL);
+
+	/* The three tables are laid out back to back, one page each. */
+	PT4 = (pt_entry_t *)va;
+	PT3 = PT4 + (PAGE_SIZE / sizeof(*PT4));
+	PT2 = PT3 + (PAGE_SIZE / sizeof(*PT3));
+
+	if (bootverbose)
+		printf("%s PT4 0x%lx (0x%lx) PT3 0x%lx (0x%lx) "
+		    "PT2 0x%lx (0x%lx)\n",
+		    __func__,
+		    (unsigned long)PT4, (unsigned long)vtophys(PT4),
+		    (unsigned long)PT3, (unsigned long)vtophys(PT3),
+		    (unsigned long)PT2, (unsigned long)vtophys(PT2));
+
+	/*
+	 * The following section is a direct copy of
+	 * head/src/sys/boot/i386/libi386/elf64_freebsd.c:92 at r236688
+	 */
+
+	bzero(PT4, PAGE_SIZE);
+	bzero(PT3, PAGE_SIZE);
+	bzero(PT2, PAGE_SIZE);
+
+	/*
+	 * This is kinda brutal, but every single 1GB VM memory segment points
+	 * to the same first 1GB of physical memory.  But it is more than
+	 * adequate.
+	 */
+	for (i = 0; i < 512; i++) {
+		/*
+		 * Each slot of the level 4 pages points to the
+		 * same level 3 page
+		 */
+		PT4[i] = (pt_entry_t)(vtophys(PT3));
+		PT4[i] |= PG_V | PG_RW | PG_U;
+
+		/*
+		 * Each slot of the level 3 pages points to the
+		 * same level 2 page
+		 */
+		PT3[i] = (pt_entry_t)(vtophys(PT2));
+		PT3[i] |= PG_V | PG_RW | PG_U;
+
+		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
+		PT2[i] = (pt_entry_t)i * (2 * 1024 * 1024);
+		PT2[i] |= PG_V | PG_RW | PG_PS | PG_U;
+	}
+	return ((pt_entry_t *)vtophys(PT4));
+}
diff --git a/sys/amd64/amd64/kload_exec.S b/sys/amd64/amd64/kload_exec.S
new file mode 100644
index 0000000..75bff3b
--- /dev/null
+++ b/sys/amd64/amd64/kload_exec.S
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2011 - 2012
+ * Russell Cattelan Digital Elves Inc
+ * Copyright (c) 2011 - 2012
+ * Isilon Systems, LLC. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include "assym.s"
+
+#define CR0_PG 0x80000000 /* PaGing enable */
+
+#define X86_CR0_PE 0x00000001 /* Protection Enable */
+#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */
+#define X86_CR0_EM 0x00000004 /* Emulation */
+#define X86_CR0_TS 0x00000008 /* Task Switched */
+#define X86_CR0_ET 0x00000010 /* Extension Type */
+#define X86_CR0_NE 0x00000020 /* Numeric Error */
+#define X86_CR0_WP 0x00010000 /* Write Protect */
+#define X86_CR0_AM 0x00040000 /* Alignment Mask */
+#define X86_CR0_NW 0x20000000 /* Not Write-through */
+#define X86_CR0_CD 0x40000000 /* Cache Disable */
+#define X86_CR0_PG 0x80000000 /* Paging */
+
+#define X86_CR4_PSE 0x00000010 /* enable page size extensions */
+#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */
+
+ .globl relocate_kernel
+relocate_kernel:
+ /*
+ * relocate_kernel: switch to the kload page table, GDT and IDT, copy
+ * the staged kernel pages into place and jump to the new kernel.
+ *
+ * Register arguments as used below:
+ *   %rdi  list of source pages (walked as the indirection list)
+ *   %rsi  initial destination address ("kern base")
+ *   %rdx  address of the code page holding a copy of this routine
+ *   %rcx  control page
+ * Control page offsets read here: 8 modulep, 16 physfree, 24 gdt,
+ * 32 page table, 40 control page address under the new page table,
+ * 48 kernel entry point, 56 idt.
+ *
+ * First install the new page table.
+ */
+ movq 32(%rcx), %rax /* page table */
+ movq 40(%rcx), %r9 /* address of control_page with new PT */
+ movq %rax, %cr3
+
+ /*
+ * Set cr4 to a known state:
+ * - page size extensions
+ * - physical address extension enabled
+ */
+ movq $(X86_CR4_PSE | X86_CR4_PAE), %rax
+ movq %rax, %cr4
+
+ /* then move the stack to the end of control page */
+ lea 4096(%r9), %rsp
+
+ /* now save stuff onto the new stack */
+ pushq %rcx /* arg 4 control page */
+ pushq %rdx /* arg 3 code page */
+ pushq %rsi /* arg 2 kern base */
+ pushq %rdi /* arg 1 va_list */
+
+ /* zero out flags, and disable interrupts */
+ pushq $0
+ popfq
+ cli
+
+ /* install simple gdt */
+ movq 24(%r9), %rax /* gdt */
+ lgdt (%rax)
+ movq 56(%r9), %rax
+ lidt (%rax) /* null idt */
+ /*
+ * now move to the code page
+ * should have been passed code_page based
+ * on new page table
+ */
+ movq %rdx, %r8
+ addq $(identity_mapped - relocate_kernel), %r8
+ /* offset of code segment in new gdt */
+ pushq $0x08
+ pushq %r8
+ /* jump to this spot in the new page */
+ lretq
+identity_mapped:
+
+ movq $0x10,%rax
+ movq %rax,%ds
+ movq %rax,%es
+ movq %rax,%fs
+ movq %rax,%gs
+ movq %rax,%ss
+
+ /*
+ * Set cr0 to a known state:
+ * - Paging enabled
+ * - Alignment check disabled
+ * - Write protect disabled
+ * - No task switch
+ * - Don't do FP software emulation.
+ * - Protected mode enabled
+ */
+ movq %cr0, %rax
+ andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | X86_CR0_NE), %rax
+ orl $(X86_CR0_PG | X86_CR0_PE), %eax
+ movq %rax, %cr0
+
+ /* Do the copies */
+ cld
+ /* saved list of source pages (the %rdi pushed last above) */
+ movq 0(%rsp), %rbx
+ /*
+ * the initial dest page
+ * this is KERNBASE + 0x200000
+ * kernel is contiguous in memory
+ */
+ movq 8(%rsp), %rdi
+0: /* top, read another word for the indirection page */
+ movq (%rbx), %rcx
+
+ addq $8, %rbx
+ testq $0x1, %rcx /* is it a destination page */
+ jz 1f
+ movq %rcx, %rdi
+ andq $0xFFFFFFFFfffff000, %rdi
+ jmp 0b
+1:
+ testq $0x2, %rcx /* is it an indirection page */
+ jz 1f
+ movq %rcx, %rbx
+ andq $0xFFFFFFFFfffff000, %rbx
+ jmp 0b
+1:
+ testq $0x4, %rcx /* is it the done indicator */
+ jz 1f
+ jmp 2f
+1:
+ testq $0x8, %rcx /* is it the source indicator */
+ jz 0b /* Ignore it otherwise */
+ movq %rcx, %rsi /* For every source page do a copy */
+ andq $0xfffffffffffff000, %rsi
+ movq $512, %rcx /* 512 quadwords = 4096 bytes */
+ rep
+ movsq
+ jmp 0b
+2:
+ /*
+ * set all of the registers to known values
+ * leave %rsp alone
+ */
+ xorq %rax, %rax
+ xorq %rbx, %rbx
+ xorq %rcx, %rcx
+ xorq %rdx, %rdx
+ xorq %rsi, %rsi
+ xorq %rdi, %rdi
+ xorq %rbp, %rbp
+
+ pushq 16(%r9) /* physfree */
+ movq 8(%r9), %rax /* modulep */
+ salq $32, %rax /* move modulep into the upper 32 bits of the pushed quadword */
+ pushq %rax
+
+ pushq $0x8
+ pushq 48(%r9) /* entry # kernel entry pt */
+ lretq
+relocate_kernel_end:
+ .globl relocate_kernel_size
+relocate_kernel_size:
+ .long relocate_kernel_end - relocate_kernel
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index d2e4aad..b085326 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -78,6 +78,9 @@ __FBSDID("$FreeBSD$");
#define BIOS_RESET (0x0f)
#define BIOS_WARM (0x0a)
+
+/* Quick hack to access the kload page table so we can set the APs to a known pgtbl. */
+extern unsigned long kload_pgtbl;
/* lock region used by kernel profiling */
int mcount_lock;
@@ -1409,10 +1412,20 @@ cpustop_handler(void)
void
cpususpend_handler(void)
{
+ register_t cr3, rf;
+ register_t cr0, cr4;
u_int cpu;
cpu = PCPU_GET(cpuid);
+ printf("%s called on cpu%d\n",__FUNCTION__,cpu);
+
+ rf = intr_disable();
+ cr3 = rcr3();
+
+ lapic_clear_lapic(1 /* disable lapic */);
+ /* shutdown interrupts to the cpu and then set the mask as stopped */
+
if (savectx(susppcbs[cpu])) {
ctx_fpusave(susppcbs[cpu]->pcb_fpususpend);
wbinvd();
@@ -1422,20 +1435,37 @@ cpususpend_handler(void)
initializecpu();
PCPU_SET(switchtime, 0);
PCPU_SET(switchticks, ticks);
-
- /* Indicate that we are resumed */
- CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
+ /* make sure the page table is not the same one that boot process sets up */
+ load_cr3(kload_pgtbl);
+
+ /* Disable PGE. */
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+
+ /* Disable caches (CD = 1, NW = 0); CR0_PG is ORed in, so paging stays enabled. */
+ cr0 = rcr0();
+ load_cr0((cr0 & ~CR0_NW) | CR0_CD | CR0_PG);
+
+ /* Flushes caches and TLBs. */
+ wbinvd();
+ invltlb();
+
+ halt();
+
/* Wait for resume */
while (!CPU_ISSET(cpu, &started_cpus))
ia32_pause();
CPU_CLR_ATOMIC(cpu, &started_cpus);
+ CPU_CLR_ATOMIC(cpu, &stopped_cpus);
- /* Resume MCA and local APIC */
+ /* Restore CR3 and enable interrupts */
+ load_cr3(cr3);
mca_resume();
lapic_setup(0);
+ intr_restore(rf);
}
/*
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index ae2f5b9..7fded95 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -227,6 +227,7 @@ int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
enum intr_trigger trigger);
void lapic_set_tpr(u_int vector);
void lapic_setup(int boot);
+void lapic_clear_lapic(u_int);
#endif /* !LOCORE */
#endif /* _MACHINE_APICVAR_H_ */
diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h
index 700e35f..a8ef1fc 100644
--- a/sys/amd64/include/intr_machdep.h
+++ b/sys/amd64/include/intr_machdep.h
@@ -158,6 +158,7 @@ struct intsrc *intr_lookup_source(int vector);
int intr_register_pic(struct pic *pic);
int intr_register_source(struct intsrc *isrc);
int intr_remove_handler(void *cookie);
+int intr_clear_all_handlers(void);
void intr_resume(void);
void intr_suspend(void);
void intrcnt_add(const char *name, u_long **countp);
diff --git a/sys/boot/common/load_elf.c b/sys/boot/common/load_elf.c
index e1e6de7..36df22c 100644
--- a/sys/boot/common/load_elf.c
+++ b/sys/boot/common/load_elf.c
@@ -317,25 +317,30 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off)
continue;
#ifdef ELF_VERBOSE
- printf("Segment: 0x%lx at 0x%lx -> 0x%lx-0x%lx",
- (long)phdr[i].p_filesz, (long)phdr[i].p_offset,
- (long)(phdr[i].p_vaddr + off),
- (long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1));
+ printf("Segment: filesz 0x%llx @ 0x%016llx -> vaddr_range 0x%016llx - 0x%016llx",
+ (long long)phdr[i].p_filesz, (long long)phdr[i].p_offset,
+ (long long)(phdr[i].p_vaddr + off),
+ (long long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1));
#else
if ((phdr[i].p_flags & PF_W) == 0) {
- printf("text=0x%lx ", (long)phdr[i].p_filesz);
+ printf("text=0x%llx ", (long long)phdr[i].p_filesz);
} else {
- printf("data=0x%lx", (long)phdr[i].p_filesz);
+ printf("data=0x%llx", (long long)phdr[i].p_filesz);
if (phdr[i].p_filesz < phdr[i].p_memsz)
- printf("+0x%lx", (long)(phdr[i].p_memsz -phdr[i].p_filesz));
+ printf("+0x%llx", (long long)(phdr[i].p_memsz -phdr[i].p_filesz));
printf(" ");
}
#endif
fpcopy = 0;
if (ef->firstlen > phdr[i].p_offset) {
fpcopy = ef->firstlen - phdr[i].p_offset;
- archsw.arch_copyin(ef->firstpage + phdr[i].p_offset,
- phdr[i].p_vaddr + off, fpcopy);
+ printf("\n%s:%d firstpage 0x%lx p_offset 0x%lx p_vaddr 0x%lx off 0x%lx\n",
+ __FUNCTION__,__LINE__,
+ (unsigned long)ef->firstpage,
+ (unsigned long) phdr[i].p_offset,
+ (unsigned long)phdr[i].p_vaddr,
+ (unsigned long)off);
+ archsw.arch_copyin(ef->firstpage + phdr[i].p_offset, phdr[i].p_vaddr + off, fpcopy);
}
if (phdr[i].p_filesz > fpcopy) {
if (kern_pread(ef->fd, phdr[i].p_vaddr + off + fpcopy,
@@ -348,9 +353,11 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off)
/* clear space from oversized segments; eg: bss */
if (phdr[i].p_filesz < phdr[i].p_memsz) {
#ifdef ELF_VERBOSE
- printf(" (bss: 0x%lx-0x%lx)",
- (long)(phdr[i].p_vaddr + off + phdr[i].p_filesz),
- (long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1));
+ printf("\n\t(bss: 0x%lx-0x%lx) vaddr 0x%lx size 0x%lx clearing\n",
+ (long)(phdr[i].p_vaddr + off + phdr[i].p_filesz),
+ (long)(phdr[i].p_vaddr + off + phdr[i].p_memsz - 1),
+ (long)(phdr[i].p_vaddr + phdr[i].p_filesz),
+ (long)(phdr[i].p_memsz - phdr[i].p_filesz) );
#endif
kern_bzero(phdr[i].p_vaddr + off + phdr[i].p_filesz,
diff --git a/sys/boot/userboot/ficl/Makefile b/sys/boot/userboot/ficl/Makefile
index 42b9309..d7818b9 100644
--- a/sys/boot/userboot/ficl/Makefile
+++ b/sys/boot/userboot/ficl/Makefile
@@ -62,6 +62,10 @@ softcore.c: ${SOFTWORDS} softcore.awk
(cd ${.CURDIR}/../../ficl/softwords; cat ${SOFTWORDS} \
| awk -f softcore.awk -v datestamp="`LC_ALL=C date`") > ${.TARGET}
+beforedepend ${OBJS}: no-machine
+
+no-machine:
+ rm -f ${.CURDIR}/../../ficl/machine
#.if ${MACHINE_CPUARCH} == "amd64"
#${SRCS:M*.c:R:S/$/.o/g}: machine
#
diff --git a/sys/boot/userboot/test/test.c b/sys/boot/userboot/test/test.c
index 36258a7..77202c1 100644
--- a/sys/boot/userboot/test/test.c
+++ b/sys/boot/userboot/test/test.c
@@ -376,6 +376,12 @@ test_getenv(void *arg, int idx)
return (vars[idx]);
}
+/*
+ * Stub for the buildsmap callback.  The test harness has no system SMAP
+ * to hand back and never writes *smap_void or *outlen, so it must report
+ * failure: a zero return would tell the loader that both output
+ * parameters are valid, while a non-zero return makes it build its own
+ * simple SMAP from getmem instead.
+ */
+static int
+test_buildsmap(void *arg, void **smap_void, size_t *outlen)
+{
+	return (1);
+}
+
struct loader_callbacks cb = {
.putc = test_putc,
.getc = test_getc,
@@ -405,6 +411,7 @@ struct loader_callbacks cb = {
.getmem = test_getmem,
.getenv = test_getenv,
+ .buildsmap = test_buildsmap,
};
void
@@ -464,5 +471,5 @@ main(int argc, char** argv)
term.c_lflag &= ~(ICANON|ECHO);
tcsetattr(0, TCSAFLUSH, &term);
- func(&cb, NULL, USERBOOT_VERSION_3, disk_fd >= 0);
+ func(&cb, NULL, USERBOOT_VERSION_4, disk_fd >= 0);
}
diff --git a/sys/boot/userboot/userboot.h b/sys/boot/userboot/userboot.h
index e38927e..0a9d2f1 100644
--- a/sys/boot/userboot/userboot.h
+++ b/sys/boot/userboot/userboot.h
@@ -32,6 +32,7 @@
#define USERBOOT_VERSION_1 1
#define USERBOOT_VERSION_2 2
#define USERBOOT_VERSION_3 3
+#define USERBOOT_VERSION_4 4
/*
* Exit codes from the loader
@@ -195,4 +196,11 @@ struct loader_callbacks {
* each invocation will add 1 to the previous value of 'num'.
*/
const char * (*getenv)(void *arg, int num);
+
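+ /*
+ * Build the system SMAP.
+ * This lets kload pass back a copy of the running system's SMAP
+ * through *smap and *len.  A zero return means success; a non-zero
+ * return makes the loader fall back to getmem and a simple SMAP.
+ */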
+ int (*buildsmap)(void *arg, void **smap, size_t *len);
};
diff --git a/sys/boot/userboot/userboot/bootinfo64.c b/sys/boot/userboot/userboot/bootinfo64.c
index fc7c14d..28c47ab 100644
--- a/sys/boot/userboot/userboot/bootinfo64.c
+++ b/sys/boot/userboot/userboot/bootinfo64.c
@@ -187,33 +187,46 @@ bios_addsmapdata(struct preloaded_file *kfp)
{
uint64_t lowmem, highmem;
int smapnum, len;
- struct smap smap[3], *sm;
+ struct smap *smap = NULL, *sm = NULL;
+ int error = 1;
- CALLBACK(getmem, &lowmem, &highmem);
+ printf("%s\n",__FUNCTION__);
- sm = &smap[0];
+ if (callbacks->buildsmap)
+ error = callbacks->buildsmap(NULL, (void **)&smap, &len);
- sm->base = 0; /* base memory */
- sm->length = 640 * 1024;
- sm->type = SMAP_TYPE_MEMORY;
- sm++;
+ /* either there is no buildsmap function or it failed
+ * revert back to using getmem and a simple smap
+ */
- sm->base = 0x100000; /* extended memory */
- sm->length = lowmem - 0x100000;
- sm->type = SMAP_TYPE_MEMORY;
- sm++;
+ if (error) {
+ smap = sm = malloc(3 * sizeof(struct smap));
+ CALLBACK(getmem, &lowmem, &highmem);
- smapnum = 2;
+ sm->base = 0; /* base memory */
+ sm->length = 640 * 1024;
+ sm->type = SMAP_TYPE_MEMORY;
+ sm++;
- if (highmem != 0) {
- sm->base = 4 * GB;
- sm->length = highmem;
- sm->type = SMAP_TYPE_MEMORY;
- smapnum++;
- }
+ sm->base = 0x100000; /* extended memory */
+ sm->length = lowmem - 0x100000;
+ sm->type = SMAP_TYPE_MEMORY;
+ sm++;
- len = smapnum * sizeof (struct smap);
- file_addmetadata(kfp, MODINFOMD_SMAP, len, &smap[0]);
+ smapnum = 2;
+
+ if (highmem != 0) {
+ sm->base = 4 * GB;
+ sm->length = highmem;
+ sm->type = SMAP_TYPE_MEMORY;
+ smapnum++;
+ }
+
+ len = smapnum * sizeof (struct smap);
+ }
+
+ file_addmetadata(kfp, MODINFOMD_SMAP, len, smap);
+ free(smap);
}
/*
diff --git a/sys/boot/userboot/userboot/conf.c b/sys/boot/userboot/userboot/conf.c
index 0c57eba..d2c1067 100644
--- a/sys/boot/userboot/userboot/conf.c
+++ b/sys/boot/userboot/userboot/conf.c
@@ -86,8 +86,11 @@ struct file_format *file_formats[] = {
* data structures from bootstrap.h as well.
*/
extern struct console userboot_console;
+extern struct console comconsole;
+
struct console *consoles[] = {
&userboot_console,
+ &comconsole,
NULL
};
diff --git a/sys/boot/userboot/userboot/main.c b/sys/boot/userboot/userboot/main.c
index 4092b9b..0e2e0b7 100644
--- a/sys/boot/userboot/userboot/main.c
+++ b/sys/boot/userboot/userboot/main.c
@@ -36,8 +36,9 @@ __FBSDID("$FreeBSD$");
#include "disk.h"
#include "libuserboot.h"
-#define USERBOOT_VERSION USERBOOT_VERSION_3
+#define USERBOOT_VERSION USERBOOT_VERSION_4
+static char malloc_buf[512*1024];
struct loader_callbacks *callbacks;
void *callbacks_arg;
@@ -67,31 +68,47 @@ exit(int v)
}
void
+loader_init(void)
+{
+ /*
+ * It does not hurt to re-call this as it just sets global
+ * ptrs that never change
+ */
+ setheap((void *)malloc_buf, (void *)(malloc_buf + 512*1024));
+}
+
+int
loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks)
{
- static char malloc[512*1024];
const char *var;
int i;
-
- if (version != USERBOOT_VERSION)
- abort();
-
- callbacks = cb;
- callbacks_arg = arg;
- userboot_disk_maxunit = ndisks;
-
+
/*
* initialise the heap as early as possible. Once this is done,
* alloc() is usable. The stack is buried inside us, so this is
* safe.
*/
- setheap((void *)malloc, (void *)(malloc + 512*1024));
+ loader_init();
+
+ if (cb != NULL) {
+ callbacks = cb;
+ callbacks_arg = arg;
+ userboot_disk_maxunit = ndisks;
+ } else {
+ return (EFAULT);
+ }
- /*
+ /*
* Hook up the console
*/
cons_probe();
+ if (version != USERBOOT_VERSION) {
+ printf("%s: version expected %d got %d\n", __func__,
+ USERBOOT_VERSION, version);
+ return(EOPNOTSUPP);
+ }
+
/*
* March through the device switch probing for things.
*/
@@ -128,11 +145,11 @@ loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks)
extract_currdev();
if (setjmp(jb))
- return;
+ return (0);
interact(); /* doesn't return */
- exit(0);
+ return(0);
}
/*
diff --git a/sys/boot/userboot/userboot/userboot_cons.c b/sys/boot/userboot/userboot/userboot_cons.c
index 5ecb7c8..5a9a573 100644
--- a/sys/boot/userboot/userboot/userboot_cons.c
+++ b/sys/boot/userboot/userboot/userboot_cons.c
@@ -50,6 +50,18 @@ struct console userboot_console = {
userboot_cons_poll,
};
+
+/*
+ * Second console entry, selectable as "comconsole".  It reuses the
+ * userboot console callbacks unchanged.
+ */
+struct console comconsole = {
+	"comconsole",
+	"comconsole",	/* was misspelled "comsonsole" */
+	0,
+	userboot_cons_probe,
+	userboot_cons_init,
+	userboot_cons_putchar,
+	userboot_cons_getchar,
+	userboot_cons_poll,
+};
+
static void
userboot_cons_probe(struct console *cp)
{
diff --git a/sys/conf/files b/sys/conf/files
index 5554ec0..49de90a 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2607,6 +2607,7 @@ kern/kern_khelp.c standard
kern/kern_kthread.c standard
kern/kern_ktr.c optional ktr
kern/kern_ktrace.c standard
+kern/kern_kload.c standard
kern/kern_linker.c standard
kern/kern_lock.c standard
kern/kern_lockf.c standard
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index b6a474e..2447c7bd 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -93,6 +93,9 @@ acpi_wakedata.h optional acpi \
no-obj no-implicit-rule before-depend \
clean "acpi_wakedata.h"
#
+amd64/amd64/kload_exec.S standard
+amd64/amd64/kload.c standard
+#
amd64/amd64/amd64_mem.c optional mem
#amd64/amd64/apic_vector.S standard
amd64/amd64/atomic.c standard
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index 68c24e0..f81a05f 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib
+ * created from FreeBSD
*/
#include "opt_compat.h"
@@ -567,4 +567,5 @@ struct sysent sysent[] = {
{ AS(posix_fallocate_args), (sy_call_t *)sys_posix_fallocate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 530 = posix_fallocate */
{ AS(posix_fadvise_args), (sy_call_t *)sys_posix_fadvise, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = posix_fadvise */
{ AS(wait6_args), (sy_call_t *)sys_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = wait6 */
+ { AS(kload_args), (sy_call_t *)sys_kload, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 533 = kload */
};
diff --git a/sys/kern/kern_kload.c b/sys/kern/kern_kload.c
new file mode 100644
index 0000000..636830e
--- /dev/null
+++ b/sys/kern/kern_kload.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2011 - 2012
+ * Russell Cattelan Digital Elves Inc
+ * Copyright (c) 2011 - 2012
+ * Isilon Systems, LLC. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/eventhandler.h>
+#include <sys/kernel.h>
+#include <sys/kload.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/reboot.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <machine/segments.h>
+
+#include <vm/vm_param.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_map.h>
+
+
+static struct kload_items *k_items = NULL;
+static MALLOC_DEFINE(M_KLOAD, "kload_items", "kload items");
+int kload_ready = 0;
+
+static vm_offset_t kload_image_va = 0;
+/*
+ * Warning: this size is somewhat arbitrary, but it should go
+ * away once the allocation delays in kmem_alloc_attr are
+ * fixed.
+ */
+#define IMAGE_PREALLOC (24 * 1024 * 1024)
+
+static void kload_init(void);
+SYSINIT(kload_mem, SI_SUB_DRIVERS, SI_ORDER_ANY, kload_init, NULL);
+
+static int kload_copyin_segment(struct kload_segment *,int);
+static int kload_add_page(struct kload_items *, unsigned long);
+static void kload_shutdown_final(void *, int);
+static struct region_descriptor *mygdt;
+static vm_offset_t control_page;
+static vm_offset_t code_page;
+static void *gdt_desc;
+static pt_entry_t *pgtbl;
+unsigned long kload_pgtbl;
+static unsigned long max_addr = 0 , min_addr = 0;
+
+#define GIGMASK (~((1<<30)-1))
+#define ONEGIG (1<<30)
+#define GUEST_GDTR_LIMIT (3 * 8 - 1)
+
+extern char kernphys[];
+#define KLOADBASE KERNBASE
+
+/*
+ * Widen the [min_addr, max_addr] physical range so that it covers every
+ * page of the count-page region starting at kernel virtual address addr.
+ */
+static void
+update_max_min(vm_offset_t addr, int count)
+{
+	vm_paddr_t pa;
+	int pg;
+
+	for (pg = 0; pg < count; pg++) {
+		/* Translate each page once and compare against both bounds. */
+		pa = vtophys(addr + (pg * PAGE_SIZE));
+		if (pa < min_addr)
+			min_addr = pa;
+		if (pa > max_addr)
+			max_addr = pa;
+	}
+}
+
+/*
+ * Allocate zeroed kernel memory whose physical pages lie below 1GB and
+ * fold the physical pages used into min_addr/max_addr.
+ *
+ * Returns the kernel virtual address of the allocation, or 0 if
+ * kmem_alloc_attr() could not satisfy the request.
+ */
+static vm_offset_t
+kload_kmem_alloc(vm_map_t map, vm_size_t size)
+{
+	vm_offset_t va;
+	int num_pages;
+
+	va = kmem_alloc_attr(map, size,
+	    M_WAITOK | M_ZERO,
+	    0, (1 << 30) /* 1Gig limit */,
+	    VM_MEMATTR_WRITE_COMBINING);
+	/*
+	 * On failure nothing is mapped, so there are no physical pages to
+	 * account for and vtophys() must not be applied to the range.
+	 */
+	if (va == 0)
+		return (0);
+
+	num_pages = roundup2(size, PAGE_SIZE) >> PAGE_SHIFT;
+	update_max_min(va, num_pages);
+
+	return (va);
+}
+
+struct kload_cpage {
+ unsigned long kcp_magic; /* 0: set to 0xC0DE by sys_kload; slot N is read by relocate_kernel at byte offset N * 8 */
+ unsigned long kcp_modulep; /* 1: modulep (offset 8) */
+ unsigned long kcp_physfree; /* 2: physfree (offset 16) */
+ unsigned long kcp_gdt; /* 3: address of the GDT region descriptor (offset 24) */
+ unsigned long kcp_pgtbl; /* 4: page table loaded into %cr3 (offset 32) */
+ unsigned long kcp_cp; /* 5: control page address under the new page table (offset 40) */
+ unsigned long kcp_entry_pt; /* 6: entry point of the new kernel (offset 48) */
+ unsigned long kcp_idt; /* 7: address of the IDT region descriptor (offset 56) */
+} __packed;
+
+/*
+ * Append one entry to the list that relocate_kernel walks.
+ *
+ * item_m is an address with one of the KLOAD_* flag bits ORed in.
+ * Entries are stored in page-sized arrays.  When the current array is
+ * exhausted (or on the very first call, when the list only consists of
+ * items->head) a new page is allocated and the current slot becomes a
+ * KLOAD_INDIRECT entry pointing at it.
+ *
+ * Returns 0 on success or ENOMEM if a new list page cannot be allocated.
+ */
+static int
+kload_add_page(struct kload_items *items, unsigned long item_m)
+{
+	vm_paddr_t phys;
+	unsigned long va;
+
+	/*
+	 * A non-zero slot is already occupied (kload_copyin_segment leaves
+	 * its KLOAD_DONE marker at the current position), so step past it.
+	 * NOTE(review): if the occupied slot is last_item this steps beyond
+	 * the page and the test below no longer fires -- confirm.
+	 */
+	if (*items->item != 0) {
+		printf(" item != 0 0x%lx\n", *items->item);
+		items->item++;
+		items->i_count--;
+	}
+
+	if ((items->item == items->last_item) || (items->i_count == 0)) {
+		/* out of space in current page grab a new one */
+		va = (unsigned long)kload_kmem_alloc(kernel_map, PAGE_SIZE);
+		/* Never link a failed allocation into the list. */
+		if (va == 0)
+			return (ENOMEM);
+		if (items->head_va == 0)
+			items->head_va = va;
+
+		phys = vtophys(va);
+		/* store the address of the indirect page */
+		*items->item = (unsigned long)(phys + KLOADBASE) |
+		    KLOAD_INDIRECT;
+		items->item = (unsigned long *)va;
+		/* ok now move to new page to start storing address */
+		items->last_item = (unsigned long *)va +
+		    ((PAGE_SIZE / sizeof(unsigned long)) - 1);
+		items->i_count = ((PAGE_SIZE / sizeof(unsigned long)) - 1);
+	}
+	*items->item = item_m;
+	items->item++;
+	items->i_count--;
+
+	return (0);
+}
+
+/*
+ * SYSINIT hook: reserve IMAGE_PREALLOC bytes for the kernel image up
+ * front and remember the address in kload_image_va.
+ */
+static void
+kload_init(void)
+{
+	const int prealloc_bytes = IMAGE_PREALLOC;
+
+	kload_image_va = kload_kmem_alloc(kernel_map, prealloc_bytes);
+	printf("%s 0x%lx preallocated size %d\n", __func__, kload_image_va,
+	    prealloc_bytes);
+}
+
+/*
+ * Copy one kernel segment from user space into staging memory and add
+ * each of its pages to k_items as a KLOAD_SOURCE entry, followed by a
+ * KLOAD_DONE marker.
+ *
+ * The preallocated region at kload_image_va is used when it is large
+ * enough; otherwise it is released and a region of the right size is
+ * allocated.  seg is the segment index and is currently unused.
+ *
+ * Returns 0 on success, ENOMEM if memory cannot be allocated, or the
+ * error from copyin().
+ */
+static int
+kload_copyin_segment(struct kload_segment *khdr, int seg)
+{
+	int i;
+	int num_pages;
+	int error = 0;
+	vm_offset_t va = kload_image_va;
+
+	num_pages = roundup2(khdr->k_memsz, PAGE_SIZE) >> PAGE_SHIFT;
+
+	/* check to make sure the preallocated space is big enough */
+	if (va && ((num_pages * PAGE_SIZE) > IMAGE_PREALLOC)) {
+		printf("%s size over 24Meg %d\n", __func__,
+		    num_pages * PAGE_SIZE);
+		kmem_free(kernel_map, va, IMAGE_PREALLOC);
+		/*
+		 * Forget the freed region as well; otherwise the next call
+		 * would pick it up again and copy into freed memory.
+		 */
+		kload_image_va = 0;
+		va = 0;
+	}
+
+	if (va == 0) {
+		va = kload_kmem_alloc(kernel_map, num_pages * PAGE_SIZE);
+		if (va == 0)
+			return (ENOMEM);
+	}
+
+	/* need to set up a START dst page */
+	for (i = 0; i < num_pages; i++) {
+		error = kload_add_page(k_items,
+		    (vtophys(va + (i * PAGE_SIZE)) + KLOADBASE) | KLOAD_SOURCE);
+		if (error != 0)
+			return (error);
+	}
+	printf("%s starting copyin... ", __func__);
+	*k_items->item = KLOAD_DONE;
+	if ((error = copyin(khdr->k_buf, (void *)va, khdr->k_memsz)) != 0)
+		return (error);
+	/* Print where the marker lives, not the address of the pointer. */
+	printf("copied %d bytes to va %p done marker at %p\n",
+	    (int)khdr->k_memsz, (void *)va, (void *)k_items->item);
+
+	return (0);
+}
+
+/*
+ * kload system call: stage a new kernel image and optionally start it.
+ *
+ * uap->buf / uap->buflen describe a user-space struct kload; buflen must
+ * equal sizeof(struct kload).  When uap->flags contains KLOAD_EXEC or
+ * KLOAD_REBOOT the new kernel is started right away; otherwise the image
+ * is only staged and the shutdown_final handler starts it later.
+ *
+ * Requires PRIV_REBOOT.  Returns 0 on success or an errno value: EINVAL
+ * for a wrong buflen, ENOMEM when a staging allocation fails, or the
+ * error reported by priv_check(), copyin() or kload_copyin_segment().
+ */
+int
+sys_kload(struct thread *td, struct kload_args *uap)
+{
+	static int handler_registered = 0;
+	struct region_descriptor *null_idt;
+	struct kload_cpage *k_cpage;
+	struct kload kld;
+	int error = 0;
+	int i;
+	size_t bufsize = uap->buflen;
+
+	error = priv_check(td, PRIV_REBOOT);
+	if (error)
+		return (error);
+
+	/*
+	 * Reject a mismatched structure size with a real error; returning
+	 * the still-zero "error" here would report success to the caller.
+	 */
+	if (bufsize != sizeof(struct kload)) {
+		printf("Hmm size not right %jd %jd\n", (uintmax_t)bufsize,
+		    (uintmax_t)sizeof(struct kload));
+		return (EINVAL);
+	}
+	if ((error = copyin(uap->buf, &kld, bufsize)) != 0)
+		return (error);
+
+	/*
+	 * hook into the shutdown/reboot path so
+	 * we end up here before cpu reset.  Register only once; otherwise
+	 * every call would add another copy of the handler.
+	 */
+	if (!handler_registered) {
+		EVENTHANDLER_REGISTER(shutdown_final, kload_shutdown_final,
+		    NULL, SHUTDOWN_PRI_KLOAD);
+		handler_registered = 1;
+	}
+
+	max_addr = 0;
+	min_addr = ~0UL;
+
+	if (k_items == NULL) {
+		if ((k_items = malloc(sizeof(struct kload_items),
+		    M_KLOAD, M_WAITOK|M_ZERO)) == NULL)
+			return (ENOMEM);
+
+		k_items->head = 0;
+		k_items->head_va = 0;
+		k_items->item = &k_items->head;
+		k_items->last_item = &k_items->head;
+	}
+
+	/* Two pages: the control page followed by the code page. */
+	control_page = kload_kmem_alloc(kernel_map, PAGE_SIZE * 2);
+	if (control_page == 0)
+		return (ENOMEM);
+	k_cpage = (struct kload_cpage *)control_page;
+	code_page = control_page + PAGE_SIZE;
+
+	printf("copy from %p kernel_kump to 0x%lx size %d\n",
+	    relocate_kernel, (unsigned long)code_page, relocate_kernel_size);
+	memset((void *)control_page, 0, PAGE_SIZE * 2);
+	memcpy((void *)code_page, relocate_kernel, relocate_kernel_size);
+
+	k_cpage->kcp_magic = 0xC0DE;
+	k_cpage->kcp_modulep = kld.k_modulep;
+	k_cpage->kcp_physfree = kld.k_physfree;
+
+	mygdt = (struct region_descriptor *)kload_kmem_alloc(kernel_map,
+	    PAGE_SIZE);
+	if (mygdt == NULL)
+		return (ENOMEM);
+	k_cpage->kcp_gdt = (unsigned long)vtophys(mygdt) + KLOADBASE;
+
+	/* The descriptors live right behind the region descriptor. */
+	gdt_desc = (char *)mygdt + sizeof(struct region_descriptor);
+	setup_freebsd_gdt(gdt_desc);
+	mygdt->rd_limit = GUEST_GDTR_LIMIT;
+	mygdt->rd_base = (unsigned long)(vtophys(gdt_desc) + KLOADBASE);
+
+	/*
+	 * we pass the virt addr of control_page but we need
+	 * new virt addr as well
+	 */
+	k_cpage->kcp_cp = (unsigned long)(vtophys(control_page) + KLOADBASE);
+	k_cpage->kcp_entry_pt = kld.k_entry_pt;
+
+	/*
+	 * 10 segments should be more than enough
+	 * NOTE(review): "i <= 10" admits an 11th header (index 10); confirm
+	 * that kld.khdr has room for it.
+	 */
+	for (i = 0; (i < kld.num_hdrs && i <= 10); i++) {
+		error = kload_copyin_segment(&kld.khdr[i], i);
+		if (error != 0)
+			return (error);
+	}
+
+	null_idt = (struct region_descriptor *)
+	    kload_kmem_alloc(kernel_map, PAGE_SIZE);
+	if (null_idt == NULL)
+		return (ENOMEM);
+	k_cpage->kcp_idt = (unsigned long)vtophys(null_idt) + KLOADBASE;
+	/* Wipe the IDT. */
+	null_idt->rd_limit = 0;
+	null_idt->rd_base = 0;
+	/*
+	 * This must be built after all other allocations so it can
+	 * build a page table entry based on min max addresses
+	 */
+	/* returns new page table phys addr */
+	pgtbl = kload_build_page_table();
+	if (pgtbl == NULL)
+		return (ENOMEM);
+	kload_pgtbl = (unsigned long)pgtbl;
+	k_cpage->kcp_pgtbl = (unsigned long)pgtbl;
+
+	kload_ready = 1;
+
+	if (bootverbose)
+		printf("%s:\n\t"
+		    "head_va 0x%lx (phys 0x%lx)\n\t"
+		    "kernbase 0x%lx\n\t"
+		    "code_page 0x%lx (phys 0x%lx)\n\t"
+		    "control_page 0x%lx (phys 0x%lx)\n\t"
+		    "gdt 0x%lx (phys 0x%lx)\n\t"
+		    "idt 0x%lx (phys 0x%lx)\n\t"
+		    "k_entry_pt 0x%lx\n\t"
+		    "pgtbl (phys 0x%lx)\n\t"
+		    "max_addr (phys 0x%lx)\n\t"
+		    "min_addr (phys 0x%lx)\n\t"
+		    "modulep (phys 0x%lx)\n\t"
+		    "physfree (phys 0x%lx)\n",
+		    __func__,
+		    (unsigned long)k_items->head_va,
+		    (unsigned long)vtophys(k_items->head_va),
+		    (unsigned long)(KERNBASE + (vm_paddr_t)kernphys),
+		    (unsigned long)(control_page + PAGE_SIZE),
+		    (unsigned long)vtophys(control_page + PAGE_SIZE),
+		    (unsigned long)control_page,
+		    (unsigned long)vtophys(control_page),
+		    (unsigned long)mygdt,(unsigned long)vtophys(mygdt),
+		    (unsigned long)null_idt,(unsigned long)vtophys(null_idt),
+		    (unsigned long)kld.k_entry_pt,
+		    (unsigned long)pgtbl,
+		    (unsigned long)max_addr,
+		    (unsigned long)min_addr,
+		    (unsigned long)kld.k_modulep,
+		    (unsigned long)kld.k_physfree);
+
+	if(!(uap->flags & (KLOAD_EXEC | KLOAD_REBOOT)))
+		goto just_load;
+#if defined(SMP)
+	/*
+	 * Bind us to CPU 0 so that all shutdown code runs there.  Some
+	 * systems don't shutdown properly (i.e., ACPI power off) if we
+	 * run on another processor.
+	 */
+	printf("Binding process to cpu 0\n");
+	thread_lock(curthread);
+	sched_bind(curthread, 0);
+	thread_unlock(curthread);
+	KASSERT(PCPU_GET(cpuid) == 0, ("%s: not running on cpu 0", __func__));
+#endif
+	if(uap->flags & KLOAD_REBOOT) {
+		mtx_lock(&Giant);
+		kern_reboot(RB_KLOAD);
+		/* should not return */
+		mtx_unlock(&Giant);
+	}
+	/*
+	 * the reboot code will do a module shutdown so it is not
+	 * part kload_shutdown_final but it needs to happen.
+	 * So in the case of exec run it here
+	 */
+	if (bootverbose)
+		printf("%s: module_shutdown\n", __func__);
+	kload_module_shutdown();
+	kload_shutdown_final(NULL, RB_KLOAD);
+just_load:
+	printf("%s: Kernel image loaded waiting for reboot\n", __func__);
+	return (0);
+}
+
+/*
+ * shutdown_final event handler, also called directly by sys_kload.
+ *
+ * If a kernel image has been staged (kload_ready), suspend the other
+ * CPUs, tear down interrupt handling on this CPU and jump into
+ * relocate_kernel.  Without a staged image it only logs a message.
+ * Neither argument is used.
+ */
+static void
+kload_shutdown_final(void *arg, int howto)
+{
+	cpuset_t others;
+	int rc;
+
+	/* Just to make sure we are on cpu 0 */
+	KASSERT(PCPU_GET(cpuid) == 0, ("%s: not running on cpu 0", __func__));
+
+	if (!kload_ready) {
+		printf("kload_shutdown_final called without "
+		    "a new kernel loaded\n");
+		return;
+	}
+
+	printf("%s: suspend APs\n",__FUNCTION__);
+	others = all_cpus;
+	/* we should be bound to cpu 0 at this point */
+	printf("%s cpuid %d\n",__FUNCTION__,PCPU_GET(cpuid));
+	/* Everyone except ourselves and the CPUs that are already stopped. */
+	CPU_CLR(PCPU_GET(cpuid), &others);
+	CPU_NAND(&others, &stopped_cpus);
+	if (!CPU_EMPTY(&others)) {
+		printf("cpu_reset: Stopping other CPUs\n");
+		suspend_cpus(others);
+	}
+
+	if (bootverbose)
+		printf("%s: clear all handlers\n", __func__);
+	intr_clear_all_handlers();
+
+	if (bootverbose)
+		printf("%s: loapic_clear_lapic\n", __func__);
+	lapic_clear_lapic(1);
+
+	intr_suspend();
+
+	if (bootverbose)
+		printf("%s disable_interrupts cpuid %d\n",
+		    __func__, PCPU_GET(cpuid));
+	disable_intr();
+
+	printf("calling relocate_kernel\n");
+	rc = relocate_kernel(vtophys(k_items->head_va) + KLOADBASE,
+	    /* dest addr i.e. overwrite existing kernel */
+	    KERNBASE + (vm_paddr_t)kernphys,
+	    vtophys(code_page) + KLOADBASE,
+	    control_page);
+	/* currently this will never happen */
+	printf("\trelocate_new_kernel returned %d\n",rc);
+}
diff --git a/sys/kern/kern_module.c b/sys/kern/kern_module.c
index b769320..2a880c5 100644
--- a/sys/kern/kern_module.c
+++ b/sys/kern/kern_module.c
@@ -64,6 +64,7 @@ static TAILQ_HEAD(modulelist, module) modules;
struct sx modules_sx;
static int nextid = 1;
static void module_shutdown(void *, int);
+void kload_module_shutdown(void);
static int
modevent_nop(module_t mod, int what, void *arg)
@@ -107,6 +108,12 @@ module_shutdown(void *arg1, int arg2)
}
+/*
+ * Non-static wrapper around the file-local module_shutdown() handler so
+ * that the kload code can invoke it directly; the handler is called with
+ * a NULL argument and a howto value of 0.
+ */
void
+kload_module_shutdown(void) {
+ module_shutdown(NULL, 0);
+}
+
+
+void
module_register_init(const void *arg)
{
const moduledata_t *data = (const moduledata_t *)arg;
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index 96f2400..6edd7fa 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib
+ * created from FreeBSD
*/
const char *syscallnames[] = {
@@ -540,4 +540,5 @@ const char *syscallnames[] = {
"posix_fallocate", /* 530 = posix_fallocate */
"posix_fadvise", /* 531 = posix_fadvise */
"wait6", /* 532 = wait6 */
+ "kload", /* 533 = kload */
};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 148dea3..eb2b648 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -955,5 +955,7 @@
int *status, int options, \
struct __wrusage *wrusage, \
siginfo_t *info); }
+533 AUE_NULL STD { int kload(const void *buf, size_t buflen, \
+ int flags); }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
index c755f92..c48057a 100644
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -3286,6 +3286,15 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 6;
break;
}
+ /* kload */
+ case 533: {
+ struct kload_args *p = params;
+ uarg[0] = (intptr_t) p->buf; /* const void * */
+ uarg[1] = p->buflen; /* size_t */
+ iarg[2] = p->flags; /* int */
+ *n_args = 3;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -8745,6 +8754,22 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* kload */
+ case 533:
+ switch(ndx) {
+ case 0:
+ p = "const void *";
+ break;
+ case 1:
+ p = "size_t";
+ break;
+ case 2:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -10638,6 +10663,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* kload */
+ case 533:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h
index 6d37bf4..31aa4e4 100644
--- a/sys/sys/eventhandler.h
+++ b/sys/sys/eventhandler.h
@@ -173,6 +173,7 @@ typedef void (*shutdown_fn)(void *, int);
#define SHUTDOWN_PRI_FIRST EVENTHANDLER_PRI_FIRST
#define SHUTDOWN_PRI_DEFAULT EVENTHANDLER_PRI_ANY
#define SHUTDOWN_PRI_LAST EVENTHANDLER_PRI_LAST
+/*
+ * Priority for the kload shutdown handler: 100 below EVENTHANDLER_PRI_LAST.
+ * Parenthesized so the expansion stays a single operand when the macro is
+ * used inside a larger expression.
+ */
+#define SHUTDOWN_PRI_KLOAD (EVENTHANDLER_PRI_LAST - 100)
EVENTHANDLER_DECLARE(shutdown_pre_sync, shutdown_fn); /* before fs sync */
EVENTHANDLER_DECLARE(shutdown_post_sync, shutdown_fn); /* after fs sync */
diff --git a/sys/sys/kload.h b/sys/sys/kload.h
new file mode 100644
index 0000000..0920176
--- /dev/null
+++ b/sys/sys/kload.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2011 - 2012
+ * Russell Cattelan Digital Elves Inc
+ * Copyright (c)
+ * Isilon Systems, LLC. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef __KLOAD_H__
+#define __KLOAD_H__
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+#define KLOAD_LOAD 0
+#define KLOAD_REBOOT (1 << 0 )
+#define KLOAD_EXEC (1 << 1 )
+
+/*
+ * Describes one segment of a kernel image passed to the kload syscall.
+ * usr.sbin/kload fills in only k_buf and k_memsz.
+ */
+struct kload_segment {
+ void *k_buf; /* start of the segment data in the caller's memory */
+ size_t k_memsz; /* length of the segment in bytes */
+ unsigned long *k_pages; /* NOTE(review): not set by the userland tool; presumably kernel-internal -- confirm */
+ unsigned long k_seg_start; /* NOTE(review): not set by the userland tool; meaning not visible here */
+};
+
+/*
+ * Argument block for the kload syscall: the image segments plus the
+ * values needed to start the new kernel.
+ */
+struct kload {
+ struct kload_segment khdr[10]; /* segment table; usr.sbin/kload uses only khdr[0] */
+ int num_hdrs; /* number of valid entries in khdr[] */
+ unsigned long k_entry_pt; /* kernel entry point as reported by the loader */
+ unsigned int k_modulep; /* read by usr.sbin/kload from word 1 of the loader's stack page */
+ unsigned int k_physfree; /* read from word 2 of the stack page, then raised to at least k_modulep + 4 pages */
+};
+
+//typedef u_long kload_item_t;
+#define KLOAD_DESTINATION 0x1
+#define KLOAD_INDIRECT 0x2
+#define KLOAD_DONE 0x4
+#define KLOAD_SOURCE 0x8
+
+/*
+ * Bookkeeping for the list of items handed to relocate_kernel().
+ * NOTE(review): only head_va is used in the visible kernel code (its
+ * physical address is passed as the indirection page); the descriptions
+ * of the other fields below are inferred from their names -- confirm.
+ */
+struct kload_items {
+ unsigned long head; /* presumably the first list entry */
+ vm_offset_t head_va; /* kernel virtual address of the list head page */
+ unsigned long *last_item; /* presumably the most recently written entry */
+ unsigned long *item; /* presumably the next entry to write */
+ int i_count; /* presumably the number of entries */
+ unsigned long flags; /* not used yet */
+};
+
+/*
+ * defined in <arch>/kload.c
+ * (except kload_module_shutdown, which is defined in kern/kern_module.c)
+ */
+pt_entry_t * kload_build_page_table(void);
+void setup_freebsd_gdt(uint64_t *);
+void kload_module_shutdown(void);
+
+/*
+ * defined in <arch>/kload_exec.S
+ */
+unsigned long relocate_kernel(unsigned long indirection_page,
+ unsigned long page_list, unsigned long code_page,
+ unsigned long control_page);
+extern int relocate_kernel_size;
+
+#endif
diff --git a/sys/sys/reboot.h b/sys/sys/reboot.h
index 6b8e25e..9b70160 100644
--- a/sys/sys/reboot.h
+++ b/sys/sys/reboot.h
@@ -59,6 +59,7 @@
#define RB_RESERVED1 0x40000 /* reserved for internal use of boot blocks */
#define RB_RESERVED2 0x80000 /* reserved for internal use of boot blocks */
#define RB_PAUSE 0x100000 /* pause after each output line during probe */
+#define RB_KLOAD 0x200000 /* reboot using kload'ed kernel image */
#define RB_MULTIPLE 0x20000000 /* use multiple consoles */
#define RB_BOOTINFO 0x80000000 /* have `struct bootinfo *' arg */
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
index 6b0cd67..f90cad2 100644
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib
+ * created from FreeBSD
*/
#define SYS_syscall 0
@@ -452,4 +452,5 @@
#define SYS_posix_fallocate 530
#define SYS_posix_fadvise 531
#define SYS_wait6 532
-#define SYS_MAXSYSCALL 533
+#define SYS_kload 533
+#define SYS_MAXSYSCALL 534
diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk
index 25f0470..39fd05b 100644
--- a/sys/sys/syscall.mk
+++ b/sys/sys/syscall.mk
@@ -1,7 +1,7 @@
# FreeBSD system call names.
# DO NOT EDIT-- this file is automatically generated.
# $FreeBSD$
-# created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib
+# created from FreeBSD
MIASM = \
syscall.o \
exit.o \
@@ -400,4 +400,5 @@ MIASM = \
rctl_remove_rule.o \
posix_fallocate.o \
posix_fadvise.o \
- wait6.o
+ wait6.o \
+ kload.o
diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h
index ef59ad5..81dac15 100644
--- a/sys/sys/sysproto.h
+++ b/sys/sys/sysproto.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: head/sys/kern/syscalls.master 242958 2012-11-13 12:52:31Z kib
+ * created from FreeBSD
*/
#ifndef _SYS_SYSPROTO_H_
@@ -1762,6 +1762,11 @@ struct wait6_args {
char wrusage_l_[PADL_(struct __wrusage *)]; struct __wrusage * wrusage; char wrusage_r_[PADR_(struct __wrusage *)];
char info_l_[PADL_(siginfo_t *)]; siginfo_t * info; char info_r_[PADR_(siginfo_t *)];
};
+struct kload_args {
+ char buf_l_[PADL_(const void *)]; const void * buf; char buf_r_[PADR_(const void *)];
+ char buflen_l_[PADL_(size_t)]; size_t buflen; char buflen_r_[PADR_(size_t)];
+ char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)];
+};
int nosys(struct thread *, struct nosys_args *);
void sys_sys_exit(struct thread *, struct sys_exit_args *);
int sys_fork(struct thread *, struct fork_args *);
@@ -2144,6 +2149,7 @@ int sys_rctl_remove_rule(struct thread *, struct rctl_remove_rule_args *);
int sys_posix_fallocate(struct thread *, struct posix_fallocate_args *);
int sys_posix_fadvise(struct thread *, struct posix_fadvise_args *);
int sys_wait6(struct thread *, struct wait6_args *);
+int sys_kload(struct thread *, struct kload_args *);
#ifdef COMPAT_43
@@ -2840,6 +2846,7 @@ int freebsd7_shmctl(struct thread *, struct freebsd7_shmctl_args *);
#define SYS_AUE_posix_fallocate AUE_NULL
#define SYS_AUE_posix_fadvise AUE_NULL
#define SYS_AUE_wait6 AUE_WAIT6
+#define SYS_AUE_kload AUE_NULL
#undef PAD_
#undef PADL_
diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c
index 31cc80b..eee7678 100644
--- a/sys/x86/x86/intr_machdep.c
+++ b/sys/x86/x86/intr_machdep.c
@@ -197,6 +197,37 @@ intr_add_handler(const char *name, int vector, driver_filter_t filter,
}
+/*
+ * Walk every entry of interrupt_sources[] while holding intr_table_lock.
+ * For each source that has at least one handler: log it, decrement its
+ * handler count by one and, if the count reaches zero, disable the source
+ * and the interrupt through the source's PIC methods.  Always returns 0.
+ *
+ * NOTE(review): the count is decremented only once per source, so a
+ * source with more than one handler is left enabled -- confirm that this
+ * is intended for a function named "clear all".
+ */
int
+intr_clear_all_handlers(void)
+{
+ int i;
+ struct intsrc *isrc;
+
+ mtx_lock(&intr_table_lock);
+ for (i = 0; i < NUM_IO_INTS; i++) {
+ isrc = interrupt_sources[i];
+ if (isrc != NULL && isrc->is_handlers > 0) {
+ printf("%s:%d isrc[%d] %p is_handlers %d\n",
+ __FUNCTION__,__LINE__,i,isrc,
+ isrc->is_handlers);
+ isrc->is_handlers--;
+ if (isrc->is_handlers == 0) {
+ printf("\t dis_source %p dis_intr %p\n",
+ isrc->is_pic->pic_disable_source,
+ isrc->is_pic->pic_disable_intr);
+ /* mask the source without sending an EOI, then disable it */
+ isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
+ isrc->is_pic->pic_disable_intr(isrc);
+ }
+ intrcnt_updatename(isrc);
+
+ }
+ }
+ mtx_unlock(&intr_table_lock);
+ return 0;
+}
+
+
+
+int
intr_remove_handler(void *cookie)
{
struct intsrc *isrc;
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index e994172..6593e8b 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -346,6 +346,60 @@ lapic_dump(const char* str)
}
+/*
+ * Mask the local APIC's LVT entries on the current CPU and, when
+ * "disable" is non-zero, call lapic_disable() afterwards.
+ */
void
+lapic_clear_lapic(u_int disable) {
+
+ /* la is only used for the bootverbose message below */
+ struct lapic *la;
+ la = &lapics[lapic_id()];
+
+ uint32_t value;
+
+ if (bootverbose)
+ printf("%s lapic_id(%d) cpu(%d) la %p lapic %p\n",__FUNCTION__,
+ lapic_id(), PCPU_GET(cpuid), la, lapic);
+
+ /*
+ * First we set the mask bit to keep any new interrupts from
+ * arriving while allowing any pending interrupts to finish,
+ * *THEN* set the registers to default values.
+ * If the interrupts are not allowed to clear, a kload'ed / booted
+ * kernel will see the old interrupts before the appropriate handlers
+ * are in place and trigger a panic.
+ */
+#ifdef notyet
+ /* this seems to be causing APIC error in the new kernel */
+ value = lapic->lvt_error;
+ value |= APIC_LVT_M;
+ lapic->lvt_error = value;
+#endif
+
+ value = lapic->lvt_timer;
+ value |= APIC_LVT_M;
+ lapic->lvt_timer = value;
+
+ value = lapic->lvt_lint0;
+ value |= APIC_LVT_M;
+ lapic->lvt_lint0 = value;
+
+ value = lapic->lvt_lint1;
+ value |= APIC_LVT_M;
+ lapic->lvt_lint1 = value;
+
+ value = lapic->lvt_pcint;
+ value |= APIC_LVT_M;
+ lapic->lvt_pcint = value;
+
+ /*
+ * Reset the timer, LINT0 and LINT1 entries so that only the mask
+ * bit is set.  lvt_pcint is masked above but not reset here.
+ */
+ lapic->lvt_timer = APIC_LVTT_M; /* masked */
+ lapic->lvt_lint0 = APIC_LVT_M; /* masked */
+ lapic->lvt_lint1 = APIC_LVT_M; /* masked */
+
+ if (disable) {
+ printf("\tlapic disable\n");
+ lapic_disable();
+ }
+}
+
+void
lapic_setup(int boot)
{
struct lapic *la;
@@ -924,7 +978,20 @@ lapic_handle_error(void)
lapic->esr = 0;
esr = lapic->esr;
- printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
+ printf("CPU%d: local APIC error 0x%x\t", PCPU_GET(cpuid), esr);
+ if (lapic->esr & APIC_ESR_SEND_CS_ERROR)
+ printf("send_cs_error\n");
+ if (lapic->esr & APIC_ESR_RECEIVE_CS_ERROR)
+ printf("receive_cs_error\n");
+ if (lapic->esr & APIC_ESR_SEND_ACCEPT)
+ printf("send_accept\n");
+ if (lapic->esr & APIC_ESR_RECEIVE_ACCEPT)
+ printf("receive_accept\n");
+ if (lapic->esr & APIC_ESR_SEND_ILLEGAL_VECTOR)
+ printf("send_illegal_vector\n");
+ if (lapic->esr & APIC_ESR_ILLEGAL_REGISTER)
+ printf("illegal_register\n");
+
lapic_eoi();
}
diff --git a/sys/x86/x86/nexus.c b/sys/x86/x86/nexus.c
index 9ead8c8..0b28465 100644
--- a/sys/x86/x86/nexus.c
+++ b/sys/x86/x86/nexus.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <sys/rman.h>
#include <sys/interrupt.h>
+#include <sys/sysctl.h>
#include <machine/vmparam.h>
#include <vm/vm.h>
@@ -675,6 +676,52 @@ ram_probe(device_t dev)
}
+/*
+ * Sysctl handler exporting the BIOS system memory map (SMAP) that the
+ * loader attached to the kernel's preload metadata.
+ *
+ * SYSCTL_HANDLER_ARGS expands to:
+ *	struct sysctl_oid *oidp, void *arg1, intptr_t arg2,
+ *	struct sysctl_req *req
+ *
+ * The length of the map is stored in the 32-bit word immediately before
+ * the map itself.  When the kernel metadata or the SMAP record cannot be
+ * found, a zero-length object is handed to sysctl_handle_opaque() instead
+ * of dereferencing a NULL-based pointer.
+ *
+ * Returns the result of sysctl_handle_opaque().
+ */
static int
+smap_hdlr(SYSCTL_HANDLER_ARGS)
+{
+	struct bios_smap *smapbase = NULL;
+	caddr_t kmdp;
+	uint32_t smapsize = 0;
+
+	/* Retrieve the system memory map from the loader. */
+	kmdp = preload_search_by_type("elf kernel");
+	if (kmdp == NULL)
+		kmdp = preload_search_by_type(ELF_KERN_STR);
+	if (kmdp != NULL)
+		smapbase = (struct bios_smap *)preload_search_info(kmdp,
+		    MODINFO_METADATA | MODINFOMD_SMAP);
+
+	printf("%s smapbase %p\n",__FUNCTION__,smapbase);
+
+	/* Only read the length word when a map was actually found. */
+	if (smapbase != NULL)
+		smapsize = *((u_int32_t *)smapbase - 1);
+
+	return (sysctl_handle_opaque(oidp, smapbase, smapsize, req));
+}
+SYSCTL_PROC(_hw, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
+ 0, sizeof(struct bios_smap), smap_hdlr, "S,smap",
+ "Bios System Map");
+
+static int
ram_attach(device_t dev)
{
struct bios_smap *smapbase, *smap, *smapend;
diff --git a/usr.sbin/kload/Makefile b/usr.sbin/kload/Makefile
new file mode 100644
index 0000000..0d4a27a
--- /dev/null
+++ b/usr.sbin/kload/Makefile
@@ -0,0 +1,15 @@
+# $FreeBSD$
+
+PROG= kload
+SRCS= kload.c
+NO_MAN=
+
+#DPADD+= ${LIBVMMAPI}
+#LDADD+= -lvmmapi
+
+WARNS?= 3
+
+CFLAGS+=-I${.CURDIR}/../../sys/boot/userboot
+CFLAGS+=-I${.CURDIR}/../../sys
+
+.include <bsd.prog.mk>
diff --git a/usr.sbin/kload/kload.c b/usr.sbin/kload/kload.c
new file mode 100644
index 0000000..51ac3b7
--- /dev/null
+++ b/usr.sbin/kload/kload.c
@@ -0,0 +1,748 @@
+/*
+ * Copyright (c) 2011 - 2012
+ * Russell Cattelan Digital Elves Inc
+ * Copyright (c) 2011 - 2012
+ * Isilon Systems, LLC. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * process kill code borrowed from halt.c
+ */
+
+#include <sys/param.h>
+#include <sys/kload.h>
+#include <sys/ioctl.h>
+#include <sys/module.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/sysctl.h>
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <termios.h>
+#include <userboot.h>
+#include <unistd.h>
+
+/* prefix prepended to every path opened through k_open(); set by -h */
+char *host_base = "/";
+/* how can we get rid of these? I don't think we need them */
+
+/* terminal settings: oldterm is saved in main() and restored by k_exit() */
+struct termios term, oldterm;
+/* staging buffer the loader copies the kernel image into, and its size */
+char *image;
+size_t image_size;
+/* highest end offset written so far by k_copy_to_image() */
+size_t image_max_used = 0;
+/* descriptor of the -d disk image, or -1 when none was given */
+int disk_fd = -1;
+/* register values recorded by k_setreg() */
+uint64_t regs[16];
+uint64_t pc;
+/* set by the -e and -r command line options respectively */
+static int k_execute = 0;
+static int k_reboot = 0;
+/* handle returned by dlopen() for /boot/userboot.so */
+static void *dl_lib;
+/* the "Malloc" symbol resolved from userboot.so, used by k_buildsmap() */
+typedef void *(*M_func)(size_t bytes, const char *file, int line);
+M_func Malloc_func;
+static void k_exit(void *, int);
+static int shutdown_processes(void);
+static u_int get_pageins(void);
+static int kload_load_image(void *image,unsigned long entry_pt);
+
+/*
+ * Handle returned to the loader by k_open() for a host file or directory.
+ */
+struct load_file {
+ int l_isdir; /* 1 for a directory, 0 for a regular file */
+ size_t l_size; /* size recorded by k_open() */
+ struct stat l_stat; /* stat(2) result captured when the path was opened */
+ union {
+ int fd; /* valid when l_isdir == 0 */
+ DIR *dir; /* valid when l_isdir == 1 */
+ } l_u;
+};
+
+/*
+ * One entry of the memory map fetched from the hw.smap sysctl; the
+ * DEBUG code in k_buildsmap() walks the result as an array of these.
+ */
+struct smap {
+ uint64_t base; /* printed as "base" by the debug dump */
+ uint64_t length; /* printed as "length" by the debug dump */
+ uint32_t type; /* printed as "type" by the debug dump */
+} __packed;
+
+/*
+ * Translate the textual sysctl name "name" into a numeric MIB stored in
+ * oidp, which must have room for CTL_MAXNAME integers.
+ *
+ * The lookup is done by writing the name to the {0, 3} sysctl node
+ * (presumably the kernel's name-to-OID service -- confirm).
+ *
+ * Returns the number of MIB components, or the negative sysctl() result
+ * on failure.
+ */
+static int
+name2oid(char *name, int *oidp)
+{
+	int lookup[2] = { 0, 3 };
+	size_t nbytes = CTL_MAXNAME * sizeof(int);
+	int rc;
+
+	rc = sysctl(lookup, 2, oidp, &nbytes, name, strlen(name));
+	if (rc < 0)
+		return (rc);
+
+	return (nbytes / sizeof(int));
+}
+
+/*
+ * Loader console callback: write the character chr to standard output.
+ * The value is narrowed to a single byte first so the byte written does
+ * not depend on the in-memory layout of an int.  arg is unused.
+ */
+static void
+k_putc(void *arg, int chr)
+{
+	unsigned char c = (unsigned char)chr;
+
+	(void)write(1, &c, 1);
+}
+
+/*
+ * Loader console callback: read one byte from standard input.
+ * Returns the byte as a value in 0..255, or -1 when nothing could be
+ * read.  The byte is held in an unsigned char so that input 0xff is not
+ * confused with the -1 failure value.  arg is unused.
+ */
+static int
+k_getc(void *arg)
+{
+	unsigned char chr;
+
+	if (read(0, &chr, 1) == 1)
+		return (chr);
+	return (-1);
+}
+
+/*
+ * Loader console callback: report whether input is waiting on standard
+ * input.  Returns 1 when FIONREAD reports at least one pending byte and
+ * 0 otherwise (including when the ioctl fails).  arg is unused.
+ */
+static int
+k_poll(void *arg)
+{
+	int pending;
+
+	if (ioctl(0, FIONREAD, &pending) < 0)
+		return (0);
+	return (pending > 0);
+}
+
+/*
+ * Loader file callback: open "filename" relative to host_base.
+ *
+ * On success a newly allocated struct load_file is stored in *lf_ret and
+ * 0 is returned; the handle is released by k_close().  On failure an
+ * errno value is returned and nothing is stored:
+ *	ENOENT		host_base is not set
+ *	ENAMETOOLONG	the combined path does not fit in PATH_MAX
+ *	ENOMEM		the handle could not be allocated
+ *	EINVAL		the path is neither a directory nor a regular file
+ *	other		errno from stat(2), opendir(3) or open(2)
+ * arg is unused.
+ */
+static int
+k_open(void *arg, const char *filename, void **lf_ret)
+{
+	struct load_file *lf;
+	size_t len;
+	int error;
+	char path[PATH_MAX];
+
+	if (!host_base) {
+		printf("Host base not set\n");
+		return (ENOENT);
+	}
+
+	/* Join host_base and filename, dropping one trailing '/'. */
+	len = strlcpy(path, host_base, sizeof(path));
+	if (len >= sizeof(path))
+		return (ENAMETOOLONG);
+	if (len > 0 && path[len - 1] == '/')
+		path[len - 1] = '\0';
+	if (strlcat(path, filename, sizeof(path)) >= sizeof(path))
+		return (ENAMETOOLONG);
+
+	lf = malloc(sizeof(*lf));
+	if (lf == NULL)
+		return (ENOMEM);
+
+	if (stat(path, &lf->l_stat) < 0) {
+		error = errno;
+		goto out;
+	}
+	/* The size comes from the stat result just obtained. */
+	lf->l_size = lf->l_stat.st_size;
+
+	if (S_ISDIR(lf->l_stat.st_mode)) {
+		lf->l_isdir = 1;
+		lf->l_u.dir = opendir(path);
+		if (lf->l_u.dir == NULL) {
+			error = errno;
+			goto out;
+		}
+	} else if (S_ISREG(lf->l_stat.st_mode)) {
+		lf->l_isdir = 0;
+		lf->l_u.fd = open(path, O_RDONLY);
+		if (lf->l_u.fd < 0) {
+			error = errno;
+			goto out;
+		}
+	} else {
+		error = EINVAL;
+		goto out;
+	}
+
+	*lf_ret = lf;
+	return (0);
+
+out:
+	free(lf);
+	return (error);
+}
+
+/*
+ * Loader file callback: release a handle obtained from k_open().
+ * Closes the directory stream or file descriptor, frees the handle and
+ * returns 0.  arg is unused.
+ */
+static int
+k_close(void *arg, void *h)
+{
+	struct load_file *handle = h;
+
+	if (handle->l_isdir != 0) {
+		closedir(handle->l_u.dir);
+	} else {
+		close(handle->l_u.fd);
+	}
+	free(handle);
+
+	return (0);
+}
+
+/*
+ * Loader file callback: return the l_isdir flag of the handle h
+ * (non-zero for a directory).  arg is unused.
+ */
+static int
+k_isdir(void *arg, void *h)
+{
+	struct load_file *handle = h;
+
+	return (handle->l_isdir);
+}
+
+/*
+ * Loader file callback: read up to "size" bytes from the regular file
+ * behind handle h into dst.
+ *
+ * On success returns 0 and stores the number of bytes NOT read in
+ * *resid_return.  Returns EINVAL when h is a directory, or the errno
+ * from read(2) on failure (matching k_seek() and k_diskread()).
+ * arg is unused.
+ */
+static int
+k_read(void *arg, void *h, void *dst, size_t size, size_t *resid_return)
+{
+	struct load_file *lf = (struct load_file *)h;
+	ssize_t sz;
+
+	if (lf->l_isdir)
+		return (EINVAL);
+
+	sz = read(lf->l_u.fd, dst, size);
+	if (sz < 0)
+		return (errno);
+	*resid_return = size - sz;
+	return (0);
+}
+
+/*
+ * Loader file callback: fetch the next entry of the directory behind
+ * handle h.
+ *
+ * On success returns 0 and fills in the file number, type, name length
+ * and NUL-terminated name.  Returns EINVAL when h is not a directory and
+ * ENOENT when readdir(3) yields no further entry.  arg is unused.
+ */
+static int
+k_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
+    size_t *namelen_return, char *name)
+{
+	struct load_file *handle = h;
+	struct dirent *entry;
+
+	if (handle->l_isdir == 0)
+		return (EINVAL);
+
+	entry = readdir(handle->l_u.dir);
+	if (entry == NULL)
+		return (ENOENT);
+
+	/*
+	 * d_namlen is in the range 0..255 and therefore below PATH_MAX,
+	 * so the name is copied without a length test.
+	 */
+	memcpy(name, entry->d_name, entry->d_namlen);
+	name[entry->d_namlen] = 0;
+	*namelen_return = entry->d_namlen;
+	*type_return = entry->d_type;
+	*fileno_return = entry->d_fileno;
+
+	return (0);
+}
+
+/*
+ * Loader file callback: reposition the regular file behind handle h
+ * using lseek(2) semantics for offset and whence.
+ * Returns 0 on success, EINVAL for a directory handle, or errno when
+ * lseek(2) fails.  arg is unused.
+ */
+static int
+k_seek(void *arg, void *h, uint64_t offset, int whence)
+{
+	struct load_file *handle = h;
+
+	if (handle->l_isdir)
+		return (EINVAL);
+
+	return (lseek(handle->l_u.fd, offset, whence) < 0 ? errno : 0);
+}
+
+/*
+ * Loader file callback: report mode, owner, group and size of handle h
+ * from the stat data captured by k_open().  Always returns 0.
+ * arg is unused.
+ */
+static int
+k_stat(void *arg, void *h,
+    int *mode_return, int *uid_return,
+    int *gid_return, uint64_t *size_return)
+{
+	const struct stat *sb = &((struct load_file *)h)->l_stat;
+
+	*mode_return = sb->st_mode;
+	*uid_return = sb->st_uid;
+	*gid_return = sb->st_gid;
+	*size_return = sb->st_size;
+
+	return (0);
+}
+
+/*
+ * Loader disk callback: read "size" bytes at byte "offset" of the disk
+ * image into dst.
+ *
+ * Only unit 0 exists and only when a disk image was opened; otherwise
+ * EIO is returned.  On success returns 0 and stores the number of bytes
+ * NOT read in *resid_return; on pread(2) failure returns errno.
+ * arg is unused.
+ */
+static int
+k_diskread(void *arg, int unit, uint64_t offset, void *dst, size_t size,
+    size_t *resid_return)
+{
+	ssize_t nread;
+
+	if (disk_fd == -1 || unit != 0)
+		return (EIO);
+
+	nread = pread(disk_fd, dst, size, offset);
+	if (nread < 0)
+		return (errno);
+
+	*resid_return = size - nread;
+	return (0);
+}
+
+/*
+ * Loader disk callback: disk ioctls are not implemented; every request
+ * is answered with ENOTTY.  All arguments are unused.
+ */
+static int
+k_diskioctl(void *arg, int unit, u_long cmd, void *data)
+{
+ /* not supported by kload */
+ return (ENOTTY);
+}
+
+/*
+ * Loader "copyin" callback.  Despite the name this is unlike the kernel's
+ * copyin/copyout: it copies "size" bytes from the buffer "from" into the
+ * staging image at offset "to" (only the low 31 bits of "to" are used).
+ *
+ * Returns EFAULT when the offset lies beyond the image.  A copy that
+ * would run past the end of the image is shortened (with a warning).
+ * image_max_used is raised to the end of the copied range when that is
+ * larger.  Returns 0 otherwise.  arg is unused.
+ */
+static int
+k_copy_to_image(void *arg, const void *from, uint64_t to, size_t size)
+{
+	uint64_t off = to & 0x7fffffff;
+	size_t count = size;
+
+	if (off > image_size)
+		return (EFAULT);
+
+	if (off + count > image_size) {
+		count = image_size - off;
+		printf("WARNING this should never happen\n");
+	}
+	memcpy(image + off, from, count);
+
+	if (image_max_used < off + count)
+		image_max_used = off + count;
+
+	return (0);
+}
+
+/*
+ * Loader "copyout" callback: copy "size" bytes out of the staging image,
+ * starting at offset "from" (only the low 31 bits are used), into the
+ * buffer "to".
+ *
+ * Returns EFAULT when the offset lies beyond the image; a copy that would
+ * run past the end of the image is silently shortened.  Returns 0
+ * otherwise.  arg is unused.
+ */
+static int
+k_copy_from_image(void *arg, uint64_t from, void *to, size_t size)
+{
+	uint64_t off = from & 0x7fffffff;
+	size_t count = size;
+
+	if (off > image_size)
+		return (EFAULT);
+
+	if (off + count > image_size)
+		count = image_size - off;
+	memcpy(to, image + off, count);
+
+	return (0);
+}
+
+/*
+ * Loader callback: record value v for register number r in regs[].
+ * Register numbers outside 0..15 are ignored.  arg is unused.
+ */
+static void
+k_setreg(void *arg, int r, uint64_t v)
+{
+	if (r >= 0 && r < 16)
+		regs[r] = v;
+}
+
+/*
+ * Loader callback for setting a model-specific register; kload does not
+ * need the value, so the call is a no-op.
+ */
+static void
+k_setmsr(void *arg, int r, uint64_t v)
+{
+ /* Unneeded */
+}
+
+/*
+ * Loader callback for setting a control register; kload does not need
+ * the value, so the call is a no-op.
+ */
+static void
+k_setcr(void *arg, int r, uint64_t v)
+{
+ /* Unneeded */
+}
+
+/*
+ * Loader callback for installing a GDT; kload does not need it, so the
+ * call is a no-op.
+ */
+static void
+k_setgdt(void *arg, uint64_t v, size_t sz)
+{
+ /* Unneeded */
+}
+
+/*
+ * Loader callback invoked with the kernel entry point once the image has
+ * been assembled.  Hands the image to the kernel through
+ * kload_load_image() and then terminates the program via k_exit(); the
+ * exit status is 0 when the image was accepted and 1 otherwise.
+ */
+static void
+k_exec(void *arg, uint64_t entry_pt)
+{
+#ifdef DEBUG
+	printf("Execute at 0x%jx\n", (uintmax_t)entry_pt);
+	printf("image size max used %ju endof page %ju\n",
+	    (uintmax_t)image_max_used,
+	    (uintmax_t)roundup2(image_max_used, PAGE_SIZE));
+#endif
+	k_exit(arg, kload_load_image(image, entry_pt) != 0);
+}
+
+/*
+ * Loader callback: pause for usec microseconds using usleep(3).
+ * arg is unused.
+ */
+static void
+k_delay(void *arg, int usec)
+{
+ usleep(usec);
+}
+
+/*
+ * Loader callback: restore the terminal settings saved in oldterm on
+ * standard input and terminate the process with exit status v.
+ * arg is unused.
+ */
+static void
+k_exit(void *arg, int v)
+{
+ tcsetattr(0, TCSAFLUSH, &oldterm);
+ exit(v);
+}
+
+/*
+ * Loader callback: report the amount of memory to the loader.
+ *
+ * *lowmem receives the value of the hw.physmem sysctl and *highmem is
+ * always 0.  If the sysctl fails a warning is printed and 0 is reported
+ * rather than an indeterminate value.  arg is unused.
+ */
+static void
+k_getmem(void *arg, uint64_t *lowmem, uint64_t *highmem)
+{
+	int mib[2] = { CTL_HW, HW_PHYSMEM };
+	unsigned long long physmem = 0;
+	size_t len = sizeof(physmem);
+
+	if (sysctl(mib, 2, &physmem, &len, NULL, 0) < 0) {
+		warn("sysctl hw.physmem");
+		physmem = 0;
+	}
+
+	*lowmem = physmem;
+	*highmem = 0;
+
+	printf("%s:%d lowmem %ju highmem %ju\n",__FUNCTION__,__LINE__,
+	    (uintmax_t)*lowmem, (uintmax_t)*highmem);
+}
+
+/*
+ * Loader callback: return the idx'th "name=value" string of the built-in
+ * environment, or NULL when idx is past the last entry.  An index outside
+ * the table (including a negative one) also yields NULL instead of
+ * reading beyond the array.  arg is unused.
+ */
+static const char *
+k_getenv(void *arg, int idx)
+{
+	static const char *vars[] = {
+		"foo=bar",
+		"bar=barbar",
+		NULL
+	};
+
+	if (idx < 0 || (size_t)idx >= sizeof(vars) / sizeof(vars[0]))
+		return (NULL);
+
+	return (vars[idx]);
+}
+
+/*
+ * Loader callback: fetch the running system's memory map from the
+ * hw.smap sysctl.
+ *
+ * On success returns 0, stores the buffer in *smap_void and its length in
+ * bytes in *outlen.  Returns 1 when the sysctl cannot be resolved or
+ * read, or when the buffer cannot be allocated; the outputs are then left
+ * untouched.  arg is unused.
+ */
+static int
+k_buildsmap(void *arg, void **smap_void, size_t *outlen)
+{
+	struct smap *smapbase;
+	size_t len = 0;
+	char name[] = "hw.smap";
+	int mib[CTL_MAXNAME];
+	int miblen;
+
+	miblen = name2oid(name, mib);
+	if (miblen < 0) {
+		printf("kload failed to look up sysctl %s\n", name);
+		return 1;
+	}
+
+	/* First call only asks for the size of the current smap. */
+	if (sysctl(mib, miblen, NULL, &len, NULL, 0) < 0) {
+		printf("kload failed to size sysctl %s\n", name);
+		return 1;
+	}
+
+	/*
+	 * Use the malloc function from libstand/userboot.so since
+	 * bios_addsmapdata will free the memory using the libstand Free,
+	 * so be careful not to use the standard malloc here.
+	 */
+	smapbase = Malloc_func(len, __FILE__, __LINE__);
+	if (!smapbase) {
+		printf("kload failed to allocate space for smap\n");
+		return 1;
+	}
+
+	/* Second call fetches the data; len is updated to the bytes stored. */
+	if (sysctl(mib, miblen, smapbase, &len, NULL, 0) < 0) {
+		/*
+		 * The buffer is not released here: the matching libstand
+		 * Free is not resolved by this program.
+		 */
+		printf("kload failed to read sysctl %s\n", name);
+		return 1;
+	}
+
+	*outlen = len;
+	*smap_void = smapbase;
+
+#ifdef DEBUG
+	{
+		struct smap *smap, *smapend;
+		smapend = (struct smap *)((uintptr_t)smapbase + len);
+		for (smap = smapbase; smap < smapend; smap++) {
+			printf("\ttype %d base 0x%016lx length 0x%016lx\n",
+			    smap->type, smap->base, smap->length);
+		}
+	}
+#endif
+
+	return 0;
+}
+
+/*
+ * Callback table handed to loader_main(): every service the userboot
+ * loader needs from its host is routed to one of the k_* functions in
+ * this file.
+ */
+struct loader_callbacks cb = {
+
+ /* host file access */
+ .open = k_open,
+ .close = k_close,
+ .isdir = k_isdir,
+ .read = k_read,
+ .readdir = k_readdir,
+ .seek = k_seek,
+ .stat = k_stat,
+
+ /* disk image access */
+ .diskread = k_diskread,
+ .diskioctl = k_diskioctl,
+
+ /* staging image and register state */
+ .copyin = k_copy_to_image,
+ .copyout = k_copy_from_image,
+ .setreg = k_setreg,
+ .setmsr = k_setmsr,
+ .setcr = k_setcr,
+ .setgdt = k_setgdt,
+ .exec = k_exec,
+
+ /* miscellaneous services */
+ .delay = k_delay,
+ .exit = k_exit,
+ .getmem = k_getmem,
+
+ /* console and environment */
+ .putc = k_putc,
+ .getc = k_getc,
+ .poll = k_poll,
+ .getenv = k_getenv,
+ .buildsmap = k_buildsmap,
+};
+
+/*
+ * Print the command line synopsis (including the -k option accepted by
+ * main()) to standard error and exit with status 1.
+ */
+static void
+usage(void)
+{
+	fprintf(stderr, "usage: kload [-d <disk image path>] "
+	    "[-h <host filesystem path>] [-k <name>=<value>] [-e | -r]\n");
+	exit(1);
+}
+
+/*
+ * Resolve "name" from the already opened userboot library.  When the
+ * symbol is missing the dlerror() text is printed and the program exits
+ * with status 1.
+ */
+static void *
+kload_dlsym(const char *name)
+{
+	void *sym;
+
+	sym = dlsym(dl_lib, name);
+	if (sym == NULL) {
+		printf("%s\n", dlerror());
+		exit(1);
+	}
+	return (sym);
+}
+
+/*
+ * Entry point: load /boot/userboot.so, parse the command line, allocate
+ * the staging image, switch the terminal to unbuffered no-echo input and
+ * run the loader with the callback table cb.
+ *
+ * Options:
+ *	-d path		disk image to expose to the loader as unit 0
+ *	-h path		host directory used as the loader's root
+ *	-e		exec the new kernel immediately after loading
+ *	-r		reboot into the new kernel after loading
+ *	-k name=value	set a variable in the loader environment
+ *
+ * Must be run with an effective uid of 0.  Returns the value returned by
+ * loader_main().
+ */
+int
+main(int argc, char** argv)
+{
+	int (*loader_main)(struct loader_callbacks *, void *, int, int);
+	void (*loader_init)(void);
+	int (*loader_setenv)(const char *, const char *, int);
+	int opt;
+	char *disk_image = NULL;
+	char karg[20];
+	char kval[128];
+
+	if (geteuid()) {
+		errno = EPERM;
+		err(1, NULL);
+	}
+
+	/* dlopen(3) requires one of RTLD_LAZY or RTLD_NOW in the mode. */
+	dl_lib = dlopen("/boot/userboot.so", RTLD_LAZY | RTLD_LOCAL);
+	if (!dl_lib) {
+		printf("%s\n", dlerror());
+		return (1);
+	}
+	loader_main = kload_dlsym("loader_main");
+	Malloc_func = kload_dlsym("Malloc");
+	/*
+	 * pull in the libstand setenv for setting name value pairs
+	 * in the kernel env page
+	 */
+	loader_setenv = kload_dlsym("setenv");
+	loader_init = kload_dlsym("loader_init");
+
+	/* call libstand setheap to init memory allocations */
+	loader_init();
+
+	while ((opt = getopt(argc, argv, "d:h:erk:")) != -1) {
+		switch (opt) {
+		case 'd':
+			disk_image = optarg;
+			break;
+		case 'h':
+			host_base = optarg;
+			break;
+		case 'e':
+			k_execute = 1;
+			break;
+		case 'r':
+			k_reboot = 1;
+			break;
+		case 'k':
+			memset(karg,0,sizeof(karg));
+			memset(kval,0,sizeof(kval));
+			/*
+			 * Field widths are one less than the buffer sizes so
+			 * an over-long name or value cannot overflow them.
+			 */
+			if (sscanf(optarg, "%19[a-zA-Z_-]=%127s",
+			    karg, kval) == 2) {
+				printf("got value %s %s\n",karg,kval);
+				loader_setenv(karg, kval, 1);
+			} else {
+				fprintf(stderr,"-k failure %s\n",optarg);
+			}
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	}
+
+	image_size = 128*1024*1024;
+	image = malloc(image_size);
+	if (image == NULL)
+		err(1, "Can't allocate %zu byte kernel image buffer",
+		    image_size);
+	if (disk_image) {
+		disk_fd = open(disk_image, O_RDONLY);
+		if (disk_fd < 0)
+			err(1, "Can't open disk image '%s'", disk_image);
+	}
+
+	/* Save the terminal state; k_exit() restores it from oldterm. */
+	tcgetattr(0, &term);
+	oldterm = term;
+	term.c_iflag &= ~(ICRNL);
+	term.c_lflag &= ~(ICANON|ECHO);
+	tcsetattr(0, TCSAFLUSH, &term);
+
+	return(loader_main(&cb, NULL, USERBOOT_VERSION_4, disk_fd >= 0));
+}
+
+/*
+ * Hand the staged kernel image to the kernel through the kload syscall.
+ *
+ * image is the staging buffer filled by the loader and entry_pt the
+ * kernel entry point it reported.  The request is a plain load unless -e
+ * (exec) or -r (reboot) was given; -r takes precedence and first shuts
+ * down the running processes.
+ *
+ * Returns the syscall result, or -1 with errno set to EINVAL when the
+ * loader did not place anything above kernphys in the image.
+ */
+static int
+kload_load_image(void *image, unsigned long entry_pt)
+{
+	char *stack = (char *)image + 0x1000; /* PAGESIZE */
+	struct kload kld;
+	int flags = KLOAD_LOAD;
+	int ret;
+	/*
+	 * This must be the same value as in sys/conf/ldscript.xxx.
+	 * This value was changed at one point when a new version
+	 * of binutils was imported. The value is aligned to
+	 * max page size supported by given processor
+	 */
+	unsigned long kernphys = 0x200000;
+
+	/* Without this check the segment size below would wrap around. */
+	if (image_max_used <= kernphys) {
+		warnx("no kernel image loaded above 0x%lx", kernphys);
+		errno = EINVAL;
+		return (-1);
+	}
+
+	/* Do not pass indeterminate fields to the kernel. */
+	memset(&kld, 0, sizeof(kld));
+	kld.khdr[0].k_buf = &((char *)image)[kernphys];
+	kld.khdr[0].k_memsz = roundup2(image_max_used,PAGE_SIZE) - kernphys;
+	kld.k_entry_pt = entry_pt;
+	kld.num_hdrs = 1;
+
+	/*
+	 * pull parameters from the stack page
+	 * a better interface should be developed for kload
+	 * in the future
+	 */
+	kld.k_modulep = ((unsigned int *)stack)[1];
+	kld.k_physfree = ((unsigned int *)stack)[2];
+
+	/*
+	 * Make sure there are 4 pages of kenv pages between the end of the
+	 * kernel and start of free memory.
+	 * Why you ask? Well that is a question without a good answer as of yet
+	 * for some strange reason some ata chips will not respond correctly
+	 * unless free memory starts at greater than 2 pages out.
+	 * The obvious assumption is that something is getting stomped on but
+	 * that has yet to be determined. Adding this workaround.
+	 */
+	kld.k_physfree = MAX(kld.k_modulep + (4 * PAGE_SIZE), kld.k_physfree);
+
+	printf("WARNING kernphys set to 0x%lx make sure this matches kernphys "
+	    "from sys/conf/ldscript\n", kernphys);
+
+	if (k_execute)
+		flags = KLOAD_EXEC;
+	if (k_reboot) {
+		/* reboot wins when both -e and -r were given */
+		flags = KLOAD_REBOOT;
+		shutdown_processes();
+	}
+
+	ret = syscall(SYS_kload, &kld, sizeof(struct kload), flags);
+	if (ret == -1)
+		warn("kload");
+	return (ret);
+}
+
+/*
+ * Stop init and terminate every other process before rebooting into the
+ * new kernel: SIGTSTP to init, SIGTERM to everything, a wait that is
+ * extended while swap page-ins are still happening, then repeated
+ * SIGKILLs.
+ *
+ * Returns 1 once the kill loop finishes (whether or not every process
+ * died).  If SIGKILL fails for a reason other than ESRCH, init is sent
+ * SIGHUP and the program exits through errx().  The program also exits
+ * through err() if init cannot be stopped or SIGTERM cannot be sent.
+ */
+static int
+shutdown_processes(void)
+{
+ int i;
+ u_int pageins;
+ int sverrno;
+ /*
+ * Do a sync early on, so disks start transfers while we're off
+ * killing processes. Don't worry about writes done before the
+ * processes die, the reboot system call syncs the disks.
+ */
+ sync();
+
+ /*
+ * Ignore signals that we can get as a result of killing
+ * parents, group leaders, etc.
+ */
+ (void)signal(SIGHUP, SIG_IGN);
+ (void)signal(SIGINT, SIG_IGN);
+ (void)signal(SIGQUIT, SIG_IGN);
+ (void)signal(SIGTERM, SIG_IGN);
+ (void)signal(SIGTSTP, SIG_IGN);
+
+ /*
+ * If we're running in a pipeline, we don't want to die
+ * after killing whatever we're writing to.
+ */
+ (void)signal(SIGPIPE, SIG_IGN);
+
+ /* Just stop init -- if we fail, we'll restart it. */
+ if (kill(1, SIGTSTP) == -1)
+ err(1, "SIGTSTP init");
+
+ /* Send a SIGTERM first, a chance to save the buffers. */
+ if (kill(-1, SIGTERM) == -1 && errno != ESRCH)
+ err(1, "SIGTERM processes");
+
+ /*
+ * After the processes receive the signal, start the rest of the
+ * buffers on their way. Wait 5 seconds between the SIGTERM and
+ * the SIGKILL to give everybody a chance. If there is a lot of
+ * paging activity then wait longer, up to a maximum of approx
+ * 60 seconds.
+ */
+ sleep(2);
+ /* keep waiting in 3 second steps while the page-in counter changes */
+ for (i = 0; i < 20; i++) {
+ pageins = get_pageins();
+ sync();
+ sleep(3);
+ if (get_pageins() == pageins)
+ break;
+ }
+
+ /* SIGKILL everything, backing off 2*i seconds between attempts */
+ for (i = 1;; ++i) {
+ if (kill(-1, SIGKILL) == -1) {
+ if (errno == ESRCH)
+ break;
+ goto restart;
+ }
+ if (i > 5) {
+ (void)fprintf(stderr,
+ "WARNING: some process(es) wouldn't die\n");
+ break;
+ }
+ (void)sleep(2 * i);
+ }
+ return 1;
+restart:
+ /* save errno from the failed SIGKILL before kill() below changes it */
+ sverrno = errno;
+ errx(1, "%s%s", kill(1, SIGHUP) == -1 ?
+ "(can't restart init): " : "", strerror(sverrno));
+ /* NOTREACHED */
+ return 0;
+}
+
+/*
+ * Return the current value of the vm.stats.vm.v_swappgsin sysctl.
+ * If the sysctl cannot be read a warning is printed and 0 is returned.
+ */
+static u_int
+get_pageins(void)
+{
+	u_int count;
+	size_t count_len = sizeof(count);
+
+	if (sysctlbyname("vm.stats.vm.v_swappgsin", &count, &count_len,
+	    NULL, 0) == 0)
+		return (count);
+
+	warnx("v_swappgsin");
+	return (0);
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 256 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freebsd.org/pipermail/freebsd-current/attachments/20121114/015f73c3/attachment-0001.sig>
More information about the freebsd-current
mailing list