git: 758e72c0a820 - stable/13 - Add new vnode dumper to support live minidumps
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 27 Jun 2022 19:35:34 UTC
The branch stable/13 has been updated by mhorne:
URL: https://cgit.FreeBSD.org/src/commit/?id=758e72c0a8204b10e66c5b106aeba051f819c7dc
commit 758e72c0a8204b10e66c5b106aeba051f819c7dc
Author: Mitchell Horne <mhorne@FreeBSD.org>
AuthorDate: 2021-03-23 20:47:14 +0000
Commit: Mitchell Horne <mhorne@FreeBSD.org>
CommitDate: 2022-06-27 19:32:06 +0000
Add new vnode dumper to support live minidumps
This dumper can instantiate and write the dump's contents to a
file-backed vnode.
Unlike existing disk or network dumpers, the vnode dumper should not be
invoked during a system panic, and therefore is not added to the global
dumper_configs list. Instead, the vnode dumper is constructed ad-hoc
when a live dump is requested using the new ioctl on /dev/mem. This is
similar in spirit to a kgdb session against the live system via
/dev/mem.
As described briefly in the mem(4) man page, live dumps are not
guaranteed to result in a usable output file, but offer some debugging
value where forcefully panicking a system to dump its memory is not
desirable/feasible.
A future change to savecore(8) will add an option to save a live dump.
Reviewed by: markj, Pau Amma <pauamma@gundo.com> (manpages)
Discussed with: kib
MFC after: 3 weeks
Sponsored by: Juniper Networks, Inc.
Sponsored by: Klara, Inc.
Differential Revision: https://reviews.freebsd.org/D33813
(cherry picked from commit c9114f9f86f92742eacd1d802c34009a57e81055)
---
share/man/man4/mem.4 | 62 ++++++++++++++
sys/conf/files | 1 +
sys/dev/mem/memdev.c | 6 ++
sys/kern/kern_shutdown.c | 14 ++-
sys/kern/kern_vnodedumper.c | 202 ++++++++++++++++++++++++++++++++++++++++++++
sys/sys/conf.h | 1 +
sys/sys/kerneldump.h | 2 +
sys/sys/memrange.h | 10 +++
8 files changed, 296 insertions(+), 2 deletions(-)
diff --git a/share/man/man4/mem.4 b/share/man/man4/mem.4
index f860df036428..6370d2a95525 100644
--- a/share/man/man4/mem.4
+++ b/share/man/man4/mem.4
@@ -202,6 +202,50 @@ to update an existing or establish a new range, or to
.Dv MEMRANGE_SET_REMOVE
to remove a range.
.El
+.Ss Live Kernel Dumps
+.Pp
+The
+.Dv MEM_KERNELDUMP
+ioctl will initiate a kernel dump against the running system, the contents of
+which will be written to a process-owned file descriptor.
+The resulting dump output will be in minidump format.
+The request is described by
+.Bd -literal
+struct mem_livedump_arg {
+ int fd; /* input */
+ int flags; /* input */
+ uint8_t compression; /* input */
+};
+.Ed
+.Pp
+The
+.Va fd
+field is used to pass the file descriptor.
+.Pp
+The
+.Va flags
+field is currently unused and must be set to zero.
+.Pp
+The
+.Va compression
+field can be used to specify the desired compression to
+be applied to the dump output.
+The supported values are defined in
+.In sys/kerneldump.h ;
+that is,
+.Dv KERNELDUMP_COMP_NONE ,
+.Dv KERNELDUMP_COMP_GZIP ,
+or
+.Dv KERNELDUMP_COMP_ZSTD .
+.Pp
+Kernel dumps taken against the running system may have inconsistent kernel data
+structures due to allocation, deallocation, or modification of memory
+concurrent to the dump procedure.
+Thus, the resulting core dump is not guaranteed to be usable.
+A system under load is more likely to produce an inconsistent result.
+Despite this, live kernel dumps can be useful for offline debugging of certain
+types of kernel bugs, such as deadlocks, or in inspecting a particular part of
+the system's state.
.Sh RETURN VALUES
.Ss MEM_EXTRACT_PADDR
The
@@ -229,6 +273,24 @@ base/length supplied.
An attempt to remove a range failed because the range is permanently
enabled.
.El
+.Ss MEM_KERNELDUMP
+.Bl -tag -width Er
+.It Bq Er EOPNOTSUPP
+Kernel minidumps are not supported on this architecture.
+.It Bq Er EPERM
+An attempt to begin the kernel dump failed because the calling thread lacks the
+.Dv PRIV_KMEM_READ
+privilege.
+.It Bq Er EBADF
+The supplied file descriptor was invalid, or does not have write permission.
+.It Bq Er EBUSY
+An attempt to begin the kernel dump failed because one is already in progress.
+.It Bq Er EINVAL
+An invalid or unsupported value was specified in
+.Va flags .
+.It Bq Er EINVAL
+An invalid or unsupported compression type was specified.
+.El
.Sh FILES
.Bl -tag -width /dev/kmem -compact
.It Pa /dev/mem
diff --git a/sys/conf/files b/sys/conf/files
index 81781ab0bc66..1a729bf84c96 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3888,6 +3888,7 @@ kern/kern_tslog.c optional tslog
kern/kern_ubsan.c optional kubsan
kern/kern_umtx.c standard
kern/kern_uuid.c standard
+kern/kern_vnodedumper.c standard
kern/kern_xxx.c standard
kern/link_elf.c standard
kern/linker_if.m standard
diff --git a/sys/dev/mem/memdev.c b/sys/dev/mem/memdev.c
index f03550aaa495..7d33066f5678 100644
--- a/sys/dev/mem/memdev.c
+++ b/sys/dev/mem/memdev.c
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
+#include <sys/kerneldump.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
@@ -96,6 +97,7 @@ memioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags,
{
vm_map_t map;
vm_map_entry_t entry;
+ const struct mem_livedump_arg *marg;
struct mem_extract *me;
int error;
@@ -120,6 +122,10 @@ memioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags,
}
vm_map_unlock_read(map);
break;
+ case MEM_KERNELDUMP:
+ marg = (const struct mem_livedump_arg *)data;
+ error = livedump_start(marg->fd, marg->flags, marg->compression);
+ break;
default:
error = memioctl_md(dev, cmd, data, flags, td);
break;
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 37544e364ee2..0f31622903bf 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -383,6 +383,17 @@ print_uptime(void)
printf("%lds\n", (long)ts.tv_sec);
}
+/*
+ * Set up a context that can be extracted from the dump.
+ */
+void
+dump_savectx(void)
+{
+
+ savectx(&dumppcb);
+ dumptid = curthread->td_tid;
+}
+
int
doadump(boolean_t textdump)
{
@@ -395,8 +406,7 @@ doadump(boolean_t textdump)
if (TAILQ_EMPTY(&dumper_configs))
return (ENXIO);
- savectx(&dumppcb);
- dumptid = curthread->td_tid;
+ dump_savectx();
dumping++;
coredump = TRUE;
diff --git a/sys/kern/kern_vnodedumper.c b/sys/kern/kern_vnodedumper.c
new file mode 100644
index 000000000000..c8fdce5e550a
--- /dev/null
+++ b/sys/kern/kern_vnodedumper.c
@@ -0,0 +1,202 @@
+/*-
+ * Copyright (c) 2021-2022 Juniper Networks
+ *
+ * This software was developed by Mitchell Horne <mhorne@FreeBSD.org>
+ * under sponsorship from Juniper Networks and Klara Systems.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/caprights.h>
+#include <sys/disk.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/kerneldump.h>
+#include <sys/limits.h>
+#include <sys/malloc.h>
+#include <sys/namei.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+
+#include <machine/vmparam.h>
+
+static dumper_start_t vnode_dumper_start;
+static dumper_t vnode_dump;
+static dumper_hdr_t vnode_write_headers;
+
+static struct sx livedump_sx;
+SX_SYSINIT(livedump, &livedump_sx, "Livedump sx");
+
+/*
+ * Invoke a live minidump on the system.
+ */
+int
+livedump_start(int fd, int flags, uint8_t compression)
+{
+#if MINIDUMP_PAGE_TRACKING == 1
+ struct dumperinfo di, *livedi;
+ struct diocskerneldump_arg kda;
+ struct vnode *vp;
+ struct file *fp;
+ void *rl_cookie;
+ int error;
+
+ error = priv_check(curthread, PRIV_KMEM_READ);
+ if (error != 0)
+ return (error);
+
+ if (flags != 0)
+ return (EINVAL);
+
+ error = getvnode(curthread, fd, &cap_write_rights, &fp);
+ if (error != 0)
+ return (error);
+ vp = fp->f_vnode;
+
+ if ((fp->f_flag & FWRITE) == 0) {
+ error = EBADF;
+ goto drop;
+ }
+
+ /* Set up a new dumper. */
+ bzero(&di, sizeof(di));
+ di.dumper_start = vnode_dumper_start;
+ di.dumper = vnode_dump;
+ di.dumper_hdr = vnode_write_headers;
+ di.blocksize = PAGE_SIZE; /* Arbitrary. */
+ di.maxiosize = MAXDUMPPGS * PAGE_SIZE;
+
+ bzero(&kda, sizeof(kda));
+ kda.kda_compression = compression;
+ error = dumper_create(&di, "livedump", &kda, &livedi);
+ if (error != 0)
+ goto drop;
+
+ /* Only allow one livedump to proceed at a time. */
+ if (sx_try_xlock(&livedump_sx) == 0) {
+ dumper_destroy(livedi);
+ error = EBUSY;
+ goto drop;
+ }
+
+ /* To be used by the callback functions. */
+ livedi->priv = vp;
+
+ /* Lock the entire file range and vnode. */
+ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+
+ dump_savectx();
+ error = minidumpsys(livedi, true);
+
+ VOP_UNLOCK(vp);
+ vn_rangelock_unlock(vp, rl_cookie);
+ sx_xunlock(&livedump_sx);
+ dumper_destroy(livedi);
+drop:
+ fdrop(fp, curthread);
+ return (error);
+#else
+ return (EOPNOTSUPP);
+#endif /* MINIDUMP_PAGE_TRACKING == 1 */
+}
+
+int
+vnode_dumper_start(struct dumperinfo *di, void *key, uint32_t keysize)
+{
+
+ /* Always begin with an offset of zero. */
+ di->dumpoff = 0;
+
+ KASSERT(keysize == 0, ("encryption not supported for livedumps"));
+ return (0);
+}
+
+/*
+ * Callback from dumpsys() to dump a chunk of memory.
+ *
+ * Parameters:
+ * arg Opaque private pointer to vnode
+ * virtual Virtual address (where to read the data from)
+ * physical Physical memory address (unused)
+ * offset Offset from start of core file
+ * length Data length
+ *
+ * Return value:
+ * 0 on success
+ * errno on error
+ */
+int
+vnode_dump(void *arg, void *virtual, vm_offset_t physical __unused,
+ off_t offset, size_t length)
+{
+ struct vnode *vp;
+ int error = 0;
+
+ vp = arg;
+ MPASS(vp != NULL);
+ ASSERT_VOP_LOCKED(vp, __func__);
+
+ /* Done? */
+ if (virtual == NULL)
+ return (0);
+
+ error = vn_rdwr(UIO_WRITE, vp, virtual, length, offset, UIO_SYSSPACE,
+ IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL, curthread);
+ if (error != 0)
+ uprintf("%s: error writing livedump block at offset %jx: %d\n",
+ __func__, (uintmax_t)offset, error);
+ return (error);
+}
+
+/*
+ * Callback from dumpsys() to write out the dump header, placed at the end.
+ */
+int
+vnode_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh)
+{
+ struct vnode *vp;
+ int error;
+ off_t offset;
+
+ vp = di->priv;
+ MPASS(vp != NULL);
+ ASSERT_VOP_LOCKED(vp, __func__);
+
+ /* Compensate for compression/encryption adjustment of dumpoff. */
+ offset = roundup2(di->dumpoff, di->blocksize);
+
+ /* Write the kernel dump header to the end of the file. */
+ error = vn_rdwr(UIO_WRITE, vp, kdh, sizeof(*kdh), offset,
+ UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL,
+ curthread);
+ if (error != 0)
+ uprintf("%s: error writing livedump header: %d\n", __func__,
+ error);
+ return (error);
+}
diff --git a/sys/sys/conf.h b/sys/sys/conf.h
index 7e9631154802..02671a065c09 100644
--- a/sys/sys/conf.h
+++ b/sys/sys/conf.h
@@ -362,6 +362,7 @@ struct dumperinfo {
extern int dumping; /* system is dumping */
+void dump_savectx(void);
int doadump(boolean_t);
struct diocskerneldump_arg;
int dumper_create(const struct dumperinfo *di_template, const char *devname,
diff --git a/sys/sys/kerneldump.h b/sys/sys/kerneldump.h
index c293491eadc9..2c73790bc81d 100644
--- a/sys/sys/kerneldump.h
+++ b/sys/sys/kerneldump.h
@@ -162,6 +162,8 @@ void dumpsys_pb_progress(size_t);
extern int do_minidump;
+int livedump_start(int, int, uint8_t);
+
#endif
#endif /* _SYS_KERNELDUMP_H */
diff --git a/sys/sys/memrange.h b/sys/sys/memrange.h
index 454b033775f4..d3eeeb79b664 100644
--- a/sys/sys/memrange.h
+++ b/sys/sys/memrange.h
@@ -59,6 +59,16 @@ struct mem_extract {
#define MEM_EXTRACT_PADDR _IOWR('m', 52, struct mem_extract)
+struct mem_livedump_arg {
+ int fd;
+ int flags;
+ uint8_t compression;
+ uint8_t pad1[7];
+ uint64_t pad2[2];
+};
+
+#define MEM_KERNELDUMP _IOW('m', 53, struct mem_livedump_arg)
+
#ifdef _KERNEL
MALLOC_DECLARE(M_MEMDESC);