git: ff0179726635 - stable/13 - Refactor core dumping code a bit

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Thu, 12 May 2022 22:56:24 UTC
The branch stable/13 has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=ff01797266354f17cebe609ab7de7a041e6f0970

commit ff01797266354f17cebe609ab7de7a041e6f0970
Author:     Edward Tomasz Napierala <trasz@FreeBSD.org>
AuthorDate: 2021-05-22 08:58:35 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2022-05-12 22:12:59 +0000

    Refactor core dumping code a bit
    
    This makes it possible to use core_write(), core_output(),
    and sbuf_drain_core_output() in Linux coredump code.  Moving
    them out of imgact_elf.c is necessary because of the weird way
    it's being built.
    
    Reviewed By:    kib
    Sponsored By:   EPSRC
    Differential Revision:  https://reviews.freebsd.org/D30369
    
    (cherry picked from commit 33621dfc196e317026aa8b9d916567598a1cedcb)
---
 sys/kern/imgact_elf.c | 164 --------------------------------------------------
 sys/kern/kern_exec.c  | 151 +++++++++++++++++++++++++++++++++++++++++++++-
 sys/sys/exec.h        |  19 ++++++
 3 files changed, 169 insertions(+), 165 deletions(-)

diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index a8f3c6959b3b..d2cee91e0531 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -106,8 +106,6 @@ SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE),
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "");
 
-#define	CORE_BUF_SIZE	(16 * 1024)
-
 int __elfN(fallback_brand) = -1;
 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
     fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0,
@@ -1524,23 +1522,11 @@ struct note_info {
 
 TAILQ_HEAD(note_info_list, note_info);
 
-/* Coredump output parameters. */
-struct coredump_params {
-	off_t		offset;
-	struct ucred	*active_cred;
-	struct ucred	*file_cred;
-	struct thread	*td;
-	struct vnode	*vp;
-	struct compressor *comp;
-};
-
 extern int compress_user_cores;
 extern int compress_user_cores_level;
 
 static void cb_put_phdr(vm_map_entry_t, void *);
 static void cb_size_segment(vm_map_entry_t, void *);
-static int core_write(struct coredump_params *, const void *, size_t, off_t,
-    enum uio_seg, size_t *);
 static void each_dumpable_segment(struct thread *, segment_callback, void *,
     int);
 static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t,
@@ -1550,7 +1536,6 @@ static void __elfN(prepare_notes)(struct thread *, struct note_info_list *,
 static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t, int);
 static void __elfN(putnote)(struct note_info *, struct sbuf *);
 static size_t register_note(struct note_info_list *, int, outfunc_t, void *);
-static int sbuf_drain_core_output(void *, const char *, int);
 
 static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *);
 static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *);
@@ -1568,34 +1553,6 @@ static void note_procstat_rlimit(void *, struct sbuf *, size_t *);
 static void note_procstat_umask(void *, struct sbuf *, size_t *);
 static void note_procstat_vmmap(void *, struct sbuf *, size_t *);
 
-/*
- * Write out a core segment to the compression stream.
- */
-static int
-compress_chunk(struct coredump_params *p, char *base, char *buf, size_t len)
-{
-	size_t chunk_len;
-	int error;
-
-	while (len > 0) {
-		chunk_len = MIN(len, CORE_BUF_SIZE);
-
-		/*
-		 * We can get EFAULT error here.
-		 * In that case zero out the current chunk of the segment.
-		 */
-		error = copyin(base, buf, chunk_len);
-		if (error != 0)
-			bzero(buf, chunk_len);
-		error = compressor_write(p->comp, buf, chunk_len);
-		if (error != 0)
-			break;
-		base += chunk_len;
-		len -= chunk_len;
-	}
-	return (error);
-}
-
 static int
 core_compressed_write(void *base, size_t len, off_t offset, void *arg)
 {
@@ -1604,127 +1561,6 @@ core_compressed_write(void *base, size_t len, off_t offset, void *arg)
 	    UIO_SYSSPACE, NULL));
 }
 
-static int
-core_write(struct coredump_params *p, const void *base, size_t len,
-    off_t offset, enum uio_seg seg, size_t *resid)
-{
-
-	return (vn_rdwr_inchunks(UIO_WRITE, p->vp, __DECONST(void *, base),
-	    len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED,
-	    p->active_cred, p->file_cred, resid, p->td));
-}
-
-extern int core_dump_can_intr;
-
-static int
-core_output(char *base, size_t len, off_t offset, struct coredump_params *p,
-    void *tmpbuf)
-{
-	vm_map_t map;
-	struct mount *mp;
-	size_t resid, runlen;
-	int error;
-	bool success;
-
-	KASSERT((uintptr_t)base % PAGE_SIZE == 0,
-	    ("%s: user address %p is not page-aligned", __func__, base));
-
-	if (p->comp != NULL)
-		return (compress_chunk(p, base, tmpbuf, len));
-
-	map = &p->td->td_proc->p_vmspace->vm_map;
-	for (; len > 0; base += runlen, offset += runlen, len -= runlen) {
-		/*
-		 * Attempt to page in all virtual pages in the range.  If a
-		 * virtual page is not backed by the pager, it is represented as
-		 * a hole in the file.  This can occur with zero-filled
-		 * anonymous memory or truncated files, for example.
-		 */
-		for (runlen = 0; runlen < len; runlen += PAGE_SIZE) {
-			if (core_dump_can_intr && curproc_sigkilled())
-				return (EINTR);
-			error = vm_fault(map, (uintptr_t)base + runlen,
-			    VM_PROT_READ, VM_FAULT_NOFILL, NULL);
-			if (runlen == 0)
-				success = error == KERN_SUCCESS;
-			else if ((error == KERN_SUCCESS) != success)
-				break;
-		}
-
-		if (success) {
-			error = core_write(p, base, runlen, offset,
-			    UIO_USERSPACE, &resid);
-			if (error != 0) {
-				if (error != EFAULT)
-					break;
-
-				/*
-				 * EFAULT may be returned if the user mapping
-				 * could not be accessed, e.g., because a mapped
-				 * file has been truncated.  Skip the page if no
-				 * progress was made, to protect against a
-				 * hypothetical scenario where vm_fault() was
-				 * successful but core_write() returns EFAULT
-				 * anyway.
-				 */
-				runlen -= resid;
-				if (runlen == 0) {
-					success = false;
-					runlen = PAGE_SIZE;
-				}
-			}
-		}
-		if (!success) {
-			error = vn_start_write(p->vp, &mp, V_WAIT);
-			if (error != 0)
-				break;
-			vn_lock(p->vp, LK_EXCLUSIVE | LK_RETRY);
-			error = vn_truncate_locked(p->vp, offset + runlen,
-			    false, p->td->td_ucred);
-			VOP_UNLOCK(p->vp);
-			vn_finished_write(mp);
-			if (error != 0)
-				break;
-		}
-	}
-	return (error);
-}
-
-/*
- * Drain into a core file.
- */
-static int
-sbuf_drain_core_output(void *arg, const char *data, int len)
-{
-	struct coredump_params *p;
-	int error, locked;
-
-	p = (struct coredump_params *)arg;
-
-	/*
-	 * Some kern_proc out routines that print to this sbuf may
-	 * call us with the process lock held. Draining with the
-	 * non-sleepable lock held is unsafe. The lock is needed for
-	 * those routines when dumping a live process. In our case we
-	 * can safely release the lock before draining and acquire
-	 * again after.
-	 */
-	locked = PROC_LOCKED(p->td->td_proc);
-	if (locked)
-		PROC_UNLOCK(p->td->td_proc);
-	if (p->comp != NULL)
-		error = compressor_write(p->comp, __DECONST(char *, data), len);
-	else
-		error = core_write(p, __DECONST(void *, data), len, p->offset,
-		    UIO_SYSSPACE, NULL);
-	if (locked)
-		PROC_LOCK(p->td->td_proc);
-	if (error != 0)
-		return (-error);
-	p->offset += len;
-	return (len);
-}
-
 int
 __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
 {
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 407c5ab3ed0e..5be0dee94197 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/acct.h>
 #include <sys/asan.h>
 #include <sys/capsicum.h>
+#include <sys/compressor.h>
 #include <sys/eventhandler.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
@@ -151,7 +152,7 @@ static int map_at_zero = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0,
     "Permit processes to map an object at virtual address 0.");
 
-int core_dump_can_intr = 1;
+static int core_dump_can_intr = 1;
 SYSCTL_INT(_kern, OID_AUTO, core_dump_can_intr, CTLFLAG_RWTUN,
     &core_dump_can_intr, 0,
     "Core dumping interruptible with SIGKILL");
@@ -1968,3 +1969,151 @@ exec_unregister(const struct execsw *execsw_arg)
 	execsw = newexecsw;
 	return (0);
 }
+
+/*
+ * Write out a core segment to the compression stream.
+ */
+static int
+compress_chunk(struct coredump_params *p, char *base, char *buf, size_t len)
+{
+	size_t chunk_len;
+	int error;
+
+	while (len > 0) {
+		chunk_len = MIN(len, CORE_BUF_SIZE);
+
+		/*
+		 * We can get EFAULT error here.
+		 * In that case zero out the current chunk of the segment.
+		 */
+		error = copyin(base, buf, chunk_len);
+		if (error != 0)
+			bzero(buf, chunk_len);
+		error = compressor_write(p->comp, buf, chunk_len);
+		if (error != 0)
+			break;
+		base += chunk_len;
+		len -= chunk_len;
+	}
+	return (error);
+}
+
+int
+core_write(struct coredump_params *p, const void *base, size_t len,
+    off_t offset, enum uio_seg seg, size_t *resid)
+{
+
+	return (vn_rdwr_inchunks(UIO_WRITE, p->vp, __DECONST(void *, base),
+	    len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED,
+	    p->active_cred, p->file_cred, resid, p->td));
+}
+
+int
+core_output(char *base, size_t len, off_t offset, struct coredump_params *p,
+    void *tmpbuf)
+{
+	vm_map_t map;
+	struct mount *mp;
+	size_t resid, runlen;
+	int error;
+	bool success;
+
+	KASSERT((uintptr_t)base % PAGE_SIZE == 0,
+	    ("%s: user address %p is not page-aligned", __func__, base));
+
+	if (p->comp != NULL)
+		return (compress_chunk(p, base, tmpbuf, len));
+
+	map = &p->td->td_proc->p_vmspace->vm_map;
+	for (; len > 0; base += runlen, offset += runlen, len -= runlen) {
+		/*
+		 * Attempt to page in all virtual pages in the range.  If a
+		 * virtual page is not backed by the pager, it is represented as
+		 * a hole in the file.  This can occur with zero-filled
+		 * anonymous memory or truncated files, for example.
+		 */
+		for (runlen = 0; runlen < len; runlen += PAGE_SIZE) {
+			if (core_dump_can_intr && curproc_sigkilled())
+				return (EINTR);
+			error = vm_fault(map, (uintptr_t)base + runlen,
+			    VM_PROT_READ, VM_FAULT_NOFILL, NULL);
+			if (runlen == 0)
+				success = error == KERN_SUCCESS;
+			else if ((error == KERN_SUCCESS) != success)
+				break;
+		}
+
+		if (success) {
+			error = core_write(p, base, runlen, offset,
+			    UIO_USERSPACE, &resid);
+			if (error != 0) {
+				if (error != EFAULT)
+					break;
+
+				/*
+				 * EFAULT may be returned if the user mapping
+				 * could not be accessed, e.g., because a mapped
+				 * file has been truncated.  Skip the page if no
+				 * progress was made, to protect against a
+				 * hypothetical scenario where vm_fault() was
+				 * successful but core_write() returns EFAULT
+				 * anyway.
+				 */
+				runlen -= resid;
+				if (runlen == 0) {
+					success = false;
+					runlen = PAGE_SIZE;
+				}
+			}
+		}
+		if (!success) {
+			error = vn_start_write(p->vp, &mp, V_WAIT);
+			if (error != 0)
+				break;
+			vn_lock(p->vp, LK_EXCLUSIVE | LK_RETRY);
+			error = vn_truncate_locked(p->vp, offset + runlen,
+			    false, p->td->td_ucred);
+			VOP_UNLOCK(p->vp);
+			vn_finished_write(mp);
+			if (error != 0)
+				break;
+		}
+	}
+	return (error);
+}
+
+/*
+ * Drain into a core file.
+ */
+int
+sbuf_drain_core_output(void *arg, const char *data, int len)
+{
+	struct coredump_params *p;
+	int error, locked;
+
+	p = (struct coredump_params *)arg;
+
+	/*
+	 * Some kern_proc out routines that print to this sbuf may
+	 * call us with the process lock held. Draining with the
+	 * non-sleepable lock held is unsafe. The lock is needed for
+	 * those routines when dumping a live process. In our case we
+	 * can safely release the lock before draining and acquire
+	 * again after.
+	 */
+	locked = PROC_LOCKED(p->td->td_proc);
+	if (locked)
+		PROC_UNLOCK(p->td->td_proc);
+	if (p->comp != NULL)
+		error = compressor_write(p->comp, __DECONST(char *, data), len);
+	else
+		error = core_write(p, __DECONST(void *, data), len, p->offset,
+		    UIO_SYSSPACE, NULL);
+	if (locked)
+		PROC_LOCK(p->td->td_proc);
+	if (error != 0)
+		return (-error);
+	p->offset += len;
+	return (len);
+}
+
diff --git a/sys/sys/exec.h b/sys/sys/exec.h
index 94d2f698bd63..82ee16befe28 100644
--- a/sys/sys/exec.h
+++ b/sys/sys/exec.h
@@ -60,6 +60,16 @@ struct ps_strings {
 	unsigned int ps_nenvstr; /* the number of environment strings */
 };
 
+/* Coredump output parameters. */
+struct coredump_params {
+	off_t		offset;
+	struct ucred	*active_cred;
+	struct ucred	*file_cred;
+	struct thread	*td;
+	struct vnode	*vp;
+	struct compressor *comp;
+};
+
 struct image_params;
 
 struct execsw {
@@ -86,6 +96,15 @@ void exec_unmap_first_page(struct image_params *);
 int exec_register(const struct execsw *);
 int exec_unregister(const struct execsw *);
 
+enum uio_seg;
+
+#define   CORE_BUF_SIZE   (16 * 1024)
+
+int core_write(struct coredump_params *, const void *, size_t, off_t,
+    enum uio_seg, size_t *);
+int core_output(char *, size_t, off_t, struct coredump_params *, void *);
+int sbuf_drain_core_output(void *, const char *, int);
+
 extern int coredump_pack_fileinfo;
 extern int coredump_pack_vmmapinfo;