git: 8371bf67d632 - stable/13 - linux: implement PTRACE_GET_SYSCALL_INFO

From: Edward Tomasz Napierala <trasz_at_FreeBSD.org>
Date: Mon, 21 Feb 2022 13:48:51 UTC
The branch stable/13 has been updated by trasz:

URL: https://cgit.FreeBSD.org/src/commit/?id=8371bf67d632ddc1117bbaa5956af260e62f26ac

commit 8371bf67d632ddc1117bbaa5956af260e62f26ac
Author:     Edward Tomasz Napierala <trasz@FreeBSD.org>
AuthorDate: 2021-09-12 11:31:10 +0000
Commit:     Edward Tomasz Napierala <trasz@FreeBSD.org>
CommitDate: 2022-02-21 13:23:50 +0000

    linux: implement PTRACE_GET_SYSCALL_INFO
    
    This is one of the pieces required to make modern (i.e. Focal)
    strace(1) work.
    
    Reviewed By:    jhb (earlier version)
    Sponsored by:   EPSRC
    Differential Revision:  https://reviews.freebsd.org/D28212
    
    (cherry picked from commit bdf0f24bb16d556a5b1e01cdfc087d08e91ac572)
---
 lib/libsysdecode/mktables             |  2 +-
 sys/amd64/linux/linux_ptrace.c        | 98 +++++++++++++++++++++++++++++++++--
 sys/compat/freebsd32/freebsd32_misc.c |  3 ++
 sys/kern/sys_process.c                | 17 ++++++
 sys/sys/ptrace.h                      |  4 ++
 5 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/lib/libsysdecode/mktables b/lib/libsysdecode/mktables
index 77cfa15bd1f5..f044784717c0 100644
--- a/lib/libsysdecode/mktables
+++ b/lib/libsysdecode/mktables
@@ -116,7 +116,7 @@ gen_table "nfssvcflags"     "NFSSVC_[A-Z0-9]+[[:space:]]+0x[0-9]+"         "nfs/
 gen_table "pathconfname"    "_PC_[A-Z4_]+[[:space:]]+[0-9]+"               "sys/unistd.h"
 gen_table "prio"            "PRIO_[A-Z]+[[:space:]]+[0-9]"                 "sys/resource.h"
 gen_table "procctlcmd"      "PROC_[A-Z_]+[[:space:]]+[0-9]"                 "sys/procctl.h"	"PROC_TRACE_CTL_"
-gen_table "ptraceop"        "PT_[[:alnum:]_]+[[:space:]]+[0-9]+"           "sys/ptrace.h"
+gen_table "ptraceop"        "PT_[[:alnum:]_]+[[:space:]]+[0-9]+"           "sys/ptrace.h"	"PT_GET_SC_ARGS_ALL"
 gen_table "quotactlcmds"    "Q_[A-Z]+[[:space:]]+0x[0-9]+"                 "ufs/ufs/quota.h"
 gen_table "rebootopt"       "RB_[A-Z]+[[:space:]]+0x[0-9]+"                "sys/reboot.h"
 gen_table "rforkflags"      "RF[A-Z]+[[:space:]]+\([0-9]+<<[0-9]+\)"       "sys/unistd.h"
diff --git a/sys/amd64/linux/linux_ptrace.c b/sys/amd64/linux/linux_ptrace.c
index 6516453dd5e4..fa400e0d3529 100644
--- a/sys/amd64/linux/linux_ptrace.c
+++ b/sys/amd64/linux/linux_ptrace.c
@@ -94,6 +94,12 @@ __FBSDID("$FreeBSD$");
     LINUX_PTRACE_O_TRACESECCOMP | LINUX_PTRACE_O_EXITKILL |	\
     LINUX_PTRACE_O_SUSPEND_SECCOMP)
 
+#define	LINUX_PTRACE_SYSCALL_INFO_NONE	0	/* syscall_info.op: neither PL_FLAG_SCE nor PL_FLAG_SCX set */
+#define	LINUX_PTRACE_SYSCALL_INFO_ENTRY	1	/* syscall_info.op: PL_FLAG_SCE set (syscall entry) */
+#define	LINUX_PTRACE_SYSCALL_INFO_EXIT	2	/* syscall_info.op: PL_FLAG_SCX set (syscall exit) */
+
+#define	LINUX_ARCH_AMD64		0xc000003e	/* syscall_info.arch; NOTE(review): presumably Linux AUDIT_ARCH_X86_64 - confirm */
+
 static int
 map_signum(int lsig, int *bsigp)
 {
@@ -173,6 +179,28 @@ struct linux_pt_reg {
 	l_ulong	ss;
 };
 
+struct syscall_info {	/* result of PTRACE_GET_SYSCALL_INFO, copied out to the tracer; NOTE(review): presumably must match Linux struct ptrace_syscall_info - confirm */
+	uint8_t op;	/* one of LINUX_PTRACE_SYSCALL_INFO_*; selects the union member */
+	uint32_t arch;	/* always LINUX_ARCH_AMD64 here */
+	uint64_t instruction_pointer;	/* tracee r_rip from PT_GETREGS */
+	uint64_t stack_pointer;	/* tracee r_rsp from PT_GETREGS */
+	union {
+		struct {	/* valid when op == LINUX_PTRACE_SYSCALL_INFO_ENTRY */
+			uint64_t nr;	/* syscall number (pl_syscall_code) */
+			uint64_t args[6];	/* raw arguments from PT_GET_SC_ARGS_ALL */
+		} entry;
+		struct {	/* valid when op == LINUX_PTRACE_SYSCALL_INFO_EXIT */
+			int64_t rval;	/* sr_retval[0], or bsd_to_linux_errno(sr_error) on failure */
+			uint8_t is_error;	/* 1 if rval holds an errno, 0 otherwise */
+		} exit;
+		struct {	/* NOTE(review): not filled in by linux_ptrace_get_syscall_info() */
+			uint64_t nr;
+			uint64_t args[6];
+			uint32_t ret_data;
+		} seccomp;
+	};
+};
+
 /*
  * Translate amd64 ptrace registers between Linux and FreeBSD formats.
  * The translation is pretty straighforward, for all registers but
@@ -529,11 +557,75 @@ linux_ptrace_seize(struct thread *td, pid_t pid, l_ulong addr, l_ulong data)
 }
 
 static int
-linux_ptrace_get_syscall_info(struct thread *td, pid_t pid, l_ulong addr, l_ulong data)
+linux_ptrace_get_syscall_info(struct thread *td, pid_t pid,
+    l_ulong addr, l_ulong data)
 {
+	struct ptrace_lwpinfo lwpinfo;
+	struct ptrace_sc_ret sr;
+	struct reg b_reg;
+	struct syscall_info si;
+	int error;
 
-	linux_msg(td, "PTRACE_GET_SYSCALL_INFO not implemented; returning EINVAL");
-	return (EINVAL);
+	error = kern_ptrace(td, PT_LWPINFO, pid, &lwpinfo, sizeof(lwpinfo));
+	if (error != 0) {
+		linux_msg(td, "PT_LWPINFO failed with error %d", error);
+		return (error);
+	}
+
+	memset(&si, 0, sizeof(si));
+
+	if (lwpinfo.pl_flags & PL_FLAG_SCE) {
+		si.op = LINUX_PTRACE_SYSCALL_INFO_ENTRY;
+		si.entry.nr = lwpinfo.pl_syscall_code;
+		/*
+		 * The reason for using PT_GET_SC_ARGS_ALL instead
+		 * of PT_GET_SC_ARGS is to emulate Linux bug which strace(1)
+		 * depends on: at initialization it tests whether ptrace works
+		 * by calling close(2), or some other single-argument syscall,
+		 * _with six arguments_, and then verifies whether it can
+		 * fetch them all using this API; otherwise it bails out.
+		 */
+		error = kern_ptrace(td, PT_GET_SC_ARGS_ALL, pid,
+		    &si.entry.args, sizeof(si.entry.args));
+		if (error != 0) {
+			linux_msg(td, "PT_GET_SC_ARGS_ALL failed with error %d",
+			    error);
+			return (error);
+		}
+	} else if (lwpinfo.pl_flags & PL_FLAG_SCX) {
+		si.op = LINUX_PTRACE_SYSCALL_INFO_EXIT;
+		error = kern_ptrace(td, PT_GET_SC_RET, pid, &sr, sizeof(sr));
+
+		if (error != 0) {
+			linux_msg(td, "PT_GET_SC_RET failed with error %d",
+			    error);
+			return (error);
+		}
+
+		if (sr.sr_error == 0) {
+			si.exit.rval = sr.sr_retval[0];
+			si.exit.is_error = 0;
+		} else {
+			si.exit.rval = bsd_to_linux_errno(sr.sr_error);
+			si.exit.is_error = 1;
+		}
+	} else {
+		si.op = LINUX_PTRACE_SYSCALL_INFO_NONE;
+	}
+
+	error = kern_ptrace(td, PT_GETREGS, pid, &b_reg, 0);
+	if (error != 0)
+		return (error);
+
+	si.arch = LINUX_ARCH_AMD64;
+	si.instruction_pointer = b_reg.r_rip;
+	si.stack_pointer = b_reg.r_rsp;
+
+	error = copyout(&si, (void *)data, sizeof(si));
+	if (error == 0)
+		td->td_retval[0] = sizeof(si);
+
+	return (error);
 }
 
 int
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index e56ab423f538..b7128debd97f 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -1025,6 +1025,9 @@ freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap)
 		r.pc.pc_limit = PAIR32TO64(off_t, r32.pc.pc_limit);
 		data = sizeof(r.pc);
 		break;
+	case PT_GET_SC_ARGS_ALL:
+		error = EINVAL;
+		break;
 	default:
 		addr = uap->addr;
 		break;
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index eb628c040cd9..75d950a57057 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -532,6 +532,9 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
 		else
 			error = copyin(uap->addr, &r.pc, uap->data);
 		break;
+	case PT_GET_SC_ARGS_ALL:
+		error = EINVAL;
+		break;
 	default:
 		addr = uap->addr;
 		break;
@@ -713,6 +716,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 	case PT_SET_EVENT_MASK:
 	case PT_DETACH:
 	case PT_GET_SC_ARGS:
+	case PT_GET_SC_ARGS_ALL:
 		sx_xlock(&proctree_lock);
 		proctree_locked = true;
 		break;
@@ -1016,6 +1020,19 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 		    sizeof(register_t));
 		break;
 
+	case PT_GET_SC_ARGS_ALL:
+		CTR1(KTR_PTRACE, "PT_GET_SC_ARGS_ALL: pid %d", p->p_pid);
+		if ((td2->td_dbgflags & (TDB_SCE | TDB_SCX)) == 0
+#ifdef COMPAT_FREEBSD32
+		    || (wrap32 && !safe)
+#endif
+		    ) {
+			error = EINVAL;
+			break;
+		}
+		bcopy(td2->td_sa.args, addr, sizeof(td2->td_sa.args));
+		break;
+
 	case PT_GET_SC_RET:
 		if ((td2->td_dbgflags & (TDB_SCX)) == 0
 #ifdef COMPAT_FREEBSD32
diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h
index 4cd7a3fceaec..de7a49576c30 100644
--- a/sys/sys/ptrace.h
+++ b/sys/sys/ptrace.h
@@ -86,6 +86,10 @@
 #define	PT_VM_TIMESTAMP	40	/* Get VM version (timestamp) */
 #define	PT_VM_ENTRY	41	/* Get VM map (entry) */
 
+#ifdef _KERNEL
+#define	PT_GET_SC_ARGS_ALL	42	/* Used by linux(4) */
+#endif
+
 #define PT_FIRSTMACH    64	/* for machine-specific requests */
 #include <machine/ptrace.h>	/* machine-specific requests, if any */