Re: git: 64ba1f4cf3a6 - main - rtld: Implement LD_SHOW_AUXV
- In reply to: Konstantin Belousov : "Re: git: 64ba1f4cf3a6 - main - rtld: Implement LD_SHOW_AUXV"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 13 Nov 2021 19:56:10 UTC
On 13 Nov 2021, at 19:39, Konstantin Belousov <kostikbel@gmail.com> wrote:
> On Sat, Nov 13, 2021 at 07:10:54PM +0000, Jessica Clarke wrote:
>> On 13 Nov 2021, at 19:09, Jessica Clarke <jrtc27@freebsd.org> wrote:
>>>
>>> On 13 Nov 2021, at 19:06, Konstantin Belousov <kostikbel@gmail.com> wrote:
>>>> On Sat, Nov 13, 2021 at 08:59:00PM +0200, Konstantin Belousov wrote:
>>>>> On Sat, Nov 13, 2021 at 06:29:24PM +0000, Jessica Clarke wrote:
>>>>>> On 13 Nov 2021, at 17:57, Jessica Clarke <jrtc27@FreeBSD.org> wrote:
>>>>>>>
>>>>>>> On 13 Nov 2021, at 17:54, Jessica Clarke <jrtc27@FreeBSD.org> wrote:
>>>>>>>>
>>>>>>>> On 13 Nov 2021, at 17:33, Konstantin Belousov <kib@FreeBSD.org> wrote:
>>>>>>>>>
>>>>>>>>> The branch main has been updated by kib:
>>>>>>>>>
>>>>>>>>> URL: https://cgit.FreeBSD.org/src/commit/?id=64ba1f4cf3a6847a1dacf4bab0409d94898fa168
>>>>>>>>>
>>>>>>>>> commit 64ba1f4cf3a6847a1dacf4bab0409d94898fa168
>>>>>>>>> Author: Konstantin Belousov <kib@FreeBSD.org>
>>>>>>>>> AuthorDate: 2021-11-13 01:18:13 +0000
>>>>>>>>> Commit: Konstantin Belousov <kib@FreeBSD.org>
>>>>>>>>> CommitDate: 2021-11-13 17:33:13 +0000
>>>>>>>>>
>>>>>>>>> rtld: Implement LD_SHOW_AUXV
>>>>>>>>>
>>>>>>>>> It dumps auxv as seen by interpreter, right before starting any user
>>>>>>>>> code.
>>>>>>>>>
>>>>>>>>> Copied from: glibc
>>>>>>>>> Sponsored by: The FreeBSD Foundation
>>>>>>>>> MFC after: 1 week
>>>>>>>>> ---
>>>>>>>>> libexec/rtld-elf/rtld.1 | 7 +++++-
>>>>>>>>> libexec/rtld-elf/rtld.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>> 2 files changed, 73 insertions(+), 1 deletion(-)
>>>>>>>>>
>>>>>>>>> diff --git a/libexec/rtld-elf/rtld.1 b/libexec/rtld-elf/rtld.1
>>>>>>>>> index 187dc105667a..66aa2bdabd17 100644
>>>>>>>>> --- a/libexec/rtld-elf/rtld.1
>>>>>>>>> +++ b/libexec/rtld-elf/rtld.1
>>>>>>>>> @@ -28,7 +28,7 @@
>>>>>>>>> .\"
>>>>>>>>> .\" $FreeBSD$
>>>>>>>>> .\"
>>>>>>>>> -.Dd August 15, 2021
>>>>>>>>> +.Dd November 13, 2021
>>>>>>>>> .Dt RTLD 1
>>>>>>>>> .Os
>>>>>>>>> .Sh NAME
>>>>>>>>> @@ -309,6 +309,11 @@ will process the filtee dependencies of the loaded objects immediately,
>>>>>>>>> instead of postponing it until required.
>>>>>>>>> Normally, the filtees are opened at the time of the first symbol resolution
>>>>>>>>> from the filter object.
>>>>>>>>> +.It Ev LD_SHOW_AUXV
>>>>>>>>> +If set, causes
>>>>>>>>> +.Nm
>>>>>>>>> +to dump content of the aux vector to standard output, before passing
>>>>>>>>> +control to any user code.
>>>>>>>>> .El
>>>>>>>>> .Sh DIRECT EXECUTION MODE
>>>>>>>>> .Nm
>>>>>>>>> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
>>>>>>>>> index c173c5a6e22e..0475134b0d96 100644
>>>>>>>>> --- a/libexec/rtld-elf/rtld.c
>>>>>>>>> +++ b/libexec/rtld-elf/rtld.c
>>>>>>>>> @@ -104,6 +104,7 @@ static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj,
>>>>>>>>> static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, int);
>>>>>>>>> static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo *);
>>>>>>>>> static bool donelist_check(DoneList *, const Obj_Entry *);
>>>>>>>>> +static void dump_auxv(Elf_Auxinfo **aux_info);
>>>>>>>>> static void errmsg_restore(struct dlerror_save *);
>>>>>>>>> static struct dlerror_save *errmsg_save(void);
>>>>>>>>> static void *fill_search_info(const char *, size_t, void *);
>>>>>>>>> @@ -364,6 +365,7 @@ enum {
>>>>>>>>> LD_TRACE_LOADED_OBJECTS_FMT1,
>>>>>>>>> LD_TRACE_LOADED_OBJECTS_FMT2,
>>>>>>>>> LD_TRACE_LOADED_OBJECTS_ALL,
>>>>>>>>> + LD_SHOW_AUXV,
>>>>>>>>> };
>>>>>>>>>
>>>>>>>>> struct ld_env_var_desc {
>>>>>>>>> @@ -396,6 +398,7 @@ static struct ld_env_var_desc ld_env_vars[] = {
>>>>>>>>> LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT1, false),
>>>>>>>>> LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT2, false),
>>>>>>>>> LD_ENV_DESC(TRACE_LOADED_OBJECTS_ALL, false),
>>>>>>>>> + LD_ENV_DESC(SHOW_AUXV, false),
>>>>>>>>> };
>>>>>>>>>
>>>>>>>>> static const char *
>>>>>>>>> @@ -857,6 +860,9 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp)
>>>>>>>>> if (rtld_verify_versions(&list_main) == -1 && !ld_tracing)
>>>>>>>>> rtld_die();
>>>>>>>>>
>>>>>>>>> + if (ld_get_env_var(LD_SHOW_AUXV) != NULL)
>>>>>>>>> + dump_auxv(aux_info);
>>>>>>>>> +
>>>>>>>>> if (ld_tracing) { /* We're done */
>>>>>>>>> trace_loaded_objects(obj_main);
>>>>>>>>> exit(0);
>>>>>>>>> @@ -6058,6 +6064,67 @@ print_usage(const char *argv0)
>>>>>>>>> " <args> Arguments to the executed process\n", argv0);
>>>>>>>>> }
>>>>>>>>>
>>>>>>>>> +#define AUXFMT(at, xfmt) [at] = { .name = #at, .fmt = xfmt }
>>>>>>>>> +static const struct auxfmt {
>>>>>>>>> + const char *name;
>>>>>>>>> + const char *fmt;
>>>>>>>>> +} auxfmts[] = {
>>>>>>>>> + AUXFMT(AT_NULL, NULL),
>>>>>>>>> + AUXFMT(AT_IGNORE, NULL),
>>>>>>>>> + AUXFMT(AT_EXECFD, "%d"),
>>>>>>>>> + AUXFMT(AT_PHDR, "%p"),
>>>>>>>>> + AUXFMT(AT_PHENT, "%u"),
>>>>>>>>> + AUXFMT(AT_PHNUM, "%u"),
>>>>>>>>> + AUXFMT(AT_PAGESZ, "%u"),
>>>>>>>>> + AUXFMT(AT_BASE, "%#lx"),
>>>>>>>>> + AUXFMT(AT_FLAGS, "%#lx"),
>>>>>>>>> + AUXFMT(AT_ENTRY, "%p"),
>>>>>>>>> + AUXFMT(AT_NOTELF, NULL),
>>>>>>>>> + AUXFMT(AT_UID, "%d"),
>>>>>>>>> + AUXFMT(AT_EUID, "%d"),
>>>>>>>>> + AUXFMT(AT_GID, "%d"),
>>>>>>>>> + AUXFMT(AT_EGID, "%d"),
>>>>>>>>> + AUXFMT(AT_EXECPATH, "%s"),
>>>>>>>>> + AUXFMT(AT_CANARY, "%p"),
>>>>>>>>> + AUXFMT(AT_CANARYLEN, "%u"),
>>>>>>>>> + AUXFMT(AT_OSRELDATE, "%u"),
>>>>>>>>> + AUXFMT(AT_NCPUS, "%u"),
>>>>>>>>> + AUXFMT(AT_PAGESIZES, "%p"),
>>>>>>>>> + AUXFMT(AT_PAGESIZESLEN, "%u"),
>>>>>>>>> + AUXFMT(AT_TIMEKEEP, "%p"),
>>>>>>>>> + AUXFMT(AT_STACKPROT, "%#x"),
>>>>>>>>> + AUXFMT(AT_EHDRFLAGS, "%#lx"),
>>>>>>>>> + AUXFMT(AT_HWCAP, "%#lx"),
>>>>>>>>> + AUXFMT(AT_HWCAP2, "%#lx"),
>>>>>>>>> + AUXFMT(AT_BSDFLAGS, "%#lx"),
>>>>>>>>> + AUXFMT(AT_ARGC, "%u"),
>>>>>>>>> + AUXFMT(AT_ARGV, "%p"),
>>>>>>>>> + AUXFMT(AT_ENVC, "%p"),
>>>>>>>>> + AUXFMT(AT_ENVV, "%p"),
>>>>>>>>> + AUXFMT(AT_PS_STRINGS, "%p"),
>>>>>>>>> + AUXFMT(AT_FXRNG, "%p"),
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>> +static void
>>>>>>>>> +dump_auxv(Elf_Auxinfo **aux_info)
>>>>>>>>> +{
>>>>>>>>> + Elf_Auxinfo *auxp;
>>>>>>>>> + const struct auxfmt *fmt;
>>>>>>>>> + int i;
>>>>>>>>> +
>>>>>>>>> + for (i = 0; i < AT_COUNT; i++) {
>>>>>>>>> + auxp = aux_info[i];
>>>>>>>>> + if (auxp == NULL)
>>>>>>>>> + continue;
>>>>>>>>> + fmt = &auxfmts[i];
>>>>>>>>> + if (fmt->fmt == NULL)
>>>>>>>>> + continue;
>>>>>>>>> + rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name);
>>>>>>>>> + rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr);
>>>>>>>>> + rtld_fdprintf(STDOUT_FILENO, "\n");
>>>>>>>>
>>>>>>>> This is undefined behaviour, breaks CHERI, and totally unnecessary. You
>>>>>>>> have a handful of cases here, just make an enum and have separate
>>>>>>>> rtld_fdprintf calls.
>>>>>>
>>>>>> In particular, ignoring CHERI, unsigned ints are sign-extended to 64
>>>>>> bits on MIPS and RISC-V. Thus by passing a 64-bit value but using a %u,
>>>>>> you are violating the calling convention. I can’t currently get GCC or
>>>>>> Clang to exploit the fact that varargs arguments are sign-extended, but
>>>>>> on MIPS, and on RISC-V with GCC, there is a redundant sext.w that could
>>>>>> legally be optimised out, and that optimisation would be broken by this
>>>>>> calling convention violation. (Clang is currently stupid and round-trips
>>>>>> via memory even when the va_arg calls have no branching surrounding
>>>>>> them, rather than just grabbing from the register.)
>>>>> I might understand the argument that all non-pointer formats for auxv
>>>>> should be longs, i.e. %lu/%ld/%lx, but this is the only problem I see
>>>>> there. We do rely on having specific representations for addresses and
>>>>> longs, and a low-level component such as rtld has full rights to exercise
>>>>> this fact, the same as the VM subsystem or memory allocators.
>>>>>
>>>>> In fact ELF spec exercises this as well.
>>>>> Our arches are either ILP32 or LP64.
>>>>>
>>>>>>
>>>>>> Then CHERI makes it worse because a_ptr and a_val do not have the same
>>>>>> representation, although in practice I think passing a_ptr and nothing
>>>>>> further does end up working on CHERI-RISC-V and Morello, just not
>>>>>> CHERI-MIPS due to being big-endian.
>>>>
>>>> Ok, the following should be enough for CHERI, right?
>>>>
>>>> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
>>>> index 0475134b0d96..cf467ae7aacd 100644
>>>> --- a/libexec/rtld-elf/rtld.c
>>>> +++ b/libexec/rtld-elf/rtld.c
>>>> @@ -6071,33 +6071,33 @@ static const struct auxfmt {
>>>> } auxfmts[] = {
>>>> AUXFMT(AT_NULL, NULL),
>>>> AUXFMT(AT_IGNORE, NULL),
>>>> - AUXFMT(AT_EXECFD, "%d"),
>>>> + AUXFMT(AT_EXECFD, "%ld"),
>>>> AUXFMT(AT_PHDR, "%p"),
>>>> - AUXFMT(AT_PHENT, "%u"),
>>>> - AUXFMT(AT_PHNUM, "%u"),
>>>> - AUXFMT(AT_PAGESZ, "%u"),
>>>> + AUXFMT(AT_PHENT, "%lu"),
>>>> + AUXFMT(AT_PHNUM, "%lu"),
>>>> + AUXFMT(AT_PAGESZ, "%lu"),
>>>> AUXFMT(AT_BASE, "%#lx"),
>>>> AUXFMT(AT_FLAGS, "%#lx"),
>>>> AUXFMT(AT_ENTRY, "%p"),
>>>> AUXFMT(AT_NOTELF, NULL),
>>>> - AUXFMT(AT_UID, "%d"),
>>>> - AUXFMT(AT_EUID, "%d"),
>>>> - AUXFMT(AT_GID, "%d"),
>>>> - AUXFMT(AT_EGID, "%d"),
>>>> + AUXFMT(AT_UID, "%ld"),
>>>> + AUXFMT(AT_EUID, "%ld"),
>>>> + AUXFMT(AT_GID, "%ld"),
>>>> + AUXFMT(AT_EGID, "%ld"),
>>>> AUXFMT(AT_EXECPATH, "%s"),
>>>> AUXFMT(AT_CANARY, "%p"),
>>>> - AUXFMT(AT_CANARYLEN, "%u"),
>>>> - AUXFMT(AT_OSRELDATE, "%u"),
>>>> - AUXFMT(AT_NCPUS, "%u"),
>>>> + AUXFMT(AT_CANARYLEN, "%lu"),
>>>> + AUXFMT(AT_OSRELDATE, "%lu"),
>>>> + AUXFMT(AT_NCPUS, "%lu"),
>>>> AUXFMT(AT_PAGESIZES, "%p"),
>>>> - AUXFMT(AT_PAGESIZESLEN, "%u"),
>>>> + AUXFMT(AT_PAGESIZESLEN, "%lu"),
>>>> AUXFMT(AT_TIMEKEEP, "%p"),
>>>> - AUXFMT(AT_STACKPROT, "%#x"),
>>>> + AUXFMT(AT_STACKPROT, "%#lx"),
>>>> AUXFMT(AT_EHDRFLAGS, "%#lx"),
>>>> AUXFMT(AT_HWCAP, "%#lx"),
>>>> AUXFMT(AT_HWCAP2, "%#lx"),
>>>> AUXFMT(AT_BSDFLAGS, "%#lx"),
>>>> - AUXFMT(AT_ARGC, "%u"),
>>>> + AUXFMT(AT_ARGC, "%lu"),
>>>> AUXFMT(AT_ARGV, "%p"),
>>>> AUXFMT(AT_ENVC, "%p"),
>>>> AUXFMT(AT_ENVV, "%p"),
>>>> @@ -6105,6 +6105,15 @@ static const struct auxfmt {
>>>> AUXFMT(AT_FXRNG, "%p"),
>>>> };
>>>>
>>>> +static bool
>>>> +is_ptr_fmt(const char *fmt)
>>>> +{
>>>> + char last;
>>>> +
>>>> + last = fmt[strlen(fmt) - 1];
>>>> + return (last == 'p' || last == 's');
>>>> +}
>>>> +
>>>> static void
>>>> dump_auxv(Elf_Auxinfo **aux_info)
>>>> {
>>>> @@ -6120,7 +6129,8 @@ dump_auxv(Elf_Auxinfo **aux_info)
>>>> if (fmt->fmt == NULL)
>>>> continue;
>>>> rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name);
>>>> - rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr);
>>>> + rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, is_ptr_fmt(fmt->fmt) ?
>>>> + auxp->a_un.a_ptr : auxp->a_un.a_val);
>>>> rtld_fdprintf(STDOUT_FILENO, "\n");
>>>> }
>>>> }
>>>
>>> That should indeed work, though I’d argue it’s still not as nice as
>>> avoiding rtld_fdprintfx entirely.
>>
>> Wait, no, it doesn’t: the ternary means both operands need to have the
>> same type, so you end up implicitly converting the long to a pointer. You
>> need a real if and two different rtld_fdprintfx calls (or just do it as
>> I’ve suggested).
> Ok, real if() then.
>
> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
> index 0475134b0d96..d5c3d2893582 100644
> --- a/libexec/rtld-elf/rtld.c
> +++ b/libexec/rtld-elf/rtld.c
> @@ -6071,33 +6071,33 @@ static const struct auxfmt {
> } auxfmts[] = {
> AUXFMT(AT_NULL, NULL),
> AUXFMT(AT_IGNORE, NULL),
> - AUXFMT(AT_EXECFD, "%d"),
> + AUXFMT(AT_EXECFD, "%ld"),
> AUXFMT(AT_PHDR, "%p"),
> - AUXFMT(AT_PHENT, "%u"),
> - AUXFMT(AT_PHNUM, "%u"),
> - AUXFMT(AT_PAGESZ, "%u"),
> + AUXFMT(AT_PHENT, "%lu"),
> + AUXFMT(AT_PHNUM, "%lu"),
> + AUXFMT(AT_PAGESZ, "%lu"),
> AUXFMT(AT_BASE, "%#lx"),
> AUXFMT(AT_FLAGS, "%#lx"),
> AUXFMT(AT_ENTRY, "%p"),
> AUXFMT(AT_NOTELF, NULL),
> - AUXFMT(AT_UID, "%d"),
> - AUXFMT(AT_EUID, "%d"),
> - AUXFMT(AT_GID, "%d"),
> - AUXFMT(AT_EGID, "%d"),
> + AUXFMT(AT_UID, "%ld"),
> + AUXFMT(AT_EUID, "%ld"),
> + AUXFMT(AT_GID, "%ld"),
> + AUXFMT(AT_EGID, "%ld"),
> AUXFMT(AT_EXECPATH, "%s"),
> AUXFMT(AT_CANARY, "%p"),
> - AUXFMT(AT_CANARYLEN, "%u"),
> - AUXFMT(AT_OSRELDATE, "%u"),
> - AUXFMT(AT_NCPUS, "%u"),
> + AUXFMT(AT_CANARYLEN, "%lu"),
> + AUXFMT(AT_OSRELDATE, "%lu"),
> + AUXFMT(AT_NCPUS, "%lu"),
> AUXFMT(AT_PAGESIZES, "%p"),
> - AUXFMT(AT_PAGESIZESLEN, "%u"),
> + AUXFMT(AT_PAGESIZESLEN, "%lu"),
> AUXFMT(AT_TIMEKEEP, "%p"),
> - AUXFMT(AT_STACKPROT, "%#x"),
> + AUXFMT(AT_STACKPROT, "%#lx"),
> AUXFMT(AT_EHDRFLAGS, "%#lx"),
> AUXFMT(AT_HWCAP, "%#lx"),
> AUXFMT(AT_HWCAP2, "%#lx"),
> AUXFMT(AT_BSDFLAGS, "%#lx"),
> - AUXFMT(AT_ARGC, "%u"),
> + AUXFMT(AT_ARGC, "%lu"),
> AUXFMT(AT_ARGV, "%p"),
> AUXFMT(AT_ENVC, "%p"),
> AUXFMT(AT_ENVV, "%p"),
> @@ -6105,6 +6105,15 @@ static const struct auxfmt {
> AUXFMT(AT_FXRNG, "%p"),
> };
>
> +static bool
> +is_ptr_fmt(const char *fmt)
> +{
> + char last;
> +
> + last = fmt[strlen(fmt) - 1];
> + return (last == 'p' || last == 's');
> +}
> +
> static void
> dump_auxv(Elf_Auxinfo **aux_info)
> {
> @@ -6120,7 +6129,13 @@ dump_auxv(Elf_Auxinfo **aux_info)
> if (fmt->fmt == NULL)
> continue;
> rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name);
> - rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr);
> + if (is_ptr_fmt(fmt->fmt)) {
> + rtld_fdprintfx(STDOUT_FILENO, fmt->fmt,
> + auxp->a_un.a_ptr);
> + } else {
> + rtld_fdprintfx(STDOUT_FILENO, fmt->fmt,
> + auxp->a_un.a_val);
> + }
> rtld_fdprintf(STDOUT_FILENO, "\n");
> }
> }
I can’t think of a reason why that wouldn’t work, so consider this
reviewed by me.
Thanks,
Jess