Re: git: a34f42b083dd - stable/14 - vmm: Support INOUT manual decode.
- In reply to: ShengYi Hung : "git: a34f42b083dd - stable/14 - vmm: Support INOUT manual decode."
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Wed, 04 Mar 2026 14:41:41 UTC
For people who might think that adding fields to struct vm_inout_str breaks the ABI:
This structure is a member of a union, and the size of that union is still
larger than this structure (as verified by a static_assert) even after these
two fields are added, so the union's size — and therefore the ABI — is unchanged.
On 3/4/26 22:38, ShengYi Hung wrote:
> The branch stable/14 has been updated by aokblast:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=a34f42b083dd323314d385bc7b00323d336ea28b
>
> commit a34f42b083dd323314d385bc7b00323d336ea28b
> Author: ShengYi Hung <aokblast@FreeBSD.org>
> AuthorDate: 2025-07-11 08:52:01 +0000
> Commit: ShengYi Hung <aokblast@FreeBSD.org>
> CommitDate: 2026-03-04 14:35:40 +0000
>
> vmm: Support INOUT manual decode.
>
> The inout instruction in AMD SVM requires DecodeAssist feature to decode the
> segment override prefix. However, without that feature, we are still
> able to decode by fetching the instruction directly.
>
> Approved by: markj (mentor)
> MFC after: 2 weeks
> Sponsored by: The FreeBSD Foundation
> Differential Revision: https://reviews.freebsd.org/D51256
>
> (cherry picked from commit c18c521c79b6160ce43bb2ca4c2eb42ccf7e6e57)
> ---
> sys/amd64/include/vmm.h | 2 +
> sys/amd64/include/vmm_instruction_emul.h | 25 +++++++++
> sys/amd64/vmm/amd/svm.c | 90 +++++++++++++++++++-------------
> sys/amd64/vmm/intel/vmx.c | 2 +
> sys/amd64/vmm/vmm_instruction_emul.c | 34 ++++--------
> sys/amd64/vmm/vmm_ioport.c | 40 ++++++++++++++
> 6 files changed, 132 insertions(+), 61 deletions(-)
>
> diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
> index 09f2ea7cb0e5..262fbd6449a0 100644
> --- a/sys/amd64/include/vmm.h
> +++ b/sys/amd64/include/vmm.h
> @@ -674,6 +674,8 @@ struct vm_inout_str {
> int addrsize;
> enum vm_reg_name seg_name;
> struct seg_desc seg_desc;
> + int cs_d;
> + uint64_t cs_base;
> };
>
> enum task_switch_reason {
> diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h
> index a21754ac777f..e57ad81c3e60 100644
> --- a/sys/amd64/include/vmm_instruction_emul.h
> +++ b/sys/amd64/include/vmm_instruction_emul.h
> @@ -31,6 +31,31 @@
>
> #include <sys/mman.h>
>
> +/* struct vie_op.op_type */
> +enum {
> + VIE_OP_TYPE_NONE = 0,
> + VIE_OP_TYPE_MOV,
> + VIE_OP_TYPE_MOVSX,
> + VIE_OP_TYPE_MOVZX,
> + VIE_OP_TYPE_AND,
> + VIE_OP_TYPE_OR,
> + VIE_OP_TYPE_SUB,
> + VIE_OP_TYPE_TWO_BYTE,
> + VIE_OP_TYPE_PUSH,
> + VIE_OP_TYPE_CMP,
> + VIE_OP_TYPE_POP,
> + VIE_OP_TYPE_MOVS,
> + VIE_OP_TYPE_GROUP1,
> + VIE_OP_TYPE_STOS,
> + VIE_OP_TYPE_BITTEST,
> + VIE_OP_TYPE_TWOB_GRP15,
> + VIE_OP_TYPE_ADD,
> + VIE_OP_TYPE_TEST,
> + VIE_OP_TYPE_BEXTR,
> + VIE_OP_TYPE_OUTS,
> + VIE_OP_TYPE_LAST
> +};
> +
> /*
> * Callback functions to read and write memory regions.
> */
> diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
> index 3fda9454090b..b85a3e0fcc3e 100644
> --- a/sys/amd64/vmm/amd/svm.c
> +++ b/sys/amd64/vmm/amd/svm.c
> @@ -309,6 +309,33 @@ svm_set_tsc_offset(struct svm_vcpu *vcpu, uint64_t offset)
> #define MSR_AMD7TH_START 0xC0010000UL
> #define MSR_AMD7TH_END 0xC0011FFFUL
>
> +static void
> +svm_get_cs_info(struct vmcb *vmcb, struct vm_guest_paging *paging, int *cs_d,
> + uint64_t *base)
> +{
> + struct vmcb_segment seg;
> + int error __diagused;
> +
> + error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
> + KASSERT(error == 0, ("%s: vmcb_seg error %d", __func__, error));
> +
> + switch (paging->cpu_mode) {
> + case CPU_MODE_REAL:
> + *base = seg.base;
> + *cs_d = 0;
> + break;
> + case CPU_MODE_PROTECTED:
> + case CPU_MODE_COMPATIBILITY:
> + *cs_d = !!(seg.attrib & VMCB_CS_ATTRIB_D);
> + *base = seg.base;
> + break;
> + default:
> + *base = 0;
> + *cs_d = 0;
> + break;
> + }
> +}
> +
> /*
> * Get the index and bit position for a MSR in permission bitmap.
> * Two bits are used for each MSR: lower bit for read and higher bit for write.
> @@ -727,10 +754,29 @@ svm_inout_str_seginfo(struct svm_vcpu *vcpu, int64_t info1, int in,
>
> if (in) {
> vis->seg_name = VM_REG_GUEST_ES;
> - } else {
> - /* The segment field has standard encoding */
> + } else if (decode_assist()) {
> + /*
> + * The effective segment number in EXITINFO1[12:10] is populated
> + * only if the processor has the DecodeAssist capability.
> + *
> + * XXX this is not specified explicitly in APMv2 but can be
> + * verified empirically.
> + */
> s = (info1 >> 10) & 0x7;
> +
> + /* The segment field has standard encoding */
> vis->seg_name = vm_segment_name(s);
> + } else {
> + /*
> + * The segment register need to be manually decoded by fetching
> + * the instructions near ip. However, we are unable to fetch it
> + * while the interrupts are disabled. Therefore, we leave the
> + * value unset until the generic ins/outs handler runs.
> + */
> + vis->seg_name = VM_REG_LAST;
> + svm_get_cs_info(vcpu->vmcb, &vis->paging, &vis->cs_d,
> + &vis->cs_base);
> + return;
> }
>
> error = svm_getdesc(vcpu, vis->seg_name, &vis->seg_desc);
> @@ -790,16 +836,6 @@ svm_handle_io(struct svm_vcpu *vcpu, struct vm_exit *vmexit)
> info1 = ctrl->exitinfo1;
> inout_string = info1 & BIT(2) ? 1 : 0;
>
> - /*
> - * The effective segment number in EXITINFO1[12:10] is populated
> - * only if the processor has the DecodeAssist capability.
> - *
> - * XXX this is not specified explicitly in APMv2 but can be verified
> - * empirically.
> - */
> - if (inout_string && !decode_assist())
> - return (UNHANDLED);
> -
> vmexit->exitcode = VM_EXITCODE_INOUT;
> vmexit->u.inout.in = (info1 & BIT(0)) ? 1 : 0;
> vmexit->u.inout.string = inout_string;
> @@ -817,6 +853,8 @@ svm_handle_io(struct svm_vcpu *vcpu, struct vm_exit *vmexit)
> vis->index = svm_inout_str_index(regs, vmexit->u.inout.in);
> vis->count = svm_inout_str_count(regs, vmexit->u.inout.rep);
> vis->addrsize = svm_inout_str_addrsize(info1);
> + vis->cs_d = 0;
> + vis->cs_base = 0;
> svm_inout_str_seginfo(vcpu, info1, vmexit->u.inout.in, vis);
> }
>
> @@ -858,10 +896,9 @@ static void
> svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
> {
> struct vm_guest_paging *paging;
> - struct vmcb_segment seg;
> struct vmcb_ctrl *ctrl;
> char *inst_bytes;
> - int error __diagused, inst_len;
> + int inst_len;
>
> ctrl = &vmcb->ctrl;
> paging = &vmexit->u.inst_emul.paging;
> @@ -871,29 +908,8 @@ svm_handle_inst_emul(struct vmcb *vmcb, uint64_t gpa, struct vm_exit *vmexit)
> vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
> svm_paging_info(vmcb, paging);
>
> - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
> - KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error));
> -
> - switch(paging->cpu_mode) {
> - case CPU_MODE_REAL:
> - vmexit->u.inst_emul.cs_base = seg.base;
> - vmexit->u.inst_emul.cs_d = 0;
> - break;
> - case CPU_MODE_PROTECTED:
> - case CPU_MODE_COMPATIBILITY:
> - vmexit->u.inst_emul.cs_base = seg.base;
> -
> - /*
> - * Section 4.8.1 of APM2, Default Operand Size or D bit.
> - */
> - vmexit->u.inst_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ?
> - 1 : 0;
> - break;
> - default:
> - vmexit->u.inst_emul.cs_base = 0;
> - vmexit->u.inst_emul.cs_d = 0;
> - break;
> - }
> + svm_get_cs_info(vmcb, paging, &vmexit->u.inst_emul.cs_d,
> + &vmexit->u.inst_emul.cs_base);
>
> /*
> * Copy the instruction bytes into 'vie' if available.
> diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
> index 3fe20986ec8d..daa4ef56316f 100644
> --- a/sys/amd64/vmm/intel/vmx.c
> +++ b/sys/amd64/vmm/intel/vmx.c
> @@ -2648,6 +2648,8 @@ vmx_exit_process(struct vmx *vmx, struct vmx_vcpu *vcpu, struct vm_exit *vmexit)
> vis->index = inout_str_index(vcpu, in);
> vis->count = inout_str_count(vcpu, vis->inout.rep);
> vis->addrsize = inout_str_addrsize(inst_info);
> + vis->cs_d = 0;
> + vis->cs_base = 0;
> inout_str_seginfo(vcpu, inst_info, in, vis);
> }
> SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpuid, vmexit);
> diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
> index 7626082ff88d..12b7413e33de 100644
> --- a/sys/amd64/vmm/vmm_instruction_emul.c
> +++ b/sys/amd64/vmm/vmm_instruction_emul.c
> @@ -63,30 +63,6 @@
> #include <x86/psl.h>
> #include <x86/specialreg.h>
>
> -/* struct vie_op.op_type */
> -enum {
> - VIE_OP_TYPE_NONE = 0,
> - VIE_OP_TYPE_MOV,
> - VIE_OP_TYPE_MOVSX,
> - VIE_OP_TYPE_MOVZX,
> - VIE_OP_TYPE_AND,
> - VIE_OP_TYPE_OR,
> - VIE_OP_TYPE_SUB,
> - VIE_OP_TYPE_TWO_BYTE,
> - VIE_OP_TYPE_PUSH,
> - VIE_OP_TYPE_CMP,
> - VIE_OP_TYPE_POP,
> - VIE_OP_TYPE_MOVS,
> - VIE_OP_TYPE_GROUP1,
> - VIE_OP_TYPE_STOS,
> - VIE_OP_TYPE_BITTEST,
> - VIE_OP_TYPE_TWOB_GRP15,
> - VIE_OP_TYPE_ADD,
> - VIE_OP_TYPE_TEST,
> - VIE_OP_TYPE_BEXTR,
> - VIE_OP_TYPE_LAST
> -};
> -
> /* struct vie_op.op_flags */
> #define VIE_OP_F_IMM (1 << 0) /* 16/32-bit immediate operand */
> #define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
> @@ -150,6 +126,16 @@ static const struct vie_op one_byte_opcodes[256] = {
> .op_byte = 0x3B,
> .op_type = VIE_OP_TYPE_CMP,
> },
> + [0x6E] = {
> + .op_byte = 0x6E,
> + .op_type = VIE_OP_TYPE_OUTS,
> + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION,
> + },
> + [0x6F] = {
> + .op_byte = 0x6F,
> + .op_type = VIE_OP_TYPE_OUTS,
> + .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION,
> + },
> [0x88] = {
> .op_byte = 0x88,
> .op_type = VIE_OP_TYPE_MOV,
> diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
> index 3caea7deac16..c8f777a82c14 100644
> --- a/sys/amd64/vmm/vmm_ioport.c
> +++ b/sys/amd64/vmm/vmm_ioport.c
> @@ -144,10 +144,50 @@ emulate_inout_port(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu)
> return (0);
> }
>
> +static int
> +decode_segment(struct vcpu *vcpu, enum vm_reg_name *segment)
> +{
> + struct vm_guest_paging *paging;
> + struct vie vie;
> + struct vm_exit *vme;
> + int err;
> + int fault;
> +
> + vme = vm_exitinfo(vcpu);
> + paging = &vme->u.inout_str.paging;
> +
> + vie_init(&vie, NULL, 0);
> + err = vmm_fetch_instruction(vcpu, paging,
> + vme->rip + vme->u.inout_str.cs_base, VIE_INST_SIZE, &vie, &fault);
> + if (err || fault)
> + return (err);
> +
> + err = vmm_decode_instruction(vcpu, VIE_INVALID_GLA, paging->cpu_mode,
> + vme->u.inout_str.cs_d, &vie);
> +
> + if (err || vie.op.op_type != VIE_OP_TYPE_OUTS)
> + return (EINVAL);
> + if (vie.segment_override)
> + *segment = vie.segment_register;
> + else
> + *segment = VM_REG_GUEST_DS;
> +
> + return (0);
> +}
> +
> static int
> emulate_inout_str(struct vcpu *vcpu, struct vm_exit *vmexit, bool *retu)
> {
> + int err;
> +
> *retu = true;
> + if (vmexit->u.inout_str.seg_name == VM_REG_LAST) {
> + err = decode_segment(vcpu, &vmexit->u.inout_str.seg_name);
> + if (err)
> + return (err);
> + return (vm_get_seg_desc(vcpu, vmexit->u.inout_str.seg_name,
> + &vmexit->u.inout_str.seg_desc));
> + }
> return (0); /* Return to userspace to finish emulation */
> }
>
>