Re: git: df47355fae72 - main - libpmc: Add support for IBS qualifiers

From: Oliver Pinter <oliver.pntr_at_gmail.com>
Date: Mon, 23 Mar 2026 21:12:05 UTC
On Monday, March 23, 2026, Mitchell Horne <mhorne@freebsd.org> wrote:

> The branch main has been updated by mhorne:
>
> URL: https://cgit.FreeBSD.org/src/commit/?id=df47355fae720fd8f63f36a50c8933f8342483d2
>
> commit df47355fae720fd8f63f36a50c8933f8342483d2
> Author:     Ali Mashtizadeh <mashti@uwaterloo.ca>
> AuthorDate: 2026-03-18 04:27:09 +0000
> Commit:     Mitchell Horne <mhorne@FreeBSD.org>
> CommitDate: 2026-03-23 20:21:28 +0000
>
>     libpmc: Add support for IBS qualifiers
>
>     Add support to libpmc for parsing the IBS qualifiers and computing the
>     ctl register value as a function of the qualifiers and the sample rate.
>     This includes all of the flags available up to AMD Zen 5.  Along side
>     these user facing changes I included the documentation for AMD IBS.
>
>     Reviewed by:    mhorne
>     Sponsored by:   Netflix
>     Pull Request:   https://github.com/freebsd/freebsd-src/pull/2081
> ---
>  lib/libpmc/Makefile       |   1 +
>  lib/libpmc/libpmc.c       |  71 ++++++++++++++++++----
>  lib/libpmc/pmc.3          |   7 +++
>  lib/libpmc/pmc.amd.3      |   1 +
>  lib/libpmc/pmc.core.3     |   1 +
>  lib/libpmc/pmc.core2.3    |   1 +
>  lib/libpmc/pmc.iaf.3      |   1 +
>  lib/libpmc/pmc.ibs.3      | 150 ++++++++++++++++++++++++++++++
> ++++++++++++++++
>  lib/libpmc/pmc.soft.3     |   1 +
>  lib/libpmc/pmc.tsc.3      |   1 +
>  lib/libpmc/pmc.ucf.3      |   1 +
>  sys/dev/hwpmc/hwpmc_ibs.h |  19 +++++-
>  12 files changed, 244 insertions(+), 11 deletions(-)
>
> diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile
> index 590f719ebff4..442efdc3d9c0 100644
> --- a/lib/libpmc/Makefile
> +++ b/lib/libpmc/Makefile
> @@ -74,6 +74,7 @@ MAN+= pmc.haswell.3
>  MAN+=  pmc.haswelluc.3
>  MAN+=  pmc.haswellxeon.3
>  MAN+=  pmc.iaf.3
> +MAN+=  pmc.ibs.3
>  MAN+=  pmc.ivybridge.3
>  MAN+=  pmc.ivybridgexeon.3
>  MAN+=  pmc.sandybridge.3
> diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c
> index ceba40aa7b39..ebb642e8d16b 100644
> --- a/lib/libpmc/libpmc.c
> +++ b/lib/libpmc/libpmc.c
> @@ -696,7 +696,7 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
>      struct pmc_op_pmcallocate *pmc_config)
>  {
>         char *e, *p, *q;
> -       uint64_t ctl;
> +       uint64_t ctl, ldlat;
>
>         pmc_config->pm_caps |=
>             (PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_PRECISE);
> @@ -714,23 +714,74 @@ ibs_allocate_pmc(enum pmc_event pe, char *ctrspec,
>                 return (-1);
>         }
>
> +       /* IBS only supports sampling mode */
> +       if (!PMC_IS_SAMPLING_MODE(pmc_config->pm_mode)) {
> +               return (-1);
> +       }
> +
>         /* parse parameters */
> -       while ((p = strsep(&ctrspec, ",")) != NULL) {
> -               if (KWPREFIXMATCH(p, "ctl=")) {
> -                       q = strchr(p, '=');
> -                       if (*++q == '\0') /* skip '=' */
> +       ctl = 0;
> +       if (pe == PMC_EV_IBS_FETCH) {
> +               while ((p = strsep(&ctrspec, ",")) != NULL) {
> +                       if (KWMATCH(p, "l3miss")) {
> +                               ctl |= IBS_FETCH_CTL_L3MISSONLY;
> +                       } else if (KWMATCH(p, "randomize")) {
> +                               ctl |= IBS_FETCH_CTL_RANDOMIZE;
> +                       } else {
>                                 return (-1);
> +                       }
> +               }
>
> -                       ctl = strtoull(q, &e, 0);
> -                       if (e == q || *e != '\0')
> +               if (pmc_config->pm_count < IBS_FETCH_MIN_RATE ||
> +                   pmc_config->pm_count > IBS_FETCH_MAX_RATE)
> +                       return (-1);
> +
> +               ctl |= IBS_FETCH_INTERVAL_TO_CTL(pmc_config->pm_count);
> +       } else {
> +               while ((p = strsep(&ctrspec, ",")) != NULL) {
> +                       if (KWMATCH(p, "l3miss")) {
> +                               ctl |= IBS_OP_CTL_L3MISSONLY;
> +                       } else if (KWPREFIXMATCH(p, "ldlat=")) {
> +                               q = strchr(p, '=');
> +                               if (*++q == '\0') /* skip '=' */
> +                                       return (-1);
> +
> +                               ldlat = strtoull(q, &e, 0);
> +                               if (e == q || *e != '\0')
> +                                       return (-1);
> +
> +                               /*
> +                                * IBS load latency filtering requires the
> +                                * latency to be a multiple of 128 and
> between
> +                                * 128 and 2048.  The latency is stored in
> the
> +                                * IbsOpLatThrsh field, which only contains
> +                                * four bits so the processor computes
> +                                * (IbsOpLatThrsh+1)*128 as the value.
> +                                *
> +                                * AMD PPR Vol 1 for AMD Family 1Ah Model
> 02h
> +                                * C1 (57238) 2026-03-06 Revision 0.49.
> +                                */
> +                               if (ldlat < 128 || ldlat > 2048)
> +                                       return (-1);
> +                               ctl |= IBS_OP_CTL_LDLAT_TO_CTL(ldlat);
> +                               ctl |= IBS_OP_CTL_L3MISSONLY |
> IBS_OP_CTL_LATFLTEN;
> +                       } else if (KWMATCH(p, "randomize")) {
> +                               ctl |= IBS_OP_CTL_COUNTERCONTROL;
> +                       } else {
>                                 return (-1);
> +                       }
> +               }
>
> -                       pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl;
> -               } else {
> +               if (pmc_config->pm_count < IBS_OP_MIN_RATE ||
> +                   pmc_config->pm_count > IBS_OP_MAX_RATE)
>                         return (-1);
> -               }
> +
> +               ctl |= IBS_OP_INTERVAL_TO_CTL(pmc_config->pm_count);
>         }
>
> +
> +       pmc_config->pm_md.pm_ibs.ibs_ctl |= ctl;
> +
>         return (0);
>  }
>
> diff --git a/lib/libpmc/pmc.3 b/lib/libpmc/pmc.3
> index 9a5b599759ff..cb28e0b786b9 100644
> --- a/lib/libpmc/pmc.3
> +++ b/lib/libpmc/pmc.3
> @@ -224,6 +224,11 @@ performance measurement architecture version 2 and
> later.
>  Programmable hardware counters present in CPUs conforming to the
>  .Tn Intel
>  performance measurement architecture version 1 and later.
> +.It Li PMC_CLASS_IBS
> +.Tn AMD
> +Instruction Based Sampling (IBS) counters present in
> +.Tn AMD
> +Family 10h and above.
>  .It Li PMC_CLASS_K8
>  Programmable hardware counters present in
>  .Tn "AMD Athlon64"
> @@ -491,6 +496,7 @@ following manual pages:
>  .It Em "PMC Class"      Ta Em "Manual Page"
>  .It Li PMC_CLASS_IAF    Ta Xr pmc.iaf 3
>  .It Li PMC_CLASS_IAP    Ta Xr pmc.atom 3 , Xr pmc.core 3 , Xr pmc.core2 3
> +.It Li PMC_CLASS_IBS    Ta Xr pmc.ibs 3
>  .It Li PMC_CLASS_K8     Ta Xr pmc.amd 3
>  .It Li PMC_CLASS_TSC    Ta Xr pmc.tsc 3
>  .El
> @@ -542,6 +548,7 @@ Doing otherwise is unsupported.
>  .Xr pmc.haswelluc 3 ,
>  .Xr pmc.haswellxeon 3 ,
>  .Xr pmc.iaf 3 ,
> +.Xr pmc.ibs 3 ,
>  .Xr pmc.ivybridge 3 ,
>  .Xr pmc.ivybridgexeon 3 ,
>  .Xr pmc.sandybridge 3 ,
> diff --git a/lib/libpmc/pmc.amd.3 b/lib/libpmc/pmc.amd.3
> index 047b31aa78bb..75c6331b000f 100644
> --- a/lib/libpmc/pmc.amd.3
> +++ b/lib/libpmc/pmc.amd.3
> @@ -777,6 +777,7 @@ and the underlying hardware events used.
>  .Xr pmc.core 3 ,
>  .Xr pmc.core2 3 ,
>  .Xr pmc.iaf 3 ,
> +.Xr pmc.ibs 3 ,
>  .Xr pmc.soft 3 ,
>  .Xr pmc.tsc 3 ,
>  .Xr pmclog 3 ,
> diff --git a/lib/libpmc/pmc.core.3 b/lib/libpmc/pmc.core.3
> index b4fa9ab661a4..4c41e7c7ad3b 100644
> --- a/lib/libpmc/pmc.core.3
> +++ b/lib/libpmc/pmc.core.3
> @@ -786,6 +786,7 @@ may not count some transitions.
>  .Xr pmc.atom 3 ,
>  .Xr pmc.core2 3 ,
>  .Xr pmc.iaf 3 ,
> +.Xr pmc.ibs 3 ,
>  .Xr pmc.soft 3 ,
>  .Xr pmc.tsc 3 ,
>  .Xr pmclog 3 ,
> diff --git a/lib/libpmc/pmc.core2.3 b/lib/libpmc/pmc.core2.3
> index 86604b7ff16c..7e544fad43b6 100644
> --- a/lib/libpmc/pmc.core2.3
> +++ b/lib/libpmc/pmc.core2.3
> @@ -1101,6 +1101,7 @@ and the underlying hardware events used.
>  .Xr pmc.atom 3 ,
>  .Xr pmc.core 3 ,
>  .Xr pmc.iaf 3 ,
> +.Xr pmc.ibs 3 ,
>  .Xr pmc.soft 3 ,
>  .Xr pmc.tsc 3 ,
>  .Xr pmc_cpuinfo 3 ,
> diff --git a/lib/libpmc/pmc.iaf.3 b/lib/libpmc/pmc.iaf.3
> index eaf45db140f5..c3528e472103 100644
> --- a/lib/libpmc/pmc.iaf.3
> +++ b/lib/libpmc/pmc.iaf.3
> @@ -125,6 +125,7 @@ CPU, use the event specifier
>  .Xr pmc.atom 3 ,
>  .Xr pmc.core 3 ,
>  .Xr pmc.core2 3 ,
> +.Xr pmc.ibs 3 ,
>  .Xr pmc.soft 3 ,
>  .Xr pmc.tsc 3 ,
>  .Xr pmc_cpuinfo 3 ,
> diff --git a/lib/libpmc/pmc.ibs.3 b/lib/libpmc/pmc.ibs.3
> new file mode 100644
> index 000000000000..69b90b84556c
> --- /dev/null
> +++ b/lib/libpmc/pmc.ibs.3
> @@ -0,0 +1,150 @@
> +.\" Copyright (c) 2016 Ali Mashtizadeh.  All rights reserved.


Shouldn't the copyright year here be 2026 rather than 2016?


> +.\"
> +.\" Redistribution and use in source and binary forms, with or without
> +.\" modification, are permitted provided that the following conditions
> +.\" are met:
> +.\" 1. Redistributions of source code must retain the above copyright
> +.\"    notice, this list of conditions and the following disclaimer.
> +.\" 2. Redistributions in binary form must reproduce the above copyright
> +.\"    notice, this list of conditions and the following disclaimer in the
> +.\"    documentation and/or other materials provided with the
> distribution.
> +.\"
> +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> PURPOSE
> +.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE
> LIABLE
> +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> CONSEQUENTIAL
> +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
> GOODS
> +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
> STRICT
> +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
> WAY
> +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> +.\" SUCH DAMAGE.
> +.\"
> +.Dd March 15, 2026
> +.Dt PMC.IBS 3
> +.Os
> +.Sh NAME
> +.Nm pmc.ibs
> +.Nd Instruction Based Sampling for
> +.Tn AMD
> +CPUs
> +.Sh LIBRARY
> +.Lb libpmc
> +.Sh SYNOPSIS
> +.In pmc.h
> +.Sh DESCRIPTION
> +AMD Instruction Based Sampling (IBS) was introduced with the K10 family of
> +CPUs.
> +AMD IBS is an alternative approach that samples instructions or micro-ops
> and
> +provides a per-instruction or micro-op breakdown of the sources of stalls.
> +.Pp
> +Unlike traditional counters, IBS can only be used in the sampling mode and
> +provides extra data embedded in the callchain.
> +IBS events set the PMC_F_MULTIPART flag to signify multiple payload types
> are
> +contained in the callchain.
> +The first 8 bytes of the callchain contain four tuples with a one byte
> type and
> +a one byte length field.
> +The regular PMC callchain can be found following the multipart payload.
> +.Pp
> +IBS only provides two events that analyze instruction fetches and
> instruction
> +execution.
> +The instruction fetch (ibs-fetch) event provides data on the processor
> +front-end including reporting instruction cache and TLB events.
> +The instruction execution (ibs-op) event provides data on the processor
> +execution including reporting mispredictions, data cache and TLB events.
> +You should use the AMD PMC counters documented in
> +.Xr pmc.amd 3
> +to analyze stalls relating to instruction issue, including reservation contention.
> +.Pp
> +A guide to analyzing IBS data is provided in Appendix G of the
> +.Rs
> +.%B "Software Optimization Guide for AMD Family 10h and 12h Processors"
> +.%N "Publication No. 40546"
> +.%D "February 2011"
> +.%Q "Advanced Micro Devices, Inc."
> +.Re
> +A more recent document should be used for decoding all of the flags and
> fields
> +in the IBS data.
> +For example, see the AMD Zen 5 documentation
> +.Rs
> +.%B "Processor Programming Reference (PPR) for AMD Family 1Ah Model 02h"
> +.%N "Publication No. 57238"
> +.%D "March 6, 2026"
> +.%Q "Advanced Micro Devices, Inc."
> +.Re
> +.Ss PMC Features
> +AMD IBS supports the following capabilities.
> +.Bl -column "PMC_CAP_INTERRUPT" "Support"
> +.It Em Capability Ta Em Support
> +.It PMC_CAP_CASCADE Ta \&No
> +.It PMC_CAP_EDGE Ta Yes
> +.It PMC_CAP_INTERRUPT Ta Yes
> +.It PMC_CAP_INVERT Ta \&No
> +.It PMC_CAP_READ Ta \&No
> +.It PMC_CAP_PRECISE Ta Yes
> +.It PMC_CAP_SYSTEM Ta Yes
> +.It PMC_CAP_TAGGING Ta \&No
> +.It PMC_CAP_THRESHOLD Ta \&No
> +.It PMC_CAP_USER Ta \&No
> +.It PMC_CAP_WRITE Ta \&No
> +.El
> +.Pp
> +By default AMD IBS enables the edge, interrupt, system and precise flags.
> +.Ss Event Qualifiers
> +Event specifiers for AMD IBS can have the following optional
> +qualifiers:
> +.Bl -tag -width "ldlat=value"
> +.It Li l3miss
> +Configure IBS to only sample if an l3miss occurred.
> +.It Li ldlat= Ns Ar value
> +Configure the counter to only sample events with load latencies above
> +.Ar ldlat .
> +IBS only supports filtering latencies that are a multiple of 128 and
> between
> +128 and 2048.
> +Load latency filtering can only be used with ibs-op events and implies the
> +l3miss qualifier.
> +.It Li randomize
> +Randomize the sampling rate.
> +.El
> +.Ss AMD IBS Events Specifiers
> +The IBS event class provides only two event specifiers:
> +.Bl -tag -width indent
> +.It Li ibs-fetch Xo
> +.Op ,l3miss
> +.Op ,randomize
> +.Xc
> +Collect performance samples during instruction fetch.
> +The
> +.Ar randomize
> +qualifier randomly sets the bottom four bits of the sample rate.
> +.It Li ibs-op Xo
> +.Op ,l3miss
> +.Op ,ldlat= Ns Ar ldlat
> +.Op ,randomize
> +.Xc
> +Collect performance samples during instruction execution.
> +The
> +.Ar randomize
> +qualifier, upon reaching the maximum count, restarts the count with a
> value
> +between 1 and 127.
> +.El
> +.Pp
> +You may collect both events at the same time.
> +N.B. AMD discouraged doing so with certain older processors, stating that
> +sampling both simultaneously perturbs the results.
> +Please see the processor programming reference for your specific
> processor.
> +.Sh SEE ALSO
> +.Xr pmc 3 ,
> +.Xr pmc.amd 3 ,
> +.Xr pmc.soft 3 ,
> +.Xr pmc.tsc 3 ,
> +.Xr pmclog 3 ,
> +.Xr hwpmc 4
> +.Sh HISTORY
> +AMD IBS support was first introduced in
> +.Fx 16.0 .
> +.Sh AUTHORS
> +AMD IBS support and this manual page were written by
> +.An Ali Mashtizadeh Aq Mt ali@mashtizadeh.com
> +and sponsored by Netflix, Inc.
> diff --git a/lib/libpmc/pmc.soft.3 b/lib/libpmc/pmc.soft.3
> index 08d5af63d02d..f58b3e8ffa26 100644
> --- a/lib/libpmc/pmc.soft.3
> +++ b/lib/libpmc/pmc.soft.3
> @@ -90,6 +90,7 @@ Write page fault.
>  .Xr pmc.corei7 3 ,
>  .Xr pmc.corei7uc 3 ,
>  .Xr pmc.iaf 3 ,
> +.Xr pmc.ibs 3 ,
>  .Xr pmc.tsc 3 ,
>  .Xr pmc.ucf 3 ,
>  .Xr pmc.westmereuc 3 ,
> diff --git a/lib/libpmc/pmc.tsc.3 b/lib/libpmc/pmc.tsc.3
> index 4834d897f90c..73e2377df0c7 100644
> --- a/lib/libpmc/pmc.tsc.3
> +++ b/lib/libpmc/pmc.tsc.3
> @@ -62,6 +62,7 @@ maps to the TSC.
>  .Xr pmc.core 3 ,
>  .Xr pmc.core2 3 ,
>  .Xr pmc.iaf 3 ,
> +.Xr pmc.ibs 3 ,
>  .Xr pmc.soft 3 ,
>  .Xr pmclog 3 ,
>  .Xr hwpmc 4
> diff --git a/lib/libpmc/pmc.ucf.3 b/lib/libpmc/pmc.ucf.3
> index a7cea6bb57f9..37ee0f87a951 100644
> --- a/lib/libpmc/pmc.ucf.3
> +++ b/lib/libpmc/pmc.ucf.3
> @@ -88,6 +88,7 @@ offset C0H under device number 0 and Function 0.
>  .Xr pmc.corei7 3 ,
>  .Xr pmc.corei7uc 3 ,
>  .Xr pmc.iaf 3 ,
> +.Xr pmc.ibs 3 ,
>  .Xr pmc.soft 3 ,
>  .Xr pmc.tsc 3 ,
>  .Xr pmc.westmere 3 ,
> diff --git a/sys/dev/hwpmc/hwpmc_ibs.h b/sys/dev/hwpmc/hwpmc_ibs.h
> index 4449b44c8368..01fc88648558 100644
> --- a/sys/dev/hwpmc/hwpmc_ibs.h
> +++ b/sys/dev/hwpmc/hwpmc_ibs.h
> @@ -67,6 +67,18 @@
>  #define IBS_CTL_LVTOFFSETVALID         (1ULL << 8)
>  #define IBS_CTL_LVTOFFSETMASK          0x0000000F
>
> +/*
> + * The minimum sampling rate was selected to match the default used by
> other
> + * counters that was also found to be experimentally stable by providing
> enough
> + * time between consecutive NMIs.  The maximum sample rate is determined
> by
> + * setting all available counter bits, i.e., all available bits except the
> + * bottom four that are zero extended.
> + */
> +#define IBS_FETCH_MIN_RATE             65536
> +#define IBS_FETCH_MAX_RATE             1048560
> +#define IBS_OP_MIN_RATE                        65536
> +#define IBS_OP_MAX_RATE                        134217712
> +
>  /* IBS Fetch Control */
>  #define IBS_FETCH_CTL                  0xC0011030 /* IBS Fetch Control */
>  #define IBS_FETCH_CTL_L3MISS           (1ULL << 61) /* L3 Cache Miss */
> @@ -82,7 +94,8 @@
>  #define IBS_FETCH_CTL_ENABLE           (1ULL << 48) /* Enable */
>  #define IBS_FETCH_CTL_MAXCNTMASK       0x0000FFFFULL
>
> -#define IBS_FETCH_CTL_TO_LAT(_c)       ((_c >> 32) & 0x0000FFFF)
> +#define IBS_FETCH_INTERVAL_TO_CTL(_c)  (((_c) >> 4) & 0x0000FFFF)
> +#define IBS_FETCH_CTL_TO_LAT(_c)       (((_c) >> 32) & 0x0000FFFF)
>
>  #define IBS_FETCH_LINADDR              0xC0011031 /* Fetch Linear Address
> */
>  #define IBS_FETCH_PHYSADDR             0xC0011032 /* Fetch Physical
> Address */
> @@ -95,12 +108,16 @@
>
>  /* IBS Execution Control */
>  #define IBS_OP_CTL                     0xC0011033 /* IBS Execution
> Control */
> +#define IBS_OP_CTL_LATFLTEN            (1ULL << 63) /* Load Latency
> Filtering */
>  #define IBS_OP_CTL_COUNTERCONTROL      (1ULL << 19) /* Counter Control */
>  #define IBS_OP_CTL_VALID               (1ULL << 18) /* Valid */
>  #define IBS_OP_CTL_ENABLE              (1ULL << 17) /* Enable */
>  #define IBS_OP_CTL_L3MISSONLY          (1ULL << 16) /* L3 Miss Filtering
> */
>  #define IBS_OP_CTL_MAXCNTMASK          0x0000FFFFULL
>
> +#define IBS_OP_CTL_LDLAT_TO_CTL(_c)    ((((ldlat) >> 7) - 1) << 59)
> +#define IBS_OP_INTERVAL_TO_CTL(_c)     ((((_c) >> 4) & 0x0000FFFFULL) |
> ((_c) & 0x07F00000))
> +
>  #define IBS_OP_RIP                     0xC0011034 /* IBS Op RIP */
>  #define IBS_OP_DATA                    0xC0011035 /* IBS Op Data */
>  #define IBS_OP_DATA_RIPINVALID         (1ULL << 38) /* RIP Invalid */
>
>