svn commit: r210080 - in stable/7/sys: amd64/amd64 amd64/include
i386/i386 i386/include
John Baldwin
jhb at FreeBSD.org
Wed Jul 14 21:10:47 UTC 2010
Author: jhb
Date: Wed Jul 14 21:10:47 2010
New Revision: 210080
URL: http://svn.freebsd.org/changeset/base/210080
Log:
MFC 208507,208556,208621:
Add support for corrected machine check interrupts. CMCI is a new local
APIC interrupt that fires when a threshold of corrected machine check
events is reached. CMCI also includes a count of events when reporting
corrected errors in the bank's status register. Note that individual
banks may or may not support CMCI. If they do, each bank includes its own
threshold register that determines when the interrupt fires. Currently
the code uses a very simple strategy where it doubles the threshold on
each interrupt until it succeeds in throttling the interrupt to occur
only once a minute (this interval can be tuned via sysctl). The threshold
is also adjusted on each hourly poll which will lower the threshold once
events stop occurring.
Modified:
stable/7/sys/amd64/amd64/apic_vector.S
stable/7/sys/amd64/amd64/local_apic.c
stable/7/sys/amd64/amd64/machdep.c
stable/7/sys/amd64/amd64/mca.c
stable/7/sys/amd64/include/apicreg.h
stable/7/sys/amd64/include/apicvar.h
stable/7/sys/amd64/include/mca.h
stable/7/sys/amd64/include/pcpu.h
stable/7/sys/amd64/include/specialreg.h
stable/7/sys/i386/i386/apic_vector.s
stable/7/sys/i386/i386/local_apic.c
stable/7/sys/i386/i386/machdep.c
stable/7/sys/i386/i386/mca.c
stable/7/sys/i386/include/apicreg.h
stable/7/sys/i386/include/apicvar.h
stable/7/sys/i386/include/mca.h
stable/7/sys/i386/include/pcpu.h
stable/7/sys/i386/include/specialreg.h
Directory Properties:
stable/7/sys/ (props changed)
stable/7/sys/cddl/contrib/opensolaris/ (props changed)
stable/7/sys/contrib/dev/acpica/ (props changed)
stable/7/sys/contrib/pf/ (props changed)
Modified: stable/7/sys/amd64/amd64/apic_vector.S
==============================================================================
--- stable/7/sys/amd64/amd64/apic_vector.S Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/amd64/amd64/apic_vector.S Wed Jul 14 21:10:47 2010 (r210080)
@@ -105,6 +105,18 @@ IDTVEC(timerint)
jmp doreti
/*
+ * Local APIC CMCI handler.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(cmcint)
+ PUSH_FRAME
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ call lapic_handle_cmc
+ MEXITCOUNT
+ jmp doreti
+
+/*
* Local APIC error interrupt handler.
*/
.text
Modified: stable/7/sys/amd64/amd64/local_apic.c
==============================================================================
--- stable/7/sys/amd64/amd64/local_apic.c Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/amd64/amd64/local_apic.c Wed Jul 14 21:10:47 2010 (r210080)
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <machine/apicvar.h>
+#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
@@ -119,6 +120,7 @@ static struct lvt lvts[LVT_MAX + 1] = {
{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */
{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */
{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */
+ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */
};
static inthand_t *ioint_handlers[] = {
@@ -226,6 +228,9 @@ lapic_init(vm_paddr_t addr)
setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_SYSIGT, SEL_KPL, 0);
/* XXX: Thermal interrupt */
+
+ /* Local APIC CMCI. */
+ setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_SYSIGT, SEL_KPL, 0);
}
/*
@@ -251,7 +256,7 @@ lapic_create(u_int apic_id, int boot_cpu
*/
lapics[apic_id].la_present = 1;
lapics[apic_id].la_id = apic_id;
- for (i = 0; i < LVT_MAX; i++) {
+ for (i = 0; i <= LVT_MAX; i++) {
lapics[apic_id].la_lvts[i] = lvts[i];
lapics[apic_id].la_lvts[i].lvt_active = 0;
}
@@ -276,6 +281,7 @@ lapic_dump(const char* str)
printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x pmc: 0x%08x\n",
lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error,
lapic->lvt_pcint);
+ printf(" cmci: 0x%08x\n", lapic->lvt_cmci);
}
void
@@ -350,6 +356,10 @@ lapic_setup(int boot)
}
}
+ /* Program the CMCI LVT entry if present. */
+ if (maxlvt >= LVT_CMCI)
+ lapic->lvt_cmci = lvt_mode(la, LVT_CMCI, lapic->lvt_cmci);
+
intr_restore(eflags);
}
@@ -853,6 +863,34 @@ lapic_timer_enable_intr(void)
}
void
+lapic_handle_cmc(void)
+{
+
+ lapic_eoi();
+ cmc_intr();
+}
+
+/*
+ * Called from mca_init() to activate the CMC interrupt if this CPU is
+ * responsible for monitoring any MC banks for CMC events. Since mca_init()
+ * is called prior to lapic_setup() during boot, this just needs to unmask
+ * this CPU's LVT_CMCI entry.
+ */
+void
+lapic_enable_cmc(void)
+{
+ u_int apic_id;
+
+ apic_id = PCPU_GET(apic_id);
+ KASSERT(lapics[apic_id].la_present,
+ ("%s: missing APIC %u", __func__, apic_id));
+ lapics[apic_id].la_lvts[LVT_CMCI].lvt_masked = 0;
+ lapics[apic_id].la_lvts[LVT_CMCI].lvt_active = 1;
+ if (bootverbose)
+ printf("lapic%u: CMCI unmasked\n", apic_id);
+}
+
+void
lapic_handle_error(void)
{
u_int32_t esr;
Modified: stable/7/sys/amd64/amd64/machdep.c
==============================================================================
--- stable/7/sys/amd64/amd64/machdep.c Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/amd64/amd64/machdep.c Wed Jul 14 21:10:47 2010 (r210080)
@@ -262,7 +262,6 @@ cpu_startup(dummy)
vm_pager_bufferinit();
cpu_setregs();
- mca_init();
}
/*
Modified: stable/7/sys/amd64/amd64/mca.c
==============================================================================
--- stable/7/sys/amd64/amd64/mca.c Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/amd64/amd64/mca.c Wed Jul 14 21:10:47 2010 (r210080)
@@ -33,6 +33,8 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -43,11 +45,29 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
#include <machine/cputypes.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
+/* Modes for mca_scan() */
+enum scan_mode {
+ POLLED,
+ MCE,
+ CMCI,
+};
+
+/*
+ * State maintained for each monitored MCx bank to control the
+ * corrected machine check interrupt threshold.
+ */
+struct cmc_state {
+ int max_threshold;
+ int last_intr;
+};
+
struct mca_internal {
struct mca_record rec;
int logged;
@@ -79,19 +99,22 @@ static struct callout mca_timer;
static int mca_ticks = 3600; /* Check hourly by default. */
static struct task mca_task;
static struct mtx mca_lock;
+static struct cmc_state **cmc_state; /* Indexed by cpuid, bank */
+static int cmc_banks;
+static int cmc_throttle = 60; /* Time in seconds to throttle CMCI. */
static int
-sysctl_mca_ticks(SYSCTL_HANDLER_ARGS)
+sysctl_positive_int(SYSCTL_HANDLER_ARGS)
{
int error, value;
- value = mca_ticks;
+ value = *(int *)arg1;
error = sysctl_handle_int(oidp, &value, 0, req);
if (error || req->newptr == NULL)
return (error);
if (value <= 0)
return (EINVAL);
- mca_ticks = value;
+ *(int *)arg1 = value;
return (0);
}
@@ -401,31 +424,112 @@ mca_record_entry(const struct mca_record
}
/*
+ * Update the interrupt threshold for a CMCI. The strategy is to use
+ * a low trigger that interrupts as soon as the first event occurs.
+ * However, if a steady stream of events arrive, the threshold is
+ * increased until the interrupts are throttled to once every
+ * cmc_throttle seconds or the periodic scan. If a periodic scan
+ * finds that the threshold is too high, it is lowered.
+ */
+static void
+cmci_update(enum scan_mode mode, int bank, int valid, struct mca_record *rec)
+{
+ struct cmc_state *cc;
+ uint64_t ctl;
+ u_int delta;
+ int count, limit;
+
+ /* Fetch the current limit for this bank. */
+ cc = &cmc_state[PCPU_GET(cpuid)][bank];
+ ctl = rdmsr(MSR_MC_CTL2(bank));
+ count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
+ delta = (u_int)(ticks - cc->last_intr);
+
+ /*
+ * If an interrupt was received less than cmc_throttle seconds
+ * since the previous interrupt and the count from the current
+ * event is greater than or equal to the current threshold,
+ * double the threshold up to the max.
+ */
+ if (mode == CMCI && valid) {
+ limit = ctl & MC_CTL2_THRESHOLD;
+ if (delta < cmc_throttle && count >= limit &&
+ limit < cc->max_threshold) {
+ limit = min(limit << 1, cc->max_threshold);
+ ctl &= ~MC_CTL2_THRESHOLD;
+ ctl |= limit;
+ wrmsr(MSR_MC_CTL2(bank), limit);
+ }
+ cc->last_intr = ticks;
+ return;
+ }
+
+ /*
+ * When the banks are polled, check to see if the threshold
+ * should be lowered.
+ */
+ if (mode != POLLED)
+ return;
+
+	/* If a CMCI occurred recently, do nothing for now. */
+ if (delta < cmc_throttle)
+ return;
+
+ /*
+ * Compute a new limit based on the average rate of events per
+ * cmc_throttle seconds since the last interrupt.
+ */
+ if (valid) {
+ count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
+ limit = count * cmc_throttle / delta;
+ if (limit <= 0)
+ limit = 1;
+ else if (limit > cc->max_threshold)
+ limit = cc->max_threshold;
+ } else
+ limit = 1;
+ if ((ctl & MC_CTL2_THRESHOLD) != limit) {
+ ctl &= ~MC_CTL2_THRESHOLD;
+ ctl |= limit;
+ wrmsr(MSR_MC_CTL2(bank), limit);
+ }
+}
+
+/*
* This scans all the machine check banks of the current CPU to see if
* there are any machine checks. Any non-recoverable errors are
* reported immediately via mca_log(). The current thread must be
- * pinned when this is called. The 'mcip' parameter indicates if we
- * are being called from the MC exception handler. In that case this
- * function returns true if the system is restartable. Otherwise, it
- * returns a count of the number of valid MC records found.
+ * pinned when this is called. The 'mode' parameter indicates if we
+ * are being called from the MC exception handler, the CMCI handler,
+ * or the periodic poller. In the MC exception case this function
+ * returns true if the system is restartable. Otherwise, it returns a
+ * count of the number of valid MC records found.
*/
static int
-mca_scan(int mcip)
+mca_scan(enum scan_mode mode)
{
struct mca_record rec;
uint64_t mcg_cap, ucmask;
- int count, i, recoverable;
+ int count, i, recoverable, valid;
count = 0;
recoverable = 1;
ucmask = MC_STATUS_UC | MC_STATUS_PCC;
/* When handling a MCE#, treat the OVER flag as non-restartable. */
- if (mcip)
+ if (mode == MCE)
ucmask |= MC_STATUS_OVER;
mcg_cap = rdmsr(MSR_MCG_CAP);
for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
- if (mca_check_status(i, &rec)) {
+ /*
+ * For a CMCI, only check banks this CPU is
+ * responsible for.
+ */
+ if (mode == CMCI && !(PCPU_GET(cmci_mask) & 1 << i))
+ continue;
+
+ valid = mca_check_status(i, &rec);
+ if (valid) {
count++;
if (rec.mr_status & ucmask) {
recoverable = 0;
@@ -433,8 +537,15 @@ mca_scan(int mcip)
}
mca_record_entry(&rec);
}
+
+ /*
+ * If this is a bank this CPU monitors via CMCI,
+ * update the threshold.
+ */
+ if (PCPU_GET(cmci_mask) & (1 << i))
+ cmci_update(mode, i, valid, &rec);
}
- return (mcip ? recoverable : count);
+ return (mode == MCE ? recoverable : count);
}
/*
@@ -457,7 +568,7 @@ mca_scan_cpus(void *context, int pending
continue;
sched_bind(td, cpu);
thread_unlock(td);
- count += mca_scan(0);
+ count += mca_scan(POLLED);
thread_lock(td);
sched_unbind(td);
}
@@ -511,7 +622,24 @@ mca_startup(void *dummy)
SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);
static void
-mca_setup(void)
+cmci_setup(uint64_t mcg_cap)
+{
+ int i;
+
+ cmc_state = malloc((mp_maxid + 1) * sizeof(struct cmc_state **),
+ M_MCA, M_WAITOK);
+ cmc_banks = mcg_cap & MCG_CAP_COUNT;
+ for (i = 0; i <= mp_maxid; i++)
+ cmc_state[i] = malloc(sizeof(struct cmc_state) * cmc_banks,
+ M_MCA, M_WAITOK | M_ZERO);
+ SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
+ "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ &cmc_throttle, 0, sysctl_positive_int, "I",
+ "Interval in seconds to throttle corrected MC interrupts");
+}
+
+static void
+mca_setup(uint64_t mcg_cap)
{
mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
@@ -522,13 +650,62 @@ mca_setup(void)
"count", CTLFLAG_RD, &mca_count, 0, "Record count");
SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
"interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks,
- 0, sysctl_mca_ticks, "I",
+ 0, sysctl_positive_int, "I",
"Periodic interval in seconds to scan for machine checks");
SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
"records", CTLFLAG_RD, sysctl_mca_records, "Machine check records");
SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
"force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
sysctl_mca_scan, "I", "Force an immediate scan for machine checks");
+ if (mcg_cap & MCG_CAP_CMCI_P)
+ cmci_setup(mcg_cap);
+}
+
+/*
+ * See if we should monitor CMCI for this bank. If CMCI_EN is already
+ * set in MC_CTL2, then another CPU is responsible for this bank, so
+ * ignore it. If CMCI_EN returns zero after being set, then this bank
+ * does not support CMCI_EN. If this CPU sets CMCI_EN, then it should
+ * now monitor this bank.
+ */
+static void
+cmci_monitor(int i)
+{
+ struct cmc_state *cc;
+ uint64_t ctl;
+
+ KASSERT(i < cmc_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));
+
+ ctl = rdmsr(MSR_MC_CTL2(i));
+ if (ctl & MC_CTL2_CMCI_EN)
+ /* Already monitored by another CPU. */
+ return;
+
+ /* Set the threshold to one event for now. */
+ ctl &= ~MC_CTL2_THRESHOLD;
+ ctl |= MC_CTL2_CMCI_EN | 1;
+ wrmsr(MSR_MC_CTL2(i), ctl);
+ ctl = rdmsr(MSR_MC_CTL2(i));
+ if (!(ctl & MC_CTL2_CMCI_EN))
+ /* This bank does not support CMCI. */
+ return;
+
+ cc = &cmc_state[PCPU_GET(cpuid)][i];
+
+ /* Determine maximum threshold. */
+ ctl &= ~MC_CTL2_THRESHOLD;
+ ctl |= 0x7fff;
+ wrmsr(MSR_MC_CTL2(i), ctl);
+ ctl = rdmsr(MSR_MC_CTL2(i));
+ cc->max_threshold = ctl & MC_CTL2_THRESHOLD;
+
+ /* Start off with a threshold of 1. */
+ ctl &= ~MC_CTL2_THRESHOLD;
+ ctl |= 1;
+ wrmsr(MSR_MC_CTL2(i), ctl);
+
+ /* Mark this bank as monitored. */
+ PCPU_SET(cmci_mask, PCPU_GET(cmci_mask) | 1 << i);
}
/* Must be executed on each CPU. */
@@ -554,14 +731,14 @@ mca_init(void)
workaround_erratum383 = 1;
if (cpu_feature & CPUID_MCA) {
- if (PCPU_GET(cpuid) == 0)
- mca_setup();
+ PCPU_SET(cmci_mask, 0);
- sched_pin();
mcg_cap = rdmsr(MSR_MCG_CAP);
if (mcg_cap & MCG_CAP_CTL_P)
/* Enable MCA features. */
wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
+ if (PCPU_GET(cpuid) == 0)
+ mca_setup(mcg_cap);
/*
* Disable logging of level one TLB parity (L1TP) errors by
@@ -597,15 +774,34 @@ mca_init(void)
if (!skip)
wrmsr(MSR_MC_CTL(i), ctl);
+
+ if (mcg_cap & MCG_CAP_CMCI_P)
+ cmci_monitor(i);
+
/* Clear all errors. */
wrmsr(MSR_MC_STATUS(i), 0);
}
- sched_unpin();
+
+ if (PCPU_GET(cmci_mask) != 0)
+ lapic_enable_cmc();
}
load_cr4(rcr4() | CR4_MCE);
}
+/*
+ * The machine check registers for the BSP cannot be initialized until
+ * the local APIC is initialized. This happens at SI_SUB_CPU,
+ * SI_ORDER_SECOND.
+ */
+static void
+mca_init_bsp(void *arg __unused)
+{
+
+ mca_init();
+}
+SYSINIT(mca_init_bsp, SI_SUB_CPU, SI_ORDER_ANY, mca_init_bsp, NULL);
+
/* Called when a machine check exception fires. */
int
mca_intr(void)
@@ -624,7 +820,7 @@ mca_intr(void)
}
/* Scan the banks and check for any non-recoverable errors. */
- recoverable = mca_scan(1);
+ recoverable = mca_scan(MCE);
mcg_status = rdmsr(MSR_MCG_STATUS);
if (!(mcg_status & MCG_STATUS_RIPV))
recoverable = 0;
@@ -633,3 +829,31 @@ mca_intr(void)
wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP);
return (recoverable);
}
+
+/* Called for a CMCI (correctable machine check interrupt). */
+void
+cmc_intr(void)
+{
+ struct mca_internal *mca;
+ int count;
+
+ /*
+ * Serialize MCA bank scanning to prevent collisions from
+ * sibling threads.
+ */
+ count = mca_scan(CMCI);
+
+ /* If we found anything, log them to the console. */
+ if (count != 0) {
+ mtx_lock_spin(&mca_lock);
+ STAILQ_FOREACH(mca, &mca_records, link) {
+ if (!mca->logged) {
+ mca->logged = 1;
+ mtx_unlock_spin(&mca_lock);
+ mca_log(&mca->rec);
+ mtx_lock_spin(&mca_lock);
+ }
+ }
+ mtx_unlock_spin(&mca_lock);
+ }
+}
Modified: stable/7/sys/amd64/include/apicreg.h
==============================================================================
--- stable/7/sys/amd64/include/apicreg.h Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/amd64/include/apicreg.h Wed Jul 14 21:10:47 2010 (r210080)
@@ -89,7 +89,7 @@
* 2C0 Reserved
* 2D0 Reserved
* 2E0 Reserved
- * 2F0 Reserved
+ * 2F0 Local Vector Table (CMCI) R/W
* 300 ICR_LOW Interrupt Command Reg. (0-31) R/W
* 310 ICR_HI Interrupt Command Reg. (32-63) R/W
* 320 Local Vector Table (Timer) R/W
@@ -172,7 +172,7 @@ struct LAPIC {
/* reserved */ PAD4;
/* reserved */ PAD4;
/* reserved */ PAD4;
- /* reserved */ PAD4;
+ u_int32_t lvt_cmci; PAD3;
u_int32_t icr_lo; PAD3;
u_int32_t icr_hi; PAD3;
u_int32_t lvt_timer; PAD3;
Modified: stable/7/sys/amd64/include/apicvar.h
==============================================================================
--- stable/7/sys/amd64/include/apicvar.h Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/amd64/include/apicvar.h Wed Jul 14 21:10:47 2010 (r210080)
@@ -113,8 +113,9 @@
#define APIC_LOCAL_INTS 240
#define APIC_ERROR_INT APIC_LOCAL_INTS
#define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1)
+#define APIC_CMC_INT (APIC_LOCAL_INTS + 2)
-#define APIC_IPI_INTS (APIC_LOCAL_INTS + 2)
+#define APIC_IPI_INTS (APIC_LOCAL_INTS + 3)
#define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */
#define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */
#define IPI_INVLPG (APIC_IPI_INTS + 2)
@@ -143,7 +144,8 @@
#define LVT_ERROR 3
#define LVT_PMC 4
#define LVT_THERMAL 5
-#define LVT_MAX LVT_THERMAL
+#define LVT_CMCI 6
+#define LVT_MAX LVT_CMCI
#ifndef LOCORE
@@ -173,8 +175,8 @@ struct apic_enumerator {
inthand_t
IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3),
IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6),
- IDTVEC(apic_isr7), IDTVEC(errorint), IDTVEC(spuriousint),
- IDTVEC(timerint);
+ IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint),
+ IDTVEC(spuriousint), IDTVEC(timerint);
extern vm_paddr_t lapic_paddr;
@@ -201,6 +203,7 @@ void lapic_create(u_int apic_id, int boo
void lapic_disable(void);
void lapic_disable_pmc(void);
void lapic_dump(const char *str);
+void lapic_enable_cmc(void);
int lapic_enable_pmc(void);
void lapic_eoi(void);
int lapic_id(void);
@@ -209,6 +212,7 @@ int lapic_intr_pending(u_int vector);
void lapic_ipi_raw(register_t icrlo, u_int dest);
void lapic_ipi_vectored(u_int vector, int dest);
int lapic_ipi_wait(int delay);
+void lapic_handle_cmc(void);
void lapic_handle_error(void);
void lapic_handle_intr(int vector, struct trapframe *frame);
void lapic_handle_timer(struct trapframe *frame);
Modified: stable/7/sys/amd64/include/mca.h
==============================================================================
--- stable/7/sys/amd64/include/mca.h Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/amd64/include/mca.h Wed Jul 14 21:10:47 2010 (r210080)
@@ -46,6 +46,7 @@ struct mca_record {
#ifdef _KERNEL
+void cmc_intr(void);
void mca_init(void);
int mca_intr(void);
Modified: stable/7/sys/amd64/include/pcpu.h
==============================================================================
--- stable/7/sys/amd64/include/pcpu.h Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/amd64/include/pcpu.h Wed Jul 14 21:10:47 2010 (r210080)
@@ -48,7 +48,8 @@
register_t pc_scratch_rsp; /* User %rsp in syscall */ \
u_int pc_apic_id; \
u_int pc_acpi_id; /* ACPI CPU id */ \
- struct user_segment_descriptor *pc_gs32p
+ struct user_segment_descriptor *pc_gs32p; \
+ u_int pc_cmci_mask /* MCx banks for CMCI */
#ifdef _KERNEL
Modified: stable/7/sys/amd64/include/specialreg.h
==============================================================================
--- stable/7/sys/amd64/include/specialreg.h Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/amd64/include/specialreg.h Wed Jul 14 21:10:47 2010 (r210080)
@@ -376,7 +376,7 @@
#define MC_STATUS_VAL 0x8000000000000000
#define MC_MISC_RA_LSB 0x000000000000003f /* If MCG_CAP_SER_P */
#define MC_MISC_ADDRESS_MODE 0x00000000000001c0 /* If MCG_CAP_SER_P */
-#define MC_CTL2_THRESHOLD 0x0000000000003fff
+#define MC_CTL2_THRESHOLD 0x0000000000007fff
#define MC_CTL2_CMCI_EN 0x0000000040000000
/*
Modified: stable/7/sys/i386/i386/apic_vector.s
==============================================================================
--- stable/7/sys/i386/i386/apic_vector.s Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/i386/i386/apic_vector.s Wed Jul 14 21:10:47 2010 (r210080)
@@ -111,6 +111,19 @@ IDTVEC(timerint)
jmp doreti
/*
+ * Local APIC CMCI handler.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(cmcint)
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ FAKE_MCOUNT(TF_EIP(%esp))
+ call lapic_handle_cmc
+ MEXITCOUNT
+ jmp doreti
+
+/*
* Local APIC error interrupt handler.
*/
.text
Modified: stable/7/sys/i386/i386/local_apic.c
==============================================================================
--- stable/7/sys/i386/i386/local_apic.c Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/i386/i386/local_apic.c Wed Jul 14 21:10:47 2010 (r210080)
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <machine/frame.h>
#include <machine/intr_machdep.h>
#include <machine/apicvar.h>
+#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
@@ -119,6 +120,7 @@ static struct lvt lvts[LVT_MAX + 1] = {
{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */
{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */
{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */
+ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */
};
static inthand_t *ioint_handlers[] = {
@@ -229,6 +231,10 @@ lapic_init(vm_paddr_t addr)
GSEL(GCODE_SEL, SEL_KPL));
/* XXX: Thermal interrupt */
+
+ /* Local APIC CMCI. */
+ setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_SYS386TGT, SEL_KPL,
+ GSEL(GCODE_SEL, SEL_KPL));
}
/*
@@ -254,7 +260,7 @@ lapic_create(u_int apic_id, int boot_cpu
*/
lapics[apic_id].la_present = 1;
lapics[apic_id].la_id = apic_id;
- for (i = 0; i < LVT_MAX; i++) {
+ for (i = 0; i <= LVT_MAX; i++) {
lapics[apic_id].la_lvts[i] = lvts[i];
lapics[apic_id].la_lvts[i].lvt_active = 0;
}
@@ -279,6 +285,7 @@ lapic_dump(const char* str)
printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x pmc: 0x%08x\n",
lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error,
lapic->lvt_pcint);
+ printf(" cmci: 0x%08x\n", lapic->lvt_cmci);
}
void
@@ -353,6 +360,10 @@ lapic_setup(int boot)
}
}
+ /* Program the CMCI LVT entry if present. */
+ if (maxlvt >= LVT_CMCI)
+ lapic->lvt_cmci = lvt_mode(la, LVT_CMCI, lapic->lvt_cmci);
+
intr_restore(eflags);
}
@@ -856,6 +867,34 @@ lapic_timer_enable_intr(void)
}
void
+lapic_handle_cmc(void)
+{
+
+ lapic_eoi();
+ cmc_intr();
+}
+
+/*
+ * Called from mca_init() to activate the CMC interrupt if this CPU is
+ * responsible for monitoring any MC banks for CMC events. Since mca_init()
+ * is called prior to lapic_setup() during boot, this just needs to unmask
+ * this CPU's LVT_CMCI entry.
+ */
+void
+lapic_enable_cmc(void)
+{
+ u_int apic_id;
+
+ apic_id = PCPU_GET(apic_id);
+ KASSERT(lapics[apic_id].la_present,
+ ("%s: missing APIC %u", __func__, apic_id));
+ lapics[apic_id].la_lvts[LVT_CMCI].lvt_masked = 0;
+ lapics[apic_id].la_lvts[LVT_CMCI].lvt_active = 1;
+ if (bootverbose)
+ printf("lapic%u: CMCI unmasked\n", apic_id);
+}
+
+void
lapic_handle_error(void)
{
u_int32_t esr;
Modified: stable/7/sys/i386/i386/machdep.c
==============================================================================
--- stable/7/sys/i386/i386/machdep.c Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/i386/i386/machdep.c Wed Jul 14 21:10:47 2010 (r210080)
@@ -291,7 +291,6 @@ cpu_startup(dummy)
vm_pager_bufferinit();
cpu_setregs();
- mca_init();
}
/*
Modified: stable/7/sys/i386/i386/mca.c
==============================================================================
--- stable/7/sys/i386/i386/mca.c Wed Jul 14 21:10:14 2010 (r210079)
+++ stable/7/sys/i386/i386/mca.c Wed Jul 14 21:10:47 2010 (r210080)
@@ -32,7 +32,11 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_apic.h"
+
#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -43,11 +47,31 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
#include <machine/cputypes.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
+/* Modes for mca_scan() */
+enum scan_mode {
+ POLLED,
+ MCE,
+ CMCI,
+};
+
+#ifdef DEV_APIC
+/*
+ * State maintained for each monitored MCx bank to control the
+ * corrected machine check interrupt threshold.
+ */
+struct cmc_state {
+ int max_threshold;
+ int last_intr;
+};
+#endif
+
struct mca_internal {
struct mca_record rec;
int logged;
@@ -80,18 +104,24 @@ static int mca_ticks = 3600; /* Check ho
static struct task mca_task;
static struct mtx mca_lock;
+#ifdef DEV_APIC
+static struct cmc_state **cmc_state; /* Indexed by cpuid, bank */
+static int cmc_banks;
+static int cmc_throttle = 60; /* Time in seconds to throttle CMCI. */
+#endif
+
static int
-sysctl_mca_ticks(SYSCTL_HANDLER_ARGS)
+sysctl_positive_int(SYSCTL_HANDLER_ARGS)
{
int error, value;
- value = mca_ticks;
+ value = *(int *)arg1;
error = sysctl_handle_int(oidp, &value, 0, req);
if (error || req->newptr == NULL)
return (error);
if (value <= 0)
return (EINVAL);
- mca_ticks = value;
+ *(int *)arg1 = value;
return (0);
}
@@ -400,32 +430,117 @@ mca_record_entry(const struct mca_record
mtx_unlock_spin(&mca_lock);
}
+#ifdef DEV_APIC
+/*
+ * Update the interrupt threshold for a CMCI. The strategy is to use
+ * a low trigger that interrupts as soon as the first event occurs.
+ * However, if a steady stream of events arrive, the threshold is
+ * increased until the interrupts are throttled to once every
+ * cmc_throttle seconds or the periodic scan. If a periodic scan
+ * finds that the threshold is too high, it is lowered.
+ */
+static void
+cmci_update(enum scan_mode mode, int bank, int valid, struct mca_record *rec)
+{
+ struct cmc_state *cc;
+ uint64_t ctl;
+ u_int delta;
+ int count, limit;
+
+ /* Fetch the current limit for this bank. */
+ cc = &cmc_state[PCPU_GET(cpuid)][bank];
+ ctl = rdmsr(MSR_MC_CTL2(bank));
+ count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
+ delta = (u_int)(ticks - cc->last_intr);
+
+ /*
+ * If an interrupt was received less than cmc_throttle seconds
+ * since the previous interrupt and the count from the current
+ * event is greater than or equal to the current threshold,
+ * double the threshold up to the max.
+ */
+ if (mode == CMCI && valid) {
+ limit = ctl & MC_CTL2_THRESHOLD;
+ if (delta < cmc_throttle && count >= limit &&
+ limit < cc->max_threshold) {
+ limit = min(limit << 1, cc->max_threshold);
+ ctl &= ~MC_CTL2_THRESHOLD;
+ ctl |= limit;
+ wrmsr(MSR_MC_CTL2(bank), limit);
+ }
+ cc->last_intr = ticks;
+ return;
+ }
+
+ /*
+ * When the banks are polled, check to see if the threshold
+ * should be lowered.
+ */
+ if (mode != POLLED)
+ return;
+
+	/* If a CMCI occurred recently, do nothing for now. */
+ if (delta < cmc_throttle)
+ return;
+
+ /*
+ * Compute a new limit based on the average rate of events per
+ * cmc_throttle seconds since the last interrupt.
+ */
+ if (valid) {
+ count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
+ limit = count * cmc_throttle / delta;
+ if (limit <= 0)
+ limit = 1;
+ else if (limit > cc->max_threshold)
+ limit = cc->max_threshold;
+ } else
+ limit = 1;
+ if ((ctl & MC_CTL2_THRESHOLD) != limit) {
+ ctl &= ~MC_CTL2_THRESHOLD;
+ ctl |= limit;
+ wrmsr(MSR_MC_CTL2(bank), limit);
+ }
+}
+#endif
+
/*
* This scans all the machine check banks of the current CPU to see if
* there are any machine checks. Any non-recoverable errors are
* reported immediately via mca_log(). The current thread must be
- * pinned when this is called. The 'mcip' parameter indicates if we
- * are being called from the MC exception handler. In that case this
- * function returns true if the system is restartable. Otherwise, it
- * returns a count of the number of valid MC records found.
+ * pinned when this is called. The 'mode' parameter indicates if we
+ * are being called from the MC exception handler, the CMCI handler,
+ * or the periodic poller. In the MC exception case this function
+ * returns true if the system is restartable. Otherwise, it returns a
+ * count of the number of valid MC records found.
*/
static int
-mca_scan(int mcip)
+mca_scan(enum scan_mode mode)
{
struct mca_record rec;
uint64_t mcg_cap, ucmask;
- int count, i, recoverable;
+ int count, i, recoverable, valid;
count = 0;
recoverable = 1;
ucmask = MC_STATUS_UC | MC_STATUS_PCC;
/* When handling a MCE#, treat the OVER flag as non-restartable. */
- if (mcip)
+ if (mode == MCE)
ucmask |= MC_STATUS_OVER;
mcg_cap = rdmsr(MSR_MCG_CAP);
for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
- if (mca_check_status(i, &rec)) {
+#ifdef DEV_APIC
+ /*
+ * For a CMCI, only check banks this CPU is
+ * responsible for.
+ */
+ if (mode == CMCI && !(PCPU_GET(cmci_mask) & 1 << i))
+ continue;
+#endif
+
+ valid = mca_check_status(i, &rec);
+ if (valid) {
count++;
if (rec.mr_status & ucmask) {
recoverable = 0;
@@ -433,8 +548,17 @@ mca_scan(int mcip)
}
mca_record_entry(&rec);
}
+
+#ifdef DEV_APIC
+ /*
+ * If this is a bank this CPU monitors via CMCI,
+ * update the threshold.
+ */
+ if (PCPU_GET(cmci_mask) & (1 << i))
+ cmci_update(mode, i, valid, &rec);
+#endif
}
- return (mcip ? recoverable : count);
+ return (mode == MCE ? recoverable : count);
}
/*
@@ -457,7 +581,7 @@ mca_scan_cpus(void *context, int pending
continue;
sched_bind(td, cpu);
thread_unlock(td);
- count += mca_scan(0);
+ count += mca_scan(POLLED);
thread_lock(td);
sched_unbind(td);
}
@@ -510,8 +634,27 @@ mca_startup(void *dummy)
}
SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);
+#ifdef DEV_APIC
static void
-mca_setup(void)
+cmci_setup(uint64_t mcg_cap)
+{
+ int i;
+
+ cmc_state = malloc((mp_maxid + 1) * sizeof(struct cmc_state **),
+ M_MCA, M_WAITOK);
+ cmc_banks = mcg_cap & MCG_CAP_COUNT;
+ for (i = 0; i <= mp_maxid; i++)
+ cmc_state[i] = malloc(sizeof(struct cmc_state) * cmc_banks,
+ M_MCA, M_WAITOK | M_ZERO);
+ SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
+ "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ &cmc_throttle, 0, sysctl_positive_int, "I",
+ "Interval in seconds to throttle corrected MC interrupts");
+}
+#endif
+
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable-7
mailing list