svn commit: r335924 - in head/sys: dev/cxgbe/cxgbei dev/cxgbe/tom dev/hwpmc kern net netinet netinet/tcp_stacks netinet6 sys
Matt Macy
mmacy at FreeBSD.org
Wed Jul 4 02:47:20 UTC 2018
Author: mmacy
Date: Wed Jul 4 02:47:16 2018
New Revision: 335924
URL: https://svnweb.freebsd.org/changeset/base/335924
Log:
epoch(9): allow preemptible epochs to compose
- Add tracker argument to preemptible epochs
- Inline epoch read path in kernel and tied modules
- Change in_epoch to take an epoch as argument
- Simplify tfb_tcp_do_segment to not take a ti_locked argument,
there's no longer any benefit to dropping the pcbinfo lock
and trying to do so just adds an error prone branchfest to
these functions
- Remove cases of same function recursion on the epoch as
recursing is no longer free.
- Remove the TAILQ_ENTRY and epoch_section from struct
thread as the tracker field is now stack or heap allocated
as appropriate.
Tested by: pho and Limelight Networks
Reviewed by: kbowling at llnw dot com
Sponsored by: Limelight Networks
Differential Revision: https://reviews.freebsd.org/D16066
Added:
head/sys/sys/epoch_private.h (contents, props changed)
Modified:
head/sys/dev/cxgbe/cxgbei/cxgbei.c
head/sys/dev/cxgbe/tom/t4_connect.c
head/sys/dev/cxgbe/tom/t4_cpl_io.c
head/sys/dev/cxgbe/tom/t4_listen.c
head/sys/dev/cxgbe/tom/t4_tls.c
head/sys/dev/hwpmc/hwpmc_mod.c
head/sys/kern/subr_epoch.c
head/sys/net/if.c
head/sys/net/if_gif.h
head/sys/net/if_gre.h
head/sys/net/if_lagg.c
head/sys/net/if_me.c
head/sys/net/if_var.h
head/sys/net/route.c
head/sys/net/rtsock.c
head/sys/netinet/in_gif.c
head/sys/netinet/in_pcb.c
head/sys/netinet/in_pcb.h
head/sys/netinet/ip_divert.c
head/sys/netinet/ip_encap.c
head/sys/netinet/ip_gre.c
head/sys/netinet/raw_ip.c
head/sys/netinet/tcp_hpts.c
head/sys/netinet/tcp_hpts.h
head/sys/netinet/tcp_input.c
head/sys/netinet/tcp_stacks/fastpath.c
head/sys/netinet/tcp_stacks/rack.c
head/sys/netinet/tcp_stacks/tcp_rack.h
head/sys/netinet/tcp_subr.c
head/sys/netinet/tcp_timer.c
head/sys/netinet/tcp_timer.h
head/sys/netinet/tcp_timewait.c
head/sys/netinet/tcp_usrreq.c
head/sys/netinet/tcp_var.h
head/sys/netinet/udp_usrreq.c
head/sys/netinet6/icmp6.c
head/sys/netinet6/in6_gif.c
head/sys/netinet6/ip6_gre.c
head/sys/netinet6/raw_ip6.c
head/sys/netinet6/udp6_usrreq.c
head/sys/sys/epoch.h
head/sys/sys/pmckern.h
head/sys/sys/proc.h
Modified: head/sys/dev/cxgbe/cxgbei/cxgbei.c
==============================================================================
--- head/sys/dev/cxgbe/cxgbei/cxgbei.c Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/dev/cxgbe/cxgbei/cxgbei.c Wed Jul 4 02:47:16 2018 (r335924)
@@ -343,6 +343,7 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_he
struct icl_cxgbei_pdu *icp = toep->ulpcb2;
struct icl_pdu *ip;
u_int pdu_len, val;
+ struct epoch_tracker et;
MPASS(m == NULL);
@@ -411,12 +412,12 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_he
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
icl_cxgbei_conn_pdu_free(NULL, ip);
#ifdef INVARIANTS
Modified: head/sys/dev/cxgbe/tom/t4_connect.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_connect.c Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/dev/cxgbe/tom/t4_connect.c Wed Jul 4 02:47:16 2018 (r335924)
@@ -115,18 +115,19 @@ act_open_failure_cleanup(struct adapter *sc, u_int ati
struct toepcb *toep = lookup_atid(sc, atid);
struct inpcb *inp = toep->inp;
struct toedev *tod = &toep->td->tod;
+ struct epoch_tracker et;
free_atid(sc, atid);
toep->tid = -1;
CURVNET_SET(toep->vnet);
if (status != EAGAIN)
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
toe_connect_failed(tod, inp, status);
final_cpl_received(toep); /* unlocks inp */
if (status != EAGAIN)
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
}
Modified: head/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_cpl_io.c Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/dev/cxgbe/tom/t4_cpl_io.c Wed Jul 4 02:47:16 2018 (r335924)
@@ -1235,6 +1235,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_head
struct inpcb *inp = toep->inp;
struct tcpcb *tp = NULL;
struct socket *so;
+ struct epoch_tracker et;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
@@ -1268,7 +1269,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_head
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1312,7 +1313,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_head
case TCPS_FIN_WAIT_2:
tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
INP_WLOCK(inp);
@@ -1325,7 +1326,7 @@ do_peer_close(struct sge_iq *iq, const struct rss_head
}
done:
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
return (0);
}
@@ -1344,6 +1345,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_h
struct inpcb *inp = toep->inp;
struct tcpcb *tp = NULL;
struct socket *so = NULL;
+ struct epoch_tracker et;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
@@ -1354,7 +1356,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_h
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1372,7 +1374,7 @@ do_close_con_rpl(struct sge_iq *iq, const struct rss_h
tcp_twstart(tp);
release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
INP_WLOCK(inp);
@@ -1397,7 +1399,7 @@ release:
}
done:
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
return (0);
}
@@ -1452,6 +1454,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_heade
struct sge_wrq *ofld_txq = toep->ofld_txq;
struct inpcb *inp;
struct tcpcb *tp;
+ struct epoch_tracker et;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
@@ -1473,7 +1476,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_heade
inp = toep->inp;
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for tcp_close */
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1507,7 +1510,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_heade
final_cpl_received(toep);
done:
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
return (0);
@@ -1560,6 +1563,7 @@ do_rx_data(struct sge_iq *iq, const struct rss_header
struct tcpcb *tp;
struct socket *so;
struct sockbuf *sb;
+ struct epoch_tracker et;
int len;
uint32_t ddp_placed = 0;
@@ -1631,12 +1635,12 @@ do_rx_data(struct sge_iq *iq, const struct rss_header
INP_WUNLOCK(inp);
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
return (0);
Modified: head/sys/dev/cxgbe/tom/t4_listen.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_listen.c Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/dev/cxgbe/tom/t4_listen.c Wed Jul 4 02:47:16 2018 (r335924)
@@ -1255,6 +1255,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss
int reject_reason, v, ntids;
uint16_t vid;
u_int wnd;
+ struct epoch_tracker et;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
@@ -1369,15 +1370,15 @@ found:
REJECT_PASS_ACCEPT();
rpl = wrtod(wr);
- INP_INFO_RLOCK(&V_tcbinfo); /* for 4-tuple check */
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for 4-tuple check */
/* Don't offload if the 4-tuple is already in use */
if (toe_4tuple_check(&inc, &th, ifp) != 0) {
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
free(wr, M_CXGBE);
REJECT_PASS_ACCEPT();
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
inp = lctx->inp; /* listening socket, not owned by TOE */
INP_WLOCK(inp);
@@ -1574,6 +1575,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_
struct tcpopt to;
struct in_conninfo inc;
struct toepcb *toep;
+ struct epoch_tracker et;
u_int txqid, rxqid;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
@@ -1587,7 +1589,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_
("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
CURVNET_SET(lctx->vnet);
- INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for syncache_expand */
INP_WLOCK(inp);
CTR6(KTR_CXGBE,
@@ -1603,7 +1605,7 @@ do_pass_establish(struct sge_iq *iq, const struct rss_
}
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
return (0);
}
@@ -1629,7 +1631,7 @@ reset:
*/
send_reset_synqe(TOEDEV(ifp), synqe);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
return (0);
}
@@ -1695,7 +1697,7 @@ reset:
inp = release_lctx(sc, lctx);
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
release_synqe(synqe);
Modified: head/sys/dev/cxgbe/tom/t4_tls.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_tls.c Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/dev/cxgbe/tom/t4_tls.c Wed Jul 4 02:47:16 2018 (r335924)
@@ -1559,6 +1559,8 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_head
SOCKBUF_LOCK(sb);
if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
+ struct epoch_tracker et;
+
CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
__func__, tid, pdu_length);
m_freem(m);
@@ -1566,12 +1568,12 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_head
INP_WUNLOCK(inp);
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
return (0);
Modified: head/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/dev/hwpmc/hwpmc_mod.c Wed Jul 4 02:47:16 2018 (r335924)
@@ -85,6 +85,9 @@ __FBSDID("$FreeBSD$");
#define free_domain(addr, type) free(addr, type)
#endif
+#define PMC_EPOCH_ENTER() struct epoch_tracker pmc_et; epoch_enter_preempt(global_epoch_preempt, &pmc_et)
+#define PMC_EPOCH_EXIT() epoch_exit_preempt(global_epoch_preempt, &pmc_et)
+
/*
* Types
*/
@@ -1752,12 +1755,12 @@ pmc_process_mmap(struct thread *td, struct pmckern_map
const struct pmc_process *pp;
freepath = fullpath = NULL;
- MPASS(!in_epoch());
+ MPASS(!in_epoch(global_epoch_preempt));
pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
pid = td->td_proc->p_pid;
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
/* Inform owners of all system-wide sampling PMCs. */
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
@@ -1778,7 +1781,7 @@ pmc_process_mmap(struct thread *td, struct pmckern_map
done:
if (freepath)
free(freepath, M_TEMP);
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
}
@@ -1797,12 +1800,12 @@ pmc_process_munmap(struct thread *td, struct pmckern_m
pid = td->td_proc->p_pid;
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_out(po, pid, pkm->pm_address,
pkm->pm_address + pkm->pm_size);
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
return;
@@ -1824,7 +1827,7 @@ pmc_log_kernel_mappings(struct pmc *pm)
struct pmc_owner *po;
struct pmckern_map_in *km, *kmbase;
- MPASS(in_epoch() || sx_xlocked(&pmc_sx));
+ MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx));
KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
("[pmc,%d] non-sampling PMC (%p) desires mapping information",
__LINE__, (void *) pm));
@@ -2106,13 +2109,13 @@ pmc_hook_handler(struct thread *td, int function, void
pk = (struct pmckern_procexec *) arg;
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
/* Inform owners of SS mode PMCs of the exec event. */
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_procexec(po, PMC_ID_INVALID,
p->p_pid, pk->pm_entryaddr, fullpath);
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
PROC_LOCK(p);
is_using_hwpmcs = p->p_flag & P_HWPMC;
@@ -2242,7 +2245,7 @@ pmc_hook_handler(struct thread *td, int function, void
break;
case PMC_FN_MUNMAP:
- MPASS(in_epoch() || sx_xlocked(&pmc_sx));
+ MPASS(in_epoch(global_epoch_preempt) || sx_xlocked(&pmc_sx));
pmc_process_munmap(td, (struct pmckern_map_out *) arg);
break;
@@ -2479,7 +2482,7 @@ pmc_find_thread_descriptor(struct pmc_process *pp, str
if (mode & PMC_FLAG_ALLOCATE) {
if ((ptnew = pmc_thread_descriptor_pool_alloc()) == NULL) {
wait_flag = M_WAITOK;
- if ((mode & PMC_FLAG_NOWAIT) || in_epoch())
+ if ((mode & PMC_FLAG_NOWAIT) || in_epoch(global_epoch_preempt))
wait_flag = M_NOWAIT;
ptnew = malloc(THREADENTRY_SIZE, M_PMC,
@@ -5070,11 +5073,11 @@ pmc_process_exit(void *arg __unused, struct proc *p)
/*
* Log a sysexit event to all SS PMC owners.
*/
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_sysexit(po, p->p_pid);
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
if (!is_using_hwpmcs)
return;
@@ -5255,13 +5258,13 @@ pmc_process_fork(void *arg __unused, struct proc *p1,
* If there are system-wide sampling PMCs active, we need to
* log all fork events to their owner's logs.
*/
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE) {
pmclog_process_procfork(po, p1->p_pid, newproc->p_pid);
pmclog_process_proccreate(po, newproc, 1);
}
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
if (!is_using_hwpmcs)
return;
@@ -5327,11 +5330,11 @@ pmc_process_threadcreate(struct thread *td)
{
struct pmc_owner *po;
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_threadcreate(po, td, 1);
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
}
static void
@@ -5339,11 +5342,11 @@ pmc_process_threadexit(struct thread *td)
{
struct pmc_owner *po;
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_threadexit(po, td);
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
}
static void
@@ -5351,11 +5354,11 @@ pmc_process_proccreate(struct proc *p)
{
struct pmc_owner *po;
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_proccreate(po, p, 1 /* sync */);
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
}
static void
@@ -5388,12 +5391,12 @@ pmc_kld_load(void *arg __unused, linker_file_t lf)
/*
* Notify owners of system sampling PMCs about KLD operations.
*/
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_in(po, (pid_t) -1,
(uintfptr_t) lf->address, lf->filename);
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
/*
* TODO: Notify owners of (all) process-sampling PMCs too.
@@ -5406,12 +5409,12 @@ pmc_kld_unload(void *arg __unused, const char *filenam
{
struct pmc_owner *po;
- epoch_enter_preempt(global_epoch_preempt);
+ PMC_EPOCH_ENTER();
CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_out(po, (pid_t) -1,
(uintfptr_t) address, (uintfptr_t) address + size);
- epoch_exit_preempt(global_epoch_preempt);
+ PMC_EPOCH_EXIT();
/*
* TODO: Notify owners of process-sampling PMCs.
Modified: head/sys/kern/subr_epoch.c
==============================================================================
--- head/sys/kern/subr_epoch.c Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/kern/subr_epoch.c Wed Jul 4 02:47:16 2018 (r335924)
@@ -58,18 +58,10 @@ static MALLOC_DEFINE(M_EPOCH, "epoch", "epoch based re
#define MAX_ADAPTIVE_SPIN 1000
#define MAX_EPOCHS 64
-#ifdef __amd64__
-#define EPOCH_ALIGN CACHE_LINE_SIZE*2
-#else
-#define EPOCH_ALIGN CACHE_LINE_SIZE
-#endif
-
-CTASSERT(sizeof(epoch_section_t) == sizeof(ck_epoch_section_t));
CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context));
SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW, 0, "epoch information");
SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW, 0, "epoch stats");
-
/* Stats. */
static counter_u64_t block_count;
@@ -100,27 +92,9 @@ TAILQ_HEAD (threadlist, thread);
CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
ck_epoch_entry_container)
-typedef struct epoch_record {
- ck_epoch_record_t er_record;
- volatile struct threadlist er_tdlist;
- volatile uint32_t er_gen;
- uint32_t er_cpuid;
-} *epoch_record_t;
-struct epoch_pcpu_state {
- struct epoch_record eps_record;
-} __aligned(EPOCH_ALIGN);
+ epoch_t allepochs[MAX_EPOCHS];
-struct epoch {
- struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
- struct epoch_pcpu_state *e_pcpu_dom[MAXMEMDOM] __aligned(EPOCH_ALIGN);
- int e_idx;
- int e_flags;
- struct epoch_pcpu_state *e_pcpu[0];
-};
-
-epoch_t allepochs[MAX_EPOCHS];
-
DPCPU_DEFINE(struct grouptask, epoch_cb_task);
DPCPU_DEFINE(int, epoch_cb_count);
@@ -192,17 +166,15 @@ static void
epoch_init_numa(epoch_t epoch)
{
int domain, cpu_offset;
- struct epoch_pcpu_state *eps;
epoch_record_t er;
for (domain = 0; domain < vm_ndomains; domain++) {
- eps = malloc_domain(sizeof(*eps) * domcount[domain], M_EPOCH,
+ er = malloc_domain(sizeof(*er) * domcount[domain], M_EPOCH,
domain, M_ZERO | M_WAITOK);
- epoch->e_pcpu_dom[domain] = eps;
+ epoch->e_pcpu_dom[domain] = er;
cpu_offset = domoffsets[domain];
- for (int i = 0; i < domcount[domain]; i++, eps++) {
- epoch->e_pcpu[cpu_offset + i] = eps;
- er = &eps->eps_record;
+ for (int i = 0; i < domcount[domain]; i++, er++) {
+ epoch->e_pcpu[cpu_offset + i] = er;
ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
er->er_cpuid = cpu_offset + i;
@@ -213,14 +185,12 @@ epoch_init_numa(epoch_t epoch)
static void
epoch_init_legacy(epoch_t epoch)
{
- struct epoch_pcpu_state *eps;
epoch_record_t er;
- eps = malloc(sizeof(*eps) * mp_ncpus, M_EPOCH, M_ZERO | M_WAITOK);
- epoch->e_pcpu_dom[0] = eps;
- for (int i = 0; i < mp_ncpus; i++, eps++) {
- epoch->e_pcpu[i] = eps;
- er = &eps->eps_record;
+ er = malloc(sizeof(*er) * mp_ncpus, M_EPOCH, M_ZERO | M_WAITOK);
+ epoch->e_pcpu_dom[0] = er;
+ for (int i = 0; i < mp_ncpus; i++, er++) {
+ epoch->e_pcpu[i] = er;
ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
er->er_cpuid = i;
@@ -253,12 +223,12 @@ epoch_free(epoch_t epoch)
{
int domain;
#ifdef INVARIANTS
- struct epoch_pcpu_state *eps;
+ struct epoch_record *er;
int cpu;
CPU_FOREACH(cpu) {
- eps = epoch->e_pcpu[cpu];
- MPASS(TAILQ_EMPTY(&eps->eps_record.er_tdlist));
+ er = epoch->e_pcpu[cpu];
+ MPASS(TAILQ_EMPTY(&er->er_tdlist));
}
#endif
allepochs[epoch->e_idx] = NULL;
@@ -271,95 +241,32 @@ epoch_free(epoch_t epoch)
free(epoch, M_EPOCH);
}
-#define INIT_CHECK(epoch) \
- do { \
- if (__predict_false((epoch) == NULL)) \
- return; \
- } while (0)
-
void
-epoch_enter_preempt_internal(epoch_t epoch, struct thread *td)
+epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et)
{
- struct epoch_pcpu_state *eps;
- MPASS(cold || epoch != NULL);
- INIT_CHECK(epoch);
- MPASS(epoch->e_flags & EPOCH_PREEMPT);
- critical_enter();
- td->td_pre_epoch_prio = td->td_priority;
- eps = epoch->e_pcpu[curcpu];
-#ifdef INVARIANTS
- MPASS(td->td_epochnest < UCHAR_MAX - 2);
- if (td->td_epochnest > 1) {
- struct thread *curtd;
- int found = 0;
-
- TAILQ_FOREACH(curtd, &eps->eps_record.er_tdlist, td_epochq)
- if (curtd == td)
- found = 1;
- KASSERT(found, ("recursing on a second epoch"));
- critical_exit();
- return;
- }
-#endif
- TAILQ_INSERT_TAIL(&eps->eps_record.er_tdlist, td, td_epochq);
- sched_pin();
- ck_epoch_begin(&eps->eps_record.er_record, (ck_epoch_section_t *)&td->td_epoch_section);
- critical_exit();
+ epoch_enter_preempt(epoch, et);
}
-
void
-epoch_enter(epoch_t epoch)
+epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et)
{
- ck_epoch_record_t *record;
- struct thread *td;
- MPASS(cold || epoch != NULL);
- INIT_CHECK(epoch);
- td = curthread;
-
- critical_enter();
- td->td_epochnest++;
- record = &epoch->e_pcpu[curcpu]->eps_record.er_record;
- ck_epoch_begin(record, NULL);
+ epoch_exit_preempt(epoch, et);
}
void
-epoch_exit_preempt_internal(epoch_t epoch, struct thread *td)
+epoch_enter_KBI(epoch_t epoch)
{
- struct epoch_pcpu_state *eps;
- MPASS(td->td_epochnest == 0);
- INIT_CHECK(epoch);
- critical_enter();
- eps = epoch->e_pcpu[curcpu];
-
- MPASS(epoch->e_flags & EPOCH_PREEMPT);
- ck_epoch_end(&eps->eps_record.er_record, (ck_epoch_section_t *)&td->td_epoch_section);
- TAILQ_REMOVE(&eps->eps_record.er_tdlist, td, td_epochq);
- eps->eps_record.er_gen++;
- sched_unpin();
- if (__predict_false(td->td_pre_epoch_prio != td->td_priority)) {
- thread_lock(td);
- sched_prio(td, td->td_pre_epoch_prio);
- thread_unlock(td);
- }
- critical_exit();
+ epoch_enter(epoch);
}
void
-epoch_exit(epoch_t epoch)
+epoch_exit_KBI(epoch_t epoch)
{
- ck_epoch_record_t *record;
- struct thread *td;
- INIT_CHECK(epoch);
- td = curthread;
- td->td_epochnest--;
- record = &epoch->e_pcpu[curcpu]->eps_record.er_record;
- ck_epoch_end(record, NULL);
- critical_exit();
+ epoch_exit(epoch);
}
/*
@@ -371,7 +278,8 @@ epoch_block_handler_preempt(struct ck_epoch *global __
void *arg __unused)
{
epoch_record_t record;
- struct thread *td, *tdwait, *owner;
+ struct thread *td, *owner, *curwaittd;
+ struct epoch_thread *tdwait;
struct turnstile *ts;
struct lock_object *lock;
int spincount, gen;
@@ -389,13 +297,13 @@ epoch_block_handler_preempt(struct ck_epoch *global __
* overhead of a migration
*/
if ((tdwait = TAILQ_FIRST(&record->er_tdlist)) != NULL &&
- TD_IS_RUNNING(tdwait)) {
+ TD_IS_RUNNING(tdwait->et_td)) {
gen = record->er_gen;
thread_unlock(td);
do {
cpu_spinwait();
} while (tdwait == TAILQ_FIRST(&record->er_tdlist) &&
- gen == record->er_gen && TD_IS_RUNNING(tdwait) &&
+ gen == record->er_gen && TD_IS_RUNNING(tdwait->et_td) &&
spincount++ < MAX_ADAPTIVE_SPIN);
thread_lock(td);
return;
@@ -426,28 +334,29 @@ epoch_block_handler_preempt(struct ck_epoch *global __
* priority thread (highest prio value) and drop our priority
* to match to allow it to run.
*/
- TAILQ_FOREACH(tdwait, &record->er_tdlist, td_epochq) {
+ TAILQ_FOREACH(tdwait, &record->er_tdlist, et_link) {
/*
* Propagate our priority to any other waiters to prevent us
* from starving them. They will have their original priority
* restore on exit from epoch_wait().
*/
- if (!TD_IS_INHIBITED(tdwait) && tdwait->td_priority > td->td_priority) {
+ curwaittd = tdwait->et_td;
+ if (!TD_IS_INHIBITED(curwaittd) && curwaittd->td_priority > td->td_priority) {
critical_enter();
thread_unlock(td);
- thread_lock(tdwait);
- sched_prio(tdwait, td->td_priority);
- thread_unlock(tdwait);
+ thread_lock(curwaittd);
+ sched_prio(curwaittd, td->td_priority);
+ thread_unlock(curwaittd);
thread_lock(td);
critical_exit();
}
- if (TD_IS_INHIBITED(tdwait) && TD_ON_LOCK(tdwait) &&
- ((ts = tdwait->td_blocked) != NULL)) {
+ if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) &&
+ ((ts = curwaittd->td_blocked) != NULL)) {
/*
* We unlock td to allow turnstile_wait to reacquire the
* the thread lock. Before unlocking it we enter a critical
* section to prevent preemption after we reenable interrupts
- * by dropping the thread lock in order to prevent tdwait
+ * by dropping the thread lock in order to prevent curwaittd
* from getting to run.
*/
critical_enter();
@@ -456,15 +365,15 @@ epoch_block_handler_preempt(struct ck_epoch *global __
/*
* The owner pointer indicates that the lock succeeded. Only
* in case we hold the lock and the turnstile we locked is still
- * the one that tdwait is blocked on can we continue. Otherwise
+ * the one that curwaittd is blocked on can we continue. Otherwise
* The turnstile pointer has been changed out from underneath
- * us, as in the case where the lock holder has signalled tdwait,
+ * us, as in the case where the lock holder has signalled curwaittd,
* and we need to continue.
*/
- if (owner != NULL && ts == tdwait->td_blocked) {
- MPASS(TD_IS_INHIBITED(tdwait) && TD_ON_LOCK(tdwait));
+ if (owner != NULL && ts == curwaittd->td_blocked) {
+ MPASS(TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd));
critical_exit();
- turnstile_wait(ts, owner, tdwait->td_tsqueue);
+ turnstile_wait(ts, owner, curwaittd->td_tsqueue);
counter_u64_add(turnstile_count, 1);
thread_lock(td);
return;
@@ -569,7 +478,7 @@ epoch_wait(epoch_t epoch)
void
epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t))
{
- struct epoch_pcpu_state *eps;
+ epoch_record_t er;
ck_epoch_entry_t *cb;
cb = (void *)ctx;
@@ -585,8 +494,8 @@ epoch_call(epoch_t epoch, epoch_context_t ctx, void (*
critical_enter();
*DPCPU_PTR(epoch_cb_count) += 1;
- eps = epoch->e_pcpu[curcpu];
- ck_epoch_call(&eps->eps_record.er_record, cb, (ck_epoch_cb_t *)callback);
+ er = epoch->e_pcpu[curcpu];
+ ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback);
critical_exit();
return;
boottime:
@@ -608,7 +517,7 @@ epoch_call_task(void *arg __unused)
for (total = i = 0; i < epoch_count; i++) {
if (__predict_false((epoch = allepochs[i]) == NULL))
continue;
- record = &epoch->e_pcpu[curcpu]->eps_record.er_record;
+ record = &epoch->e_pcpu[curcpu]->er_record;
if ((npending = record->n_pending) == 0)
continue;
ck_epoch_poll_deferred(record, &cb_stack);
@@ -632,7 +541,47 @@ epoch_call_task(void *arg __unused)
}
int
-in_epoch(void)
+in_epoch_verbose(epoch_t epoch, int dump_onfail)
{
- return (curthread->td_epochnest != 0);
+ struct epoch_thread *tdwait;
+ struct thread *td;
+ epoch_record_t er;
+
+ td = curthread;
+ if (td->td_epochnest == 0)
+ return (0);
+ if (__predict_false((epoch) == NULL))
+ return (0);
+ critical_enter();
+ er = epoch->e_pcpu[curcpu];
+ TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
+ if (tdwait->et_td == td) {
+ critical_exit();
+ return (1);
+ }
+#ifdef INVARIANTS
+ if (dump_onfail) {
+ MPASS(td->td_pinned);
+ printf("cpu: %d id: %d\n", curcpu, td->td_tid);
+ TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
+ printf("td_tid: %d ", tdwait->et_td->td_tid);
+ printf("\n");
+ }
+#endif
+ critical_exit();
+ return (0);
+}
+
+int
+in_epoch(epoch_t epoch)
+{
+ return (in_epoch_verbose(epoch, 0));
+}
+
+void
+epoch_adjust_prio(struct thread *td, u_char prio)
+{
+ thread_lock(td);
+ sched_prio(td, prio);
+ thread_unlock(td);
}
Modified: head/sys/net/if.c
==============================================================================
--- head/sys/net/if.c Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/net/if.c Wed Jul 4 02:47:16 2018 (r335924)
@@ -1760,29 +1760,35 @@ if_data_copy(struct ifnet *ifp, struct if_data *ifd)
void
if_addr_rlock(struct ifnet *ifp)
{
-
- IF_ADDR_RLOCK(ifp);
+ MPASS(*(uint64_t *)&ifp->if_addr_et == 0);
+ epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et);
}
void
if_addr_runlock(struct ifnet *ifp)
{
-
- IF_ADDR_RUNLOCK(ifp);
+ epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et);
+#ifdef INVARIANTS
+ bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker));
+#endif
}
void
if_maddr_rlock(if_t ifp)
{
- IF_ADDR_RLOCK((struct ifnet *)ifp);
+ MPASS(*(uint64_t *)&ifp->if_maddr_et == 0);
+ epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et);
}
void
if_maddr_runlock(if_t ifp)
{
- IF_ADDR_RUNLOCK((struct ifnet *)ifp);
+ epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et);
+#ifdef INVARIANTS
+ bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker));
+#endif
}
/*
@@ -1926,7 +1932,7 @@ ifa_ifwithaddr(const struct sockaddr *addr)
struct ifnet *ifp;
struct ifaddr *ifa;
- MPASS(in_epoch());
+ MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
@@ -1969,7 +1975,7 @@ ifa_ifwithbroadaddr(const struct sockaddr *addr, int f
struct ifnet *ifp;
struct ifaddr *ifa;
- MPASS(in_epoch());
+ MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
continue;
@@ -1999,7 +2005,7 @@ ifa_ifwithdstaddr(const struct sockaddr *addr, int fib
struct ifnet *ifp;
struct ifaddr *ifa;
- MPASS(in_epoch());
+ MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
continue;
@@ -2032,7 +2038,7 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_
u_int af = addr->sa_family;
const char *addr_data = addr->sa_data, *cplim;
- MPASS(in_epoch());
+ MPASS(in_epoch(net_epoch_preempt));
/*
* AF_LINK addresses can be looked up directly by their index number,
* so do that if we can.
@@ -2069,7 +2075,6 @@ next: continue;
*/
if (ifa->ifa_dstaddr != NULL &&
sa_equal(addr, ifa->ifa_dstaddr)) {
- IF_ADDR_RUNLOCK(ifp);
goto done;
}
} else {
@@ -2128,7 +2133,8 @@ ifaof_ifpforaddr(const struct sockaddr *addr, struct i
if (af >= AF_MAX)
return (NULL);
- MPASS(in_epoch());
+
+ MPASS(in_epoch(net_epoch_preempt));
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != af)
continue;
Modified: head/sys/net/if_gif.h
==============================================================================
--- head/sys/net/if_gif.h Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/net/if_gif.h Wed Jul 4 02:47:16 2018 (r335924)
@@ -96,8 +96,8 @@ struct etherip_header {
/* mbuf adjust factor to force 32-bit alignment of IP header */
#define ETHERIP_ALIGN 2
-#define GIF_RLOCK() epoch_enter_preempt(net_epoch_preempt)
-#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt)
+#define GIF_RLOCK() struct epoch_tracker gif_et; epoch_enter_preempt(net_epoch_preempt, &gif_et)
+#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gif_et)
#define GIF_WAIT() epoch_wait_preempt(net_epoch_preempt)
/* Prototypes */
Modified: head/sys/net/if_gre.h
==============================================================================
--- head/sys/net/if_gre.h Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/net/if_gre.h Wed Jul 4 02:47:16 2018 (r335924)
@@ -91,8 +91,8 @@ MALLOC_DECLARE(M_GRE);
#endif
#define GRE2IFP(sc) ((sc)->gre_ifp)
-#define GRE_RLOCK() epoch_enter_preempt(net_epoch_preempt)
-#define GRE_RUNLOCK() epoch_exit_preempt(net_epoch_preempt)
+#define GRE_RLOCK() struct epoch_tracker gre_et; epoch_enter_preempt(net_epoch_preempt, &gre_et)
+#define GRE_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gre_et)
#define GRE_WAIT() epoch_wait_preempt(net_epoch_preempt)
#define gre_hdr gre_uhdr.hdr
Modified: head/sys/net/if_lagg.c
==============================================================================
--- head/sys/net/if_lagg.c Wed Jul 4 00:18:40 2018 (r335923)
+++ head/sys/net/if_lagg.c Wed Jul 4 02:47:16 2018 (r335924)
@@ -73,10 +73,10 @@ __FBSDID("$FreeBSD$");
#include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h>
-#define LAGG_RLOCK() epoch_enter_preempt(net_epoch_preempt)
-#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt)
-#define LAGG_RLOCK_ASSERT() MPASS(in_epoch())
-#define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch())
+#define LAGG_RLOCK() struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et)
+#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &lagg_et)
+#define LAGG_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt))
+#define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch(net_epoch_preempt))
#define LAGG_SX_INIT(_sc) sx_init(&(_sc)->sc_sx, "if_lagg sx")
#define LAGG_SX_DESTROY(_sc) sx_destroy(&(_sc)->sc_sx)
@@ -1791,6 +1791,7 @@ struct lagg_port *
lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp)
{
struct lagg_port *lp_next, *rval = NULL;
+ struct epoch_tracker net_et;
/*
* Search a port which reports an active link state.
@@ -1809,15 +1810,14 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_po
}
search:
- LAGG_RLOCK();
+ epoch_enter_preempt(net_epoch_preempt, &net_et);
CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) {
if (LAGG_PORTACTIVE(lp_next)) {
- LAGG_RUNLOCK();
- rval = lp_next;
- goto found;
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-head
mailing list