git: a753730ca0aa - stable/13 - Stop arming kqueue timers on knote owner suspend or terminate

Konstantin Belousov kib at FreeBSD.org
Fri Apr 23 11:15:25 UTC 2021


The branch stable/13 has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=a753730ca0aae7b540ff8fbb24197b4e159b40fa

commit a753730ca0aae7b540ff8fbb24197b4e159b40fa
Author:     Konstantin Belousov <kib at FreeBSD.org>
AuthorDate: 2021-03-05 23:29:08 +0000
Commit:     Konstantin Belousov <kib at FreeBSD.org>
CommitDate: 2021-04-23 11:14:09 +0000

    Stop arming kqueue timers on knote owner suspend or terminate
    
    (cherry picked from commit 2fd1ffefaa4d2cd99a19f866a949cb2cd58ef998)
---
 sys/kern/init_main.c   |  1 +
 sys/kern/kern_event.c  | 60 ++++++++++++++++++++++++++++++++++++++++++++------
 sys/kern/kern_fork.c   |  1 +
 sys/kern/kern_sig.c    |  1 +
 sys/kern/sys_process.c |  1 +
 sys/sys/proc.h         |  4 ++++
 6 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 5eb8186c23ca..e46f8f74c83d 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -524,6 +524,7 @@ proc0_init(void *dummy __unused)
 	callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
 	callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
 	callout_init(&td->td_slpcallout, 1);
+	TAILQ_INIT(&p->p_kqtim_stop);
 
 	/* Create credentials. */
 	newcred = crget();
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 5e9f1fc35dfe..31b091e20984 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -676,8 +676,10 @@ timer2sbintime(int64_t data, int flags)
 
 struct kq_timer_cb_data {
 	struct callout c;
+	struct proc *p;
 	struct knote *kn;
 	int cpuid;
+	TAILQ_ENTRY(kq_timer_cb_data) link;
 	sbintime_t next;	/* next timer event fires at */
 	sbintime_t to;		/* precalculated timer period, 0 for abs */
 };
@@ -689,22 +691,65 @@ kqtimer_sched_callout(struct kq_timer_cb_data *kc)
 	    kc->cpuid, C_ABSOLUTE);
 }
 
+void
+kqtimer_proc_continue(struct proc *p)
+{
+	struct kq_timer_cb_data *kc, *kc1;
+	struct bintime bt;
+	sbintime_t now;
+
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	getboottimebin(&bt);
+	now = bttosbt(bt);
+
+	TAILQ_FOREACH_SAFE(kc, &p->p_kqtim_stop, link, kc1) {
+		TAILQ_REMOVE(&p->p_kqtim_stop, kc, link);
+		if (kc->next <= now)
+			filt_timerexpire(kc->kn);
+		else
+			kqtimer_sched_callout(kc);
+	}
+}
+
 static void
 filt_timerexpire(void *knx)
 {
 	struct knote *kn;
 	struct kq_timer_cb_data *kc;
+	struct proc *p;
+	sbintime_t now;
 
 	kn = knx;
-	kn->kn_data++;
-	KNOTE_ACTIVATE(kn, 0);	/* XXX - handle locking */
-
-	if ((kn->kn_flags & EV_ONESHOT) != 0)
-		return;
 	kc = kn->kn_ptr.p_v;
-	if (kc->to == 0)
+
+	if ((kn->kn_flags & EV_ONESHOT) != 0 || kc->to == 0) {
+		kn->kn_data++;
+		KNOTE_ACTIVATE(kn, 0);
 		return;
-	kc->next += kc->to;
+	}
+
+	for (now = sbinuptime(); kc->next <= now; kc->next += kc->to)
+		kn->kn_data++;
+	KNOTE_ACTIVATE(kn, 0);	/* XXX - handle locking */
+
+	/*
+	 * Initial check for stopped kc->p is racy.  It is fine to
+	 * miss the set of the stop flags, at worst we would schedule
+	 * one more callout.  On the other hand, it is not fine to not
+	 * schedule when we we missed clearing of the flags, we
+	 * recheck them under the lock and observe consistent state.
+	 */
+	p = kc->p;
+	if (P_SHOULDSTOP(p) || P_KILLED(p)) {
+		PROC_LOCK(p);
+		if (P_SHOULDSTOP(p) || P_KILLED(p)) {
+			TAILQ_INSERT_TAIL(&p->p_kqtim_stop, kc, link);
+			PROC_UNLOCK(p);
+			return;
+		}
+		PROC_UNLOCK(p);
+	}
 	kqtimer_sched_callout(kc);
 }
 
@@ -762,6 +807,7 @@ filt_timerattach(struct knote *kn)
 	kn->kn_status &= ~KN_DETACHED;		/* knlist_add clears it */
 	kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK);
 	kc->kn = kn;
+	kc->p = curproc;
 	kc->cpuid = PCPU_GET(cpuid);
 	callout_init(&kc->c, 1);
 	filt_timerstart(kn, to);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 3da8205d8ab0..2a092b192878 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -606,6 +606,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
 	LIST_INIT(&p2->p_orphans);
 
 	callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);
+	TAILQ_INIT(&p2->p_kqtim_stop);
 
 	/*
 	 * This begins the section where we must prevent the parent
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 212b4997dd5e..46b520030dcd 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2424,6 +2424,7 @@ runfast:
 	PROC_SUNLOCK(p);
 out_cont:
 	itimer_proc_continue(p);
+	kqtimer_proc_continue(p);
 out:
 	/* If we jump here, proc slock should not be owned. */
 	PROC_SLOCK_ASSERT(p, MA_NOTOWNED);
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index f00aa57c95d2..c4dfc2def72a 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -1092,6 +1092,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 		thread_unsuspend(p);
 		PROC_SUNLOCK(p);
 		itimer_proc_continue(p);
+		kqtimer_proc_continue(p);
 		break;
 
 	case PT_WRITE_I:
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index c0cf0be7e7d7..3e5a96185e0c 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -183,6 +183,7 @@ struct kaudit_record;
 struct kcov_info;
 struct kdtrace_proc;
 struct kdtrace_thread;
+struct kq_timer_cb_data;
 struct mqueue_notifier;
 struct p_sched;
 struct proc;
@@ -715,6 +716,8 @@ struct proc {
 	 */
 	LIST_ENTRY(proc) p_orphan;	/* (e) List of orphan processes. */
 	LIST_HEAD(, proc) p_orphans;	/* (e) Pointer to list of orphans. */
+
+	TAILQ_HEAD(, kq_timer_cb_data)	p_kqtim_stop;	/* (c) */
 };
 
 #define	p_session	p_pgrp->pg_session
@@ -1081,6 +1084,7 @@ void	fork_exit(void (*)(void *, struct trapframe *), void *,
 void	fork_return(struct thread *, struct trapframe *);
 int	inferior(struct proc *p);
 void	itimer_proc_continue(struct proc *p);
+void	kqtimer_proc_continue(struct proc *p);
 void	kern_proc_vmmap_resident(struct vm_map *map, struct vm_map_entry *entry,
 	    int *resident_count, bool *super);
 void	kern_yield(int);


More information about the dev-commits-src-all mailing list