From nobody Thu Oct 21 22:31:37 2021
Date: Thu, 21 Oct 2021 22:31:37 GMT
Message-Id: <202110212231.19LMVboT018860@gitrepo.freebsd.org>
From: Alexander Motin
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org,
    dev-commits-src-branches@FreeBSD.org
Subject: git: 11f14b33629e - stable/13 - sched_ule(4): Fix hang with steal_thresh < 2.
List-Id: Commits to the stable branches of the FreeBSD src repository
List-Archive: https://lists.freebsd.org/archives/dev-commits-src-branches
X-Git-Committer: mav
X-Git-Repository: src
X-Git-Refname: refs/heads/stable/13
X-Git-Reftype: branch
X-Git-Commit: 11f14b33629e552a451fdbfe653ebb0addd27700

The branch stable/13 has been updated by mav:

URL: https://cgit.FreeBSD.org/src/commit/?id=11f14b33629e552a451fdbfe653ebb0addd27700

commit 11f14b33629e552a451fdbfe653ebb0addd27700
Author:     Alexander Motin
AuthorDate: 2021-09-26 16:03:05 +0000
Commit:     Alexander Motin
CommitDate: 2021-10-21 22:24:36 +0000

    sched_ule(4): Fix hang with steal_thresh < 2.

    e745d729be60 caused an infinite loop with interrupts disabled in the
    load-stealing code when steal_thresh is set below 2.  Such a
    configuration should not generally be used, but it appears some people
    use it to work around other problems.

    To fix the problem, explicitly pass sched_highest() the minimum number
    of transferable threads the caller can handle, instead of guessing.

    MFC after:	25 days

    (cherry picked from commit 08063e9f98a33980a09e3bd465926719b3437122)
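To see why steal_thresh = 1 could hang before this change, consider a CPU
whose only queued thread is the one currently running: it counts toward
tdq_load but not tdq_transferable, so the old search could keep nominating
it as a steal victim while the stealer, spinning with interrupts disabled,
never found anything it could move.  The standalone sketch below models
just that selection predicate; it is illustrative only, a simplification
of cpu_search_highest(), and the names struct tdq_model and pick_victim
are invented rather than taken from sched_ule.c.

/*
 * Standalone model of the steal-victim test, for illustration only.
 * The names (struct tdq_model, pick_victim) are invented and the logic
 * is simplified from cpu_search_highest(); this is not kernel code.
 */
#include <stdio.h>

struct tdq_model {
	int	load;		/* Queued threads, including the running one. */
	int	transferable;	/* Threads another CPU could actually steal. */
};

static int
pick_victim(const struct tdq_model *cpu, int ncpus, int minload, int mintrans)
{
	int best = -1, bload = -1;

	for (int c = 0; c < ncpus; c++) {
		/* The fix: also require mintrans transferable threads. */
		if (cpu[c].load < minload || cpu[c].transferable < mintrans)
			continue;
		if (cpu[c].load > bload) {
			bload = cpu[c].load;
			best = c;
		}
	}
	return (best);
}

int
main(void)
{
	/* CPU 1's only thread is running: load 1, nothing transferable. */
	struct tdq_model cpu[2] = {
		{ .load = 0, .transferable = 0 },
		{ .load = 1, .transferable = 0 },
	};

	/* Old behavior (steal_thresh = 1, no mintrans): CPU 1 is chosen
	 * again and again, although nothing can be stolen from it. */
	printf("mintrans 0: victim %d\n", pick_victim(cpu, 2, 1, 0));

	/* Requiring one transferable thread rejects CPU 1, so the search
	 * reports no victim instead of looping. */
	printf("mintrans 1: victim %d\n", pick_victim(cpu, 2, 1, 1));
	return (0);
}

The second call returning -1 corresponds, in simplified form, to the new
tdq_transferable < cs_trans test in the diff below.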
---
 sys/kern/sched_ule.c | 70 +++++++++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 98c1f0bca981..6f072c518934 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -638,12 +638,13 @@ struct cpu_search {
 	int	cs_prefer;	/* Prefer this CPU and groups including it. */
 	int	cs_running;	/* The thread is now running at cs_prefer. */
 	int	cs_pri;		/* Min priority for low. */
-	int	cs_limit;	/* Max load for low, min load for high. */
+	int	cs_load;	/* Max load for low, min load for high. */
+	int	cs_trans;	/* Min transferable load for high. */
 };
 
 struct cpu_search_res {
-	int	cs_cpu;		/* The best CPU found. */
-	int	cs_load;	/* The load of cs_cpu. */
+	int	csr_cpu;	/* The best CPU found. */
+	int	csr_load;	/* The load of cs_cpu. */
 };
 
 /*
@@ -663,7 +664,7 @@ cpu_search_lowest(const struct cpu_group *cg, const struct cpu_search *s,
 
 	total = 0;
 	bload = INT_MAX;
-	r->cs_cpu = -1;
+	r->csr_cpu = -1;
 
 	/* Loop through children CPU groups if there are any. */
 	if (cg->cg_children > 0) {
@@ -681,11 +682,11 @@ cpu_search_lowest(const struct cpu_group *cg, const struct cpu_search *s,
 			    load >= 128 && (load & 128) != 0)
 				load += 128;
 
-			if (lr.cs_cpu >= 0 && (load < bload ||
-			    (load == bload && lr.cs_load < r->cs_load))) {
+			if (lr.csr_cpu >= 0 && (load < bload ||
+			    (load == bload && lr.csr_load < r->csr_load))) {
 				bload = load;
-				r->cs_cpu = lr.cs_cpu;
-				r->cs_load = lr.cs_load;
+				r->csr_cpu = lr.csr_cpu;
+				r->csr_load = lr.csr_load;
 			}
 		}
 		return (total);
@@ -711,7 +712,7 @@ cpu_search_lowest(const struct cpu_group *cg, const struct cpu_search *s,
 		 * If the threads is already on the CPU, don't look on the TDQ
 		 * priority, since it can be the priority of the thread itself.
 		 */
-		if (l > s->cs_limit || (tdq->tdq_lowpri <= s->cs_pri &&
+		if (l > s->cs_load || (tdq->tdq_lowpri <= s->cs_pri &&
 		    (!s->cs_running || c != s->cs_prefer)) ||
 		    !CPU_ISSET(c, s->cs_mask))
 			continue;
@@ -727,8 +728,8 @@ cpu_search_lowest(const struct cpu_group *cg, const struct cpu_search *s,
 		load -= sched_random() % 128;
 		if (bload > load - p) {
 			bload = load - p;
-			r->cs_cpu = c;
-			r->cs_load = load;
+			r->csr_cpu = c;
+			r->csr_load = load;
 		}
 	}
 	return (total);
@@ -744,18 +745,18 @@ cpu_search_highest(const struct cpu_group *cg, const struct cpu_search *s,
 
 	total = 0;
 	bload = INT_MIN;
-	r->cs_cpu = -1;
+	r->csr_cpu = -1;
 
 	/* Loop through children CPU groups if there are any. */
 	if (cg->cg_children > 0) {
 		for (c = cg->cg_children - 1; c >= 0; c--) {
 			load = cpu_search_highest(&cg->cg_child[c], s, &lr);
 			total += load;
-			if (lr.cs_cpu >= 0 && (load > bload ||
-			    (load == bload && lr.cs_load > r->cs_load))) {
+			if (lr.csr_cpu >= 0 && (load > bload ||
+			    (load == bload && lr.csr_load > r->csr_load))) {
 				bload = load;
-				r->cs_cpu = lr.cs_cpu;
-				r->cs_load = lr.cs_load;
+				r->csr_cpu = lr.csr_cpu;
+				r->csr_load = lr.csr_load;
 			}
 		}
 		return (total);
@@ -772,21 +773,18 @@ cpu_search_highest(const struct cpu_group *cg, const struct cpu_search *s,
 
 		/*
 		 * Check this CPU is acceptable.
-		 * If requested minimum load is 1, then caller must know how
-		 * to handle running threads, not counted in tdq_transferable.
 		 */
-		if (l < s->cs_limit || (tdq->tdq_transferable == 0 &&
-		    (s->cs_limit > 1 || l > 1)) ||
+		if (l < s->cs_load || (tdq->tdq_transferable < s->cs_trans) ||
 		    !CPU_ISSET(c, s->cs_mask))
 			continue;
 
 		load -= sched_random() % 256;
 		if (load > bload) {
 			bload = load;
-			r->cs_cpu = c;
+			r->csr_cpu = c;
 		}
 	}
-	r->cs_load = bload;
+	r->csr_load = bload;
 	return (total);
 }
 
@@ -806,24 +804,26 @@ sched_lowest(const struct cpu_group *cg, cpuset_t *mask, int pri, int maxload,
 	s.cs_running = running;
 	s.cs_mask = mask;
 	s.cs_pri = pri;
-	s.cs_limit = maxload;
+	s.cs_load = maxload;
 	cpu_search_lowest(cg, &s, &r);
-	return (r.cs_cpu);
+	return (r.csr_cpu);
 }
 
 /*
  * Find the cpu with the highest load via the highest loaded path.
  */
 static inline int
-sched_highest(const struct cpu_group *cg, cpuset_t *mask, int minload)
+sched_highest(const struct cpu_group *cg, cpuset_t *mask, int minload,
+    int mintrans)
 {
 	struct cpu_search s;
 	struct cpu_search_res r;
 
 	s.cs_mask = mask;
-	s.cs_limit = minload;
+	s.cs_load = minload;
+	s.cs_trans = mintrans;
 	cpu_search_highest(cg, &s, &r);
-	return (r.cs_cpu);
+	return (r.csr_cpu);
 }
 
 static void
@@ -836,7 +836,7 @@ sched_balance_group(struct cpu_group *cg)
 
 	CPU_FILL(&hmask);
 	for (;;) {
-		high = sched_highest(cg, &hmask, 1);
+		high = sched_highest(cg, &hmask, 1, 0);
 		/* Stop if there is no more CPU with transferrable threads. */
 		if (high == -1)
 			break;
@@ -1008,7 +1008,7 @@ tdq_idled(struct tdq *tdq)
     restart:
 	switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
 	for (cg = tdq->tdq_cg, goup = 0; ; ) {
-		cpu = sched_highest(cg, &mask, steal_thresh);
+		cpu = sched_highest(cg, &mask, steal_thresh, 1);
 		/*
 		 * We were assigned a thread but not preempted.  Returning
 		 * 0 here will cause our caller to switch to it.
@@ -1968,7 +1968,8 @@ tdq_trysteal(struct tdq *tdq)
 	cpuset_t mask;
 	int cpu, i, goup;
 
-	if (smp_started == 0 || trysteal_limit == 0 || tdq->tdq_cg == NULL)
+	if (smp_started == 0 || steal_idle == 0 || trysteal_limit == 0 ||
+	    tdq->tdq_cg == NULL)
 		return;
 	CPU_FILL(&mask);
 	CPU_CLR(PCPU_GET(cpuid), &mask);
@@ -1976,7 +1977,7 @@ tdq_trysteal(struct tdq *tdq)
 	spinlock_enter();
 	TDQ_UNLOCK(tdq);
 	for (i = 1, cg = tdq->tdq_cg, goup = 0; ; ) {
-		cpu = sched_highest(cg, &mask, steal_thresh);
+		cpu = sched_highest(cg, &mask, steal_thresh, 1);
 		/*
 		 * If a thread was added while interrupts were disabled don't
 		 * steal one here.
@@ -2019,7 +2020,9 @@ tdq_trysteal(struct tdq *tdq)
 		steal = TDQ_CPU(cpu);
 		/*
 		 * The data returned by sched_highest() is stale and
-		 * the chosen CPU no longer has an eligible thread.
+		 * the chosen CPU no longer has an eligible thread.
+		 * At this point unconditonally exit the loop to bound
+		 * the time spent in the critcal section.
 		 */
 		if (steal->tdq_load < steal_thresh ||
 		    steal->tdq_transferable == 0)
@@ -2028,8 +2031,7 @@ tdq_trysteal(struct tdq *tdq)
 		 * Try to lock both queues. If we are assigned a thread while
 		 * waited for the lock, switch to it now instead of stealing.
 		 * If we can't get the lock, then somebody likely got there
-		 * first.  At this point unconditonally exit the loop to
-		 * bound the time spent in the critcal section.
+		 * first.
 		 */
 		TDQ_LOCK(tdq);
 		if (tdq->tdq_load > 0)