git: 105b9df26ee0 - main - LinuxKPI: 802.11: close race lkpi_sta_scan_to_auth()/(*iv_update_bss)()
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 07 Jun 2024 21:41:07 UTC
The branch main has been updated by bz: URL: https://cgit.FreeBSD.org/src/commit/?id=105b9df26ee0286f3a5a7d191075e068dee1c4a2 commit 105b9df26ee0286f3a5a7d191075e068dee1c4a2 Author: Bjoern A. Zeeb <bz@FreeBSD.org> AuthorDate: 2024-02-18 20:57:51 +0000 Commit: Bjoern A. Zeeb <bz@FreeBSD.org> CommitDate: 2024-06-07 21:40:32 +0000 LinuxKPI: 802.11: close race lkpi_sta_scan_to_auth()/(*iv_update_bss)() We have to unlock the net80211 ic lock in order to be able to call sleepable downcalls to the driver/firmware; a 2nd thread may go through net80211::join1() and (*iv_update_bss)() after we checked and unlocked. Re-check status at the end of the function under the ic lock so that we do not accidentally set lvif_bss_synched to true again despite it no longer being true. This should fix a race where we lost the (*iv_update_bss)() state during startup where one SCAN->AUTH is followed by a (then) AUTH->AUTH and lkpi_sta_a_to_a() did the wrong thing. Once we re-consider net80211 state and allowing a second join on a different node or iv_bss update without previously tearing down the older node we can likely undo a lot of these extra checks and workarounds. Sponsored by: The FreeBSD Foundation (updated version) Tested by: emaste (on and off) MFC after: 3 days Reviewd by: cc Differential Revision: https://reviews.freebsd.org/D43967 --- sys/compat/linuxkpi/common/src/linux_80211.c | 66 +++++++++++++++++++--------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/sys/compat/linuxkpi/common/src/linux_80211.c b/sys/compat/linuxkpi/common/src/linux_80211.c index cd1897a08ccd..bf2050deed0d 100644 --- a/sys/compat/linuxkpi/common/src/linux_80211.c +++ b/sys/compat/linuxkpi/common/src/linux_80211.c @@ -1275,25 +1275,6 @@ lkpi_sta_scan_to_auth(struct ieee80211vap *vap, enum ieee80211_state nstate, int lsta = ni->ni_drv_data; LKPI_80211_LVIF_LOCK(lvif); - /* Re-check given (*iv_update_bss) could have happened. */ - /* XXX-BZ KASSERT later? or deal as error? */ - if (lvif->lvif_bss_synched || lvif->lvif_bss != NULL) - ic_printf(vap->iv_ic, "%s:%d: lvif %p vap %p iv_bss %p lvif_bss %p " - "lvif_bss->ni %p synched %d, ni %p lsta %p\n", __func__, __LINE__, - lvif, vap, vap->iv_bss, lvif->lvif_bss, - (lvif->lvif_bss != NULL) ? lvif->lvif_bss->ni : NULL, - lvif->lvif_bss_synched, ni, lsta); - - /* - * Reference the ni for this cache of lsta/ni on lvif->lvif_bss - * essentially out lsta version of the iv_bss. - * Do NOT use iv_bss here anymore as that may have diverged from our - * function local ni already and would lead to inconsistencies. - */ - ieee80211_ref_node(ni); - lvif->lvif_bss = lsta; - lvif->lvif_bss_synched = true; - /* Insert the [l]sta into the list of known stations. */ TAILQ_INSERT_TAIL(&lvif->lsta_head, lsta, lsta_entry); LKPI_80211_LVIF_UNLOCK(lvif); @@ -1342,11 +1323,56 @@ lkpi_sta_scan_to_auth(struct ieee80211vap *vap, enum ieee80211_state nstate, int * (ideally we'd do that on a callback for something else ...) */ + LKPI_80211_LHW_UNLOCK(lhw); + IEEE80211_LOCK(vap->iv_ic); + + LKPI_80211_LVIF_LOCK(lvif); + /* Re-check given (*iv_update_bss) could have happened while we were unlocked. */ + if (lvif->lvif_bss_synched || lvif->lvif_bss != NULL || + lsta->ni != vap->iv_bss) + ic_printf(vap->iv_ic, "%s:%d: lvif %p vap %p iv_bss %p lvif_bss %p " + "lvif_bss->ni %p synched %d, ni %p lsta %p\n", __func__, __LINE__, + lvif, vap, vap->iv_bss, lvif->lvif_bss, + (lvif->lvif_bss != NULL) ? lvif->lvif_bss->ni : NULL, + lvif->lvif_bss_synched, ni, lsta); + + /* + * Reference the "ni" for caching the lsta/ni in lvif->lvif_bss. + * Given we cache lsta we use lsta->ni instead of ni here (even though + * lsta->ni == ni) to be distinct from the rest of the code where we do + * assume that ni == vap->iv_bss which it may or may not be. + * So do NOT use iv_bss here anymore as that may have diverged from our + * function local ni already while ic was unlocked and would lead to + * inconsistencies. Go and see if we lost a race and do not update + * lvif_bss_synched in that case. + */ + ieee80211_ref_node(lsta->ni); + lvif->lvif_bss = lsta; + if (lsta->ni == vap->iv_bss) { + lvif->lvif_bss_synched = true; + } else { + /* Set to un-synched no matter what. */ + lvif->lvif_bss_synched = false; + /* + * We do not error as someone has to take us down. + * If we are followed by a 2nd, new net80211::join1() going to + * AUTH lkpi_sta_a_to_a() will error, lkpi_sta_auth_to_{scan,init}() + * will take the lvif->lvif_bss node down eventually. + * What happens with the vap->iv_bss node will entirely be up + * to net80211 as we never used the node beyond alloc()/free() + * and we do not hold an extra reference for that anymore given + * ni : lsta == 1:1. + */ + } + LKPI_80211_LVIF_UNLOCK(lvif); + goto out_relocked; + out: LKPI_80211_LHW_UNLOCK(lhw); IEEE80211_LOCK(vap->iv_ic); +out_relocked: /* - * Release the reference that keop the ni stable locally + * Release the reference that kept the ni stable locally * during the work of this function. */ if (ni != NULL)