git: 1fd43ee968c4 - main - tpm: fix multi-threaded access with per-open state

From: Chuck Silvers <chs_at_FreeBSD.org>
Date: Fri, 20 Mar 2026 18:49:22 UTC
The branch main has been updated by chs:

URL: https://cgit.FreeBSD.org/src/commit/?id=1fd43ee968c497223254038483685d9f9c68791b

commit 1fd43ee968c497223254038483685d9f9c68791b
Author:     Chuck Silvers <chs@FreeBSD.org>
AuthorDate: 2026-03-20 18:48:44 +0000
Commit:     Chuck Silvers <chs@FreeBSD.org>
CommitDate: 2026-03-20 18:48:44 +0000

    tpm: fix multi-threaded access with per-open state
    
    The TPM driver currently has a single buffer per instance to hold the
    result of a command, and does not allow subsequent commands to be sent
    until the current result is read by the same OS thread that sent the
    command, with a timeout to throw away the result after a while if the
    result is not read in a timely fashion.  This has a couple problems:
    
     - The timeout code has a bug which causes all subsequent commands to
       hang forever if a different OS thread tries to read the result
       before the OS thread which sent the command, and the OS thread
       which sent the command never tries to read the result.
    
     - Even if the first problem is fixed, applications expect to be able
       to read the result from a different OS thread than the OS thread
       which sent the command. The particular case that we saw was a go
       application where the go runtime scheduled the goroutine which read
       the result to a different OS thread from one where the goroutine
       that sent the command ran, and there's no way to force these to
       always run on the same OS thread.
    
    Fix all of this by replacing the global result buffer with a per-open
    result buffer via devfs_set_cdevpriv(), so that we no longer need to
    block subsequent commands until the results of a previous command are
    retrieved or care about which OS thread is reading the result of a
    command.
    
    Sponsored by:   Netflix
    Reviewed by:    olivier, imp
    Differential Revision:  https://reviews.freebsd.org/D52328
---
 sys/dev/tpm/tpm20.c        | 116 ++++++++++++++++++---------------------------
 sys/dev/tpm/tpm20.h        |  13 ++---
 sys/dev/tpm/tpm_crb.c      |  19 ++++----
 sys/dev/tpm/tpm_if.m       |   5 ++
 sys/dev/tpm/tpm_tis_core.c |  28 +++++------
 5 files changed, 81 insertions(+), 100 deletions(-)

diff --git a/sys/dev/tpm/tpm20.c b/sys/dev/tpm/tpm20.c
index c521ca9dda9d..48f33708917d 100644
--- a/sys/dev/tpm/tpm20.c
+++ b/sys/dev/tpm/tpm20.c
@@ -41,7 +41,6 @@
 
 MALLOC_DEFINE(M_TPM20, "tpm_buffer", "buffer for tpm 2.0 driver");
 
-static void tpm20_discard_buffer(void *arg);
 #if defined TPM_HARVEST || defined RANDOM_ENABLE_TPM
 static void tpm20_harvest(void *arg, int unused);
 #endif
@@ -68,27 +67,23 @@ int
 tpm20_read(struct cdev *dev, struct uio *uio, int flags)
 {
 	struct tpm_sc *sc;
+	struct tpm_priv *priv;
 	size_t bytes_to_transfer;
 	size_t offset;
 	int result = 0;
 
 	sc = (struct tpm_sc *)dev->si_drv1;
+	devfs_get_cdevpriv((void **)&priv);
 
-	callout_stop(&sc->discard_buffer_callout);
 	sx_xlock(&sc->dev_lock);
-	if (sc->owner_tid != uio->uio_td->td_tid) {
-		sx_xunlock(&sc->dev_lock);
-		return (EPERM);
-	}
-
-	bytes_to_transfer = MIN(sc->pending_data_length, uio->uio_resid);
-	offset = sc->total_length - sc->pending_data_length;
+	offset = priv->offset;
+	bytes_to_transfer = MIN(priv->len, uio->uio_resid);
 	if (bytes_to_transfer > 0) {
-		result = uiomove((caddr_t) sc->buf + offset, bytes_to_transfer, uio);
-		sc->pending_data_length -= bytes_to_transfer;
-		cv_signal(&sc->buf_cv);
+		result = uiomove((caddr_t) priv->buf + offset, bytes_to_transfer, uio);
+		priv->offset += bytes_to_transfer;
+		priv->len -= bytes_to_transfer;
 	} else {
-		result = ETIMEDOUT;
+		result = 0;
 	}
 
 	sx_xunlock(&sc->dev_lock);
@@ -100,10 +95,12 @@ int
 tpm20_write(struct cdev *dev, struct uio *uio, int flags)
 {
 	struct tpm_sc *sc;
+	struct tpm_priv *priv;
 	size_t byte_count;
 	int result = 0;
 
 	sc = (struct tpm_sc *)dev->si_drv1;
+	devfs_get_cdevpriv((void **)&priv);
 
 	byte_count = uio->uio_resid;
 	if (byte_count < TPM_HEADER_SIZE) {
@@ -120,52 +117,42 @@ tpm20_write(struct cdev *dev, struct uio *uio, int flags)
 
 	sx_xlock(&sc->dev_lock);
 
-	while (sc->pending_data_length != 0)
-		cv_wait(&sc->buf_cv, &sc->dev_lock);
-
-	result = uiomove(sc->buf, byte_count, uio);
+	result = uiomove(priv->buf, byte_count, uio);
 	if (result != 0) {
 		sx_xunlock(&sc->dev_lock);
 		return (result);
 	}
 
-	result = TPM_TRANSMIT(sc->dev, byte_count);
-
-	if (result == 0) {
-		callout_reset(&sc->discard_buffer_callout,
-		    TPM_READ_TIMEOUT / tick, tpm20_discard_buffer, sc);
-		sc->owner_tid = uio->uio_td->td_tid;
-	}
+	result = TPM_TRANSMIT(sc->dev, priv, byte_count);
 
 	sx_xunlock(&sc->dev_lock);
 	return (result);
 }
 
-static void
-tpm20_discard_buffer(void *arg)
+static struct tpm_priv *
+tpm20_priv_alloc(void)
 {
-	struct tpm_sc *sc;
-
-	sc = (struct tpm_sc *)arg;
-	if (callout_pending(&sc->discard_buffer_callout))
-		return;
+	struct tpm_priv *priv;
 
-	sx_xlock(&sc->dev_lock);
-
-	memset(sc->buf, 0, TPM_BUFSIZE);
-	sc->pending_data_length = 0;
-	sc->total_length = 0;
+	priv = malloc(sizeof (*priv), M_TPM20, M_WAITOK | M_ZERO);
+	return (priv);
+}
 
-	cv_signal(&sc->buf_cv);
-	sx_xunlock(&sc->dev_lock);
+static void
+tpm20_priv_dtor(void *data)
+{
+	struct tpm_priv *priv = data;
 
-	device_printf(sc->dev,
-	    "User failed to read buffer in time\n");
+	free(priv->buf, M_TPM20);
 }
 
 int
 tpm20_open(struct cdev *dev, int flag, int mode, struct thread *td)
 {
+	struct tpm_priv *priv;
+
+	priv = tpm20_priv_alloc();
+	devfs_set_cdevpriv(priv, tpm20_priv_dtor);
 
 	return (0);
 }
@@ -198,10 +185,7 @@ tpm20_init(struct tpm_sc *sc)
 	struct make_dev_args args;
 	int result;
 
-	cv_init(&sc->buf_cv, "TPM buffer cv");
-	callout_init(&sc->discard_buffer_callout, 1);
-	sc->pending_data_length = 0;
-	sc->total_length = 0;
+	sc->internal_priv = tpm20_priv_alloc();
 
 	make_dev_args_init(&args);
 	args.mda_devsw = &tpm20_cdevsw;
@@ -234,11 +218,8 @@ tpm20_release(struct tpm_sc *sc)
 	random_source_deregister(&random_tpm);
 #endif
 
-	if (sc->buf != NULL)
-		free(sc->buf, M_TPM20);
-
+	tpm20_priv_dtor(sc->internal_priv);
 	sx_destroy(&sc->dev_lock);
-	cv_destroy(&sc->buf_cv);
 	if (sc->sc_cdev != NULL)
 		destroy_dev(sc->sc_cdev);
 }
@@ -286,6 +267,7 @@ static void
 tpm20_harvest(void *arg, int unused)
 {
 	struct tpm_sc *sc;
+	struct tpm_priv *priv;
 	unsigned char entropy[TPM_HARVEST_SIZE];
 	uint16_t entropy_size;
 	int result;
@@ -298,26 +280,22 @@ tpm20_harvest(void *arg, int unused)
 
 	sc = arg;
 	sx_xlock(&sc->dev_lock);
-	while (sc->pending_data_length != 0)
-		cv_wait(&sc->buf_cv, &sc->dev_lock);
 
-	memcpy(sc->buf, cmd, sizeof(cmd));
-	result = TPM_TRANSMIT(sc->dev, sizeof(cmd));
+	priv = sc->internal_priv;
+	memcpy(priv->buf, cmd, sizeof(cmd));
+
+	result = TPM_TRANSMIT(sc->dev, priv, sizeof(cmd));
 	if (result != 0) {
 		sx_xunlock(&sc->dev_lock);
 		return;
 	}
 
-	/* Ignore response size */
-	sc->pending_data_length = 0;
-	sc->total_length = 0;
-
 	/* The number of random bytes we got is placed right after the header */
-	entropy_size = (uint16_t) sc->buf[TPM_HEADER_SIZE + 1];
+	entropy_size = (uint16_t) priv->buf[TPM_HEADER_SIZE + 1];
 	if (entropy_size > 0) {
 		entropy_size = MIN(entropy_size, TPM_HARVEST_SIZE);
 		memcpy(entropy,
-			sc->buf + TPM_HEADER_SIZE + sizeof(uint16_t),
+			priv->buf + TPM_HEADER_SIZE + sizeof(uint16_t),
 			entropy_size);
 	}
 
@@ -334,6 +312,7 @@ static int
 tpm20_restart(device_t dev, bool clear)
 {
 	struct tpm_sc *sc;
+	struct tpm_priv *priv;
 	uint8_t startup_cmd[] = {
 		0x80, 0x01,             /* TPM_ST_NO_SESSIONS tag*/
 		0x00, 0x00, 0x00, 0x0C, /* cmd length */
@@ -349,18 +328,16 @@ tpm20_restart(device_t dev, bool clear)
 	if (clear)
 		startup_cmd[11] = 0; /* TPM_SU_CLEAR */
 
-	if (sc == NULL || sc->buf == NULL)
+	if (sc == NULL)
 		return (0);
 
 	sx_xlock(&sc->dev_lock);
 
-	MPASS(sc->pending_data_length == 0);
-	memcpy(sc->buf, startup_cmd, sizeof(startup_cmd));
+	priv = sc->internal_priv;
+	memcpy(priv->buf, startup_cmd, sizeof(startup_cmd));
 
 	/* XXX Ignoring both TPM_TRANSMIT return and tpm's response */
-	TPM_TRANSMIT(sc->dev, sizeof(startup_cmd));
-	sc->pending_data_length = 0;
-	sc->total_length = 0;
+	TPM_TRANSMIT(sc->dev, priv, sizeof(startup_cmd));
 
 	sx_xunlock(&sc->dev_lock);
 
@@ -371,6 +348,7 @@ static int
 tpm20_save_state(device_t dev, bool suspend)
 {
 	struct tpm_sc *sc;
+	struct tpm_priv *priv;
 	uint8_t save_cmd[] = {
 		0x80, 0x01,             /* TPM_ST_NO_SESSIONS tag*/
 		0x00, 0x00, 0x00, 0x0C, /* cmd length */
@@ -386,18 +364,16 @@ tpm20_save_state(device_t dev, bool suspend)
 	if (suspend)
 		save_cmd[11] = 1; /* TPM_SU_STATE */
 
-	if (sc == NULL || sc->buf == NULL)
+	if (sc == NULL)
 		return (0);
 
 	sx_xlock(&sc->dev_lock);
 
-	MPASS(sc->pending_data_length == 0);
-	memcpy(sc->buf, save_cmd, sizeof(save_cmd));
+	priv = sc->internal_priv;
+	memcpy(priv->buf, save_cmd, sizeof(save_cmd));
 
 	/* XXX Ignoring both TPM_TRANSMIT return and tpm's response */
-	TPM_TRANSMIT(sc->dev, sizeof(save_cmd));
-	sc->pending_data_length = 0;
-	sc->total_length = 0;
+	TPM_TRANSMIT(sc->dev, priv, sizeof(save_cmd));
 
 	sx_xunlock(&sc->dev_lock);
 
diff --git a/sys/dev/tpm/tpm20.h b/sys/dev/tpm/tpm20.h
index 96b2920283b6..b63bb9a1436e 100644
--- a/sys/dev/tpm/tpm20.h
+++ b/sys/dev/tpm/tpm20.h
@@ -105,6 +105,12 @@
 
 MALLOC_DECLARE(M_TPM20);
 
+struct tpm_priv {
+	uint8_t 	buf[TPM_BUFSIZE];
+	size_t		offset;
+	size_t		len;
+};
+
 struct tpm_sc {
 	device_t	dev;
 
@@ -116,18 +122,13 @@ struct tpm_sc {
 	struct cdev	*sc_cdev;
 
 	struct sx 	dev_lock;
-	struct cv	buf_cv;
 
 	void 		*intr_cookie;
 	int 		intr_type;	/* Current event type */
 	bool 		interrupts;
 
-	uint8_t 	*buf;
-	size_t		pending_data_length;
-	size_t		total_length;
-	lwpid_t		owner_tid;
+	struct tpm_priv *internal_priv;
 
-	struct callout 	discard_buffer_callout;
 #if defined TPM_HARVEST || defined RANDOM_ENABLE_TPM
 	struct timeout_task 	harvest_task;
 #endif
diff --git a/sys/dev/tpm/tpm_crb.c b/sys/dev/tpm/tpm_crb.c
index 18414a6c799b..ac093c3857ba 100644
--- a/sys/dev/tpm/tpm_crb.c
+++ b/sys/dev/tpm/tpm_crb.c
@@ -127,7 +127,7 @@ struct tpmcrb_sc {
 	size_t		rsp_buf_size;
 };
 
-int tpmcrb_transmit(device_t dev, size_t size);
+int tpmcrb_transmit(device_t dev, struct tpm_priv *priv, size_t size);
 
 static int tpmcrb_acpi_probe(device_t dev);
 static int tpmcrb_attach(device_t dev);
@@ -257,7 +257,6 @@ tpmcrb_attach(device_t dev)
 	sc->dev = dev;
 
 	sx_init(&sc->dev_lock, "TPM driver lock");
-	sc->buf = malloc(TPM_BUFSIZE, M_TPM20, M_WAITOK);
 
 	sc->mem_rid = 0;
 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid,
@@ -480,7 +479,7 @@ tpmcrb_state_ready(struct tpmcrb_sc *crb_sc, bool wait)
 }
 
 int
-tpmcrb_transmit(device_t dev, size_t length)
+tpmcrb_transmit(device_t dev, struct tpm_priv *priv, size_t length)
 {
 	struct tpmcrb_sc *crb_sc;
 	struct tpm_sc *sc;
@@ -531,12 +530,12 @@ tpmcrb_transmit(device_t dev, size_t length)
 	 * Calculate timeout for current command.
 	 * Command code is passed in bytes 6-10.
 	 */
-	curr_cmd = be32toh(*(uint32_t *) (&sc->buf[6]));
+	curr_cmd = be32toh(*(uint32_t *) (&priv->buf[6]));
 	timeout = tpm20_get_timeout(curr_cmd);
 
 	/* Send command and tell device to process it. */
 	bus_write_region_stream_1(sc->mem_res, crb_sc->cmd_off,
-	    sc->buf, length);
+	    priv->buf, length);
 	TPM_WRITE_BARRIER(dev, crb_sc->cmd_off, length);
 
 	TPM_WRITE_4(dev, TPM_CRB_CTRL_START, TPM_CRB_CTRL_START_CMD);
@@ -559,8 +558,8 @@ tpmcrb_transmit(device_t dev, size_t length)
 
 	/* Read response header. Length is passed in bytes 2 - 6. */
 	bus_read_region_stream_1(sc->mem_res, crb_sc->rsp_off,
-	    sc->buf, TPM_HEADER_SIZE);
-	bytes_available = be32toh(*(uint32_t *) (&sc->buf[2]));
+	    priv->buf, TPM_HEADER_SIZE);
+	bytes_available = be32toh(*(uint32_t *) (&priv->buf[2]));
 
 	if (bytes_available > TPM_BUFSIZE || bytes_available < TPM_HEADER_SIZE) {
 		device_printf(dev,
@@ -570,7 +569,7 @@ tpmcrb_transmit(device_t dev, size_t length)
 	}
 
 	bus_read_region_stream_1(sc->mem_res, crb_sc->rsp_off + TPM_HEADER_SIZE,
-	      &sc->buf[TPM_HEADER_SIZE], bytes_available - TPM_HEADER_SIZE);
+	      &priv->buf[TPM_HEADER_SIZE], bytes_available - TPM_HEADER_SIZE);
 
 	/*
 	 * No need to wait for the transition to idle on the way out, we can
@@ -583,8 +582,8 @@ tpmcrb_transmit(device_t dev, size_t length)
 	}
 
 	tpmcrb_relinquish_locality(sc);
-	sc->pending_data_length = bytes_available;
-	sc->total_length = bytes_available;
+	priv->offset = 0;
+	priv->len = bytes_available;
 
 	return (0);
 }
diff --git a/sys/dev/tpm/tpm_if.m b/sys/dev/tpm/tpm_if.m
index b0149ba163a6..3b0dc9e3892f 100644
--- a/sys/dev/tpm/tpm_if.m
+++ b/sys/dev/tpm/tpm_if.m
@@ -28,6 +28,10 @@
 #include <sys/bus.h>
 #include <dev/tpm/tpm20.h>
 
+HEADER {
+	struct tpm_priv;
+};
+
 INTERFACE tpm;
 
 #
@@ -35,6 +39,7 @@ INTERFACE tpm;
 #
 METHOD int transmit {
 	device_t dev;
+	struct tpm_priv *priv;
 	size_t length;
 };
 
diff --git a/sys/dev/tpm/tpm_tis_core.c b/sys/dev/tpm/tpm_tis_core.c
index 34ecfcc283b1..f49a1f982e82 100644
--- a/sys/dev/tpm/tpm_tis_core.c
+++ b/sys/dev/tpm/tpm_tis_core.c
@@ -73,7 +73,7 @@
 #define	TPM_STS_BURST_MASK		0xFFFF00
 #define	TPM_STS_BURST_OFFSET		0x8
 
-static int tpmtis_transmit(device_t dev, size_t length);
+static int tpmtis_transmit(device_t dev, struct tpm_priv *priv, size_t length);
 
 static int tpmtis_detach(device_t dev);
 
@@ -104,7 +104,6 @@ tpmtis_attach(device_t dev)
 	sc->intr_type = -1;
 
 	sx_init(&sc->dev_lock, "TPM driver lock");
-	sc->buf = malloc(TPM_BUFSIZE, M_TPM20, M_WAITOK);
 
 	resource_int_value("tpm", device_get_unit(dev), "use_polling", &poll);
 	if (poll != 0) {
@@ -164,6 +163,7 @@ tpmtis_detach(device_t dev)
 static void
 tpmtis_test_intr(struct tpm_sc *sc)
 {
+	struct tpm_priv *priv;
 	uint8_t cmd[] = {
 		0x80, 0x01,		/* TPM_ST_NO_SESSIONS tag*/
 		0x00, 0x00, 0x00, 0x0c,	/* cmd length */
@@ -172,9 +172,9 @@ tpmtis_test_intr(struct tpm_sc *sc)
 	};
 
 	sx_xlock(&sc->dev_lock);
-	memcpy(sc->buf, cmd, sizeof(cmd));
-	tpmtis_transmit(sc->dev, sizeof(cmd));
-	sc->pending_data_length = 0;
+	priv = sc->internal_priv;
+	memcpy(priv->buf, cmd, sizeof(cmd));
+	tpmtis_transmit(sc->dev, priv, sizeof(cmd));
 	sx_xunlock(&sc->dev_lock);
 }
 
@@ -384,7 +384,7 @@ tpmtis_go_ready(struct tpm_sc *sc)
 }
 
 static int
-tpmtis_transmit(device_t dev, size_t length)
+tpmtis_transmit(device_t dev, struct tpm_priv *priv, size_t length)
 {
 	struct tpm_sc *sc;
 	size_t bytes_available;
@@ -404,7 +404,7 @@ tpmtis_transmit(device_t dev, size_t length)
 		    "Failed to switch to ready state\n");
 		return (EIO);
 	}
-	if (!tpmtis_write_bytes(sc, length, sc->buf)) {
+	if (!tpmtis_write_bytes(sc, length, priv->buf)) {
 		device_printf(dev,
 		    "Failed to write cmd to device\n");
 		return (EIO);
@@ -428,7 +428,7 @@ tpmtis_transmit(device_t dev, size_t length)
 	 * Calculate timeout for current command.
 	 * Command code is passed in bytes 6-10.
 	 */
-	curr_cmd = be32toh(*(uint32_t *) (&sc->buf[6]));
+	curr_cmd = be32toh(*(uint32_t *) (&priv->buf[6]));
 	timeout = tpm20_get_timeout(curr_cmd);
 
 	TPM_WRITE_4(dev, TPM_STS, TPM_STS_CMD_START);
@@ -455,12 +455,12 @@ tpmtis_transmit(device_t dev, size_t length)
 			return (EIO);
 	}
 	/* Read response header. Length is passed in bytes 2 - 6. */
-	if(!tpmtis_read_bytes(sc, TPM_HEADER_SIZE, sc->buf)) {
+	if (!tpmtis_read_bytes(sc, TPM_HEADER_SIZE, priv->buf)) {
 		device_printf(dev,
 		    "Failed to read response header\n");
 		return (EIO);
 	}
-	bytes_available = be32toh(*(uint32_t *) (&sc->buf[2]));
+	bytes_available = be32toh(*(uint32_t *) (&priv->buf[2]));
 
 	if (bytes_available > TPM_BUFSIZE || bytes_available < TPM_HEADER_SIZE) {
 		device_printf(dev,
@@ -468,15 +468,15 @@ tpmtis_transmit(device_t dev, size_t length)
 		    bytes_available);
 		return (EIO);
 	}
-	if(!tpmtis_read_bytes(sc, bytes_available - TPM_HEADER_SIZE,
-	    &sc->buf[TPM_HEADER_SIZE])) {
+	if (!tpmtis_read_bytes(sc, bytes_available - TPM_HEADER_SIZE,
+	    &priv->buf[TPM_HEADER_SIZE])) {
 		device_printf(dev,
 		    "Failed to read response\n");
 		return (EIO);
 	}
 	tpmtis_relinquish_locality(sc);
-	sc->pending_data_length = bytes_available;
-	sc->total_length = bytes_available;
+	priv->offset = 0;
+	priv->len = bytes_available;
 
 	return (0);
 }