svn commit: r360584 - in projects/nfs-over-tls/sys: dev/cxgbe/tom kern modules opencrypto sys
Rick Macklem
rmacklem at FreeBSD.org
Sun May 3 00:46:15 UTC 2020
Author: rmacklem
Date: Sun May 3 00:46:14 2020
New Revision: 360584
URL: https://svnweb.freebsd.org/changeset/base/360584
Log:
Add jhb@'s ktls rx patch to the kernel.
I also took out a few modules from sys/modules/Makefile, since those
do not build without a full src tree and not just sys.
Modified:
projects/nfs-over-tls/sys/dev/cxgbe/tom/t4_tom.c
projects/nfs-over-tls/sys/kern/uipc_ktls.c
projects/nfs-over-tls/sys/kern/uipc_sockbuf.c
projects/nfs-over-tls/sys/kern/uipc_socket.c
projects/nfs-over-tls/sys/modules/Makefile
projects/nfs-over-tls/sys/opencrypto/ktls_ocf.c
projects/nfs-over-tls/sys/sys/ktls.h
projects/nfs-over-tls/sys/sys/sockbuf.h
projects/nfs-over-tls/sys/sys/socket.h
projects/nfs-over-tls/sys/sys/socketvar.h
Modified: projects/nfs-over-tls/sys/dev/cxgbe/tom/t4_tom.c
==============================================================================
--- projects/nfs-over-tls/sys/dev/cxgbe/tom/t4_tom.c Sun May 3 00:37:16 2020 (r360583)
+++ projects/nfs-over-tls/sys/dev/cxgbe/tom/t4_tom.c Sun May 3 00:46:14 2020 (r360584)
@@ -1080,7 +1080,9 @@ is_tls_sock(struct socket *so, struct adapter *sc)
struct inpcb *inp = sotoinpcb(so);
int i, rc;
- /* XXX: Eventually add a SO_WANT_TLS socket option perhaps? */
+ if (so_options_get(so) & SO_WANT_KTLS)
+ return (1);
+
rc = 0;
ADAPTER_LOCK(sc);
for (i = 0; i < sc->tt.num_tls_rx_ports; i++) {
Modified: projects/nfs-over-tls/sys/kern/uipc_ktls.c
==============================================================================
--- projects/nfs-over-tls/sys/kern/uipc_ktls.c Sun May 3 00:37:16 2020 (r360583)
+++ projects/nfs-over-tls/sys/kern/uipc_ktls.c Sun May 3 00:46:14 2020 (r360584)
@@ -79,7 +79,8 @@ __FBSDID("$FreeBSD$");
struct ktls_wq {
struct mtx mtx;
- STAILQ_HEAD(, mbuf_ext_pgs) head;
+ STAILQ_HEAD(, mbuf_ext_pgs) pgs_head;
+ STAILQ_HEAD(, socket) so_head;
bool running;
} __aligned(CACHE_LINE_SIZE);
@@ -131,10 +132,16 @@ static counter_u64_t ktls_tasks_active;
SYSCTL_COUNTER_U64(_kern_ipc_tls, OID_AUTO, tasks_active, CTLFLAG_RD,
&ktls_tasks_active, "Number of active tasks");
-static counter_u64_t ktls_cnt_on;
-SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, so_inqueue, CTLFLAG_RD,
- &ktls_cnt_on, "Number of TLS records in queue to tasks for SW crypto");
+static counter_u64_t ktls_cnt_tx_queued;
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_tx_inqueue, CTLFLAG_RD,
+ &ktls_cnt_tx_queued,
+ "Number of TLS records in queue to tasks for SW encryption");
+static counter_u64_t ktls_cnt_rx_queued;
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_rx_inqueue, CTLFLAG_RD,
+ &ktls_cnt_rx_queued,
+ "Number of TLS sockets in queue to tasks for SW decryption");
+
static counter_u64_t ktls_offload_total;
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, offload_total,
CTLFLAG_RD, &ktls_offload_total,
@@ -149,6 +156,10 @@ static counter_u64_t ktls_offload_active;
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, active, CTLFLAG_RD,
&ktls_offload_active, "Total Active TLS sessions");
+static counter_u64_t ktls_offload_corrupted_records;
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, corrupted_records, CTLFLAG_RD,
+ &ktls_offload_corrupted_records, "Total corrupted TLS records received");
+
static counter_u64_t ktls_offload_failed_crypto;
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, failed_crypto, CTLFLAG_RD,
&ktls_offload_failed_crypto, "Total TLS crypto failures");
@@ -334,10 +345,12 @@ ktls_init(void *dummy __unused)
int error, i;
ktls_tasks_active = counter_u64_alloc(M_WAITOK);
- ktls_cnt_on = counter_u64_alloc(M_WAITOK);
+ ktls_cnt_tx_queued = counter_u64_alloc(M_WAITOK);
+ ktls_cnt_rx_queued = counter_u64_alloc(M_WAITOK);
ktls_offload_total = counter_u64_alloc(M_WAITOK);
ktls_offload_enable_calls = counter_u64_alloc(M_WAITOK);
ktls_offload_active = counter_u64_alloc(M_WAITOK);
+ ktls_offload_corrupted_records = counter_u64_alloc(M_WAITOK);
ktls_offload_failed_crypto = counter_u64_alloc(M_WAITOK);
ktls_switch_to_ifnet = counter_u64_alloc(M_WAITOK);
ktls_switch_to_sw = counter_u64_alloc(M_WAITOK);
@@ -370,7 +383,8 @@ ktls_init(void *dummy __unused)
* work queue for each CPU.
*/
CPU_FOREACH(i) {
- STAILQ_INIT(&ktls_wq[i].head);
+ STAILQ_INIT(&ktls_wq[i].pgs_head);
+ STAILQ_INIT(&ktls_wq[i].so_head);
mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF);
error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
&ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
@@ -859,7 +873,7 @@ ktls_try_ifnet(struct socket *so, struct ktls_session
}
static int
-ktls_try_sw(struct socket *so, struct ktls_session *tls)
+ktls_try_sw(struct socket *so, struct ktls_session *tls, int direction)
{
struct rm_priotracker prio;
struct ktls_crypto_backend *be;
@@ -874,7 +888,7 @@ ktls_try_sw(struct socket *so, struct ktls_session *tl
if (ktls_allow_unload)
rm_rlock(&ktls_backends_lock, &prio);
LIST_FOREACH(be, &ktls_backends, next) {
- if (be->try(so, tls) == 0)
+ if (be->try(so, tls, direction) == 0)
break;
KASSERT(tls->cipher == NULL,
("ktls backend leaked a cipher pointer"));
@@ -900,6 +914,61 @@ ktls_try_sw(struct socket *so, struct ktls_session *tl
return (0);
}
+/*
+ * KTLS RX stores data in the socket buffer as a list of TLS records,
+ * where each record is stored as a control message containing the TLS
+ * header followed by data mbufs containing the decrypted data. This
+ * is different from KTLS TX which always uses an mb_ext_pgs mbuf for
+ * both encrypted and decrypted data. TLS records decrypted by a NIC
+ * should be queued to the socket buffer as records, but encrypted
+ * data which needs to be decrypted by software arrives as a stream of
+ * regular mbufs which need to be converted. In addition, there may
+ * already be pending encrypted data in the socket buffer when KTLS RX
+ * is enabled.
+ *
+ * To manage not-yet-decrypted data for KTLS RX, the following scheme
+ * is used:
+ *
+ * - A single chain of NOTREADY mbufs is hung off of sb_mtls.
+ *
+ * - ktls_check_rx checks this chain of mbufs reading the TLS header
+ * from the first mbuf. Once all of the data for that TLS record is
+ * queued, the socket is queued to a worker thread.
+ *
+ * - The worker thread calls ktls_decrypt to decrypt TLS records in
+ * the TLS chain. Each TLS record is detached from the TLS chain,
+ *   decrypted, and inserted into the regular socket buffer chain as
+ *   records starting with a control message holding the TLS header and
+ *   a chain of mbufs holding the decrypted data.
+ */
+
+static void
+sb_mark_notready(struct sockbuf *sb)
+{
+ struct mbuf *m;
+
+ m = sb->sb_mb;
+ sb->sb_mtls = m;
+ sb->sb_mb = NULL;
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ for (; m != NULL; m = m->m_next) {
+ KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt != NULL",
+ __func__));
+ KASSERT((m->m_flags & M_NOTAVAIL) == 0, ("%s: mbuf not avail",
+ __func__));
+ KASSERT(sb->sb_acc >= m->m_len, ("%s: sb_acc < m->m_len",
+ __func__));
+ m->m_flags |= M_NOTREADY;
+ sb->sb_acc -= m->m_len;
+ sb->sb_tlscc += m->m_len;
+ sb->sb_mtlstail = m;
+ }
+ KASSERT(sb->sb_acc == 0 && sb->sb_tlscc == sb->sb_ccc,
+ ("%s: acc %u tlscc %u ccc %u", __func__, sb->sb_acc, sb->sb_tlscc,
+ sb->sb_ccc));
+}
+
int
ktls_enable_rx(struct socket *so, struct tls_enable *en)
{
@@ -928,16 +997,20 @@ ktls_enable_rx(struct socket *so, struct tls_enable *e
if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable)
return (ENOTSUP);
+ /* TLS 1.3 is not yet supported. */
+ if (en->tls_vmajor == TLS_MAJOR_VER_ONE &&
+ en->tls_vminor == TLS_MINOR_VER_THREE)
+ return (ENOTSUP);
+
error = ktls_create_session(so, en, &tls);
if (error)
return (error);
- /* TLS RX offload is only supported on TOE currently. */
#ifdef TCP_OFFLOAD
error = ktls_try_toe(so, tls, KTLS_RX);
-#else
- error = EOPNOTSUPP;
+ if (error)
#endif
+ error = ktls_try_sw(so, tls, KTLS_RX);
if (error) {
ktls_cleanup(tls);
@@ -946,7 +1019,13 @@ ktls_enable_rx(struct socket *so, struct tls_enable *e
/* Mark the socket as using TLS offload. */
SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_tls_seqno = be64dec(en->rec_seq);
so->so_rcv.sb_tls_info = tls;
+ so->so_rcv.sb_flags |= SB_TLS_RX;
+
+ /* Mark existing data as not ready until it can be decrypted. */
+ sb_mark_notready(&so->so_rcv);
+ ktls_check_rx(&so->so_rcv);
SOCKBUF_UNLOCK(&so->so_rcv);
counter_u64_add(ktls_offload_total, 1);
@@ -997,7 +1076,7 @@ ktls_enable_tx(struct socket *so, struct tls_enable *e
#endif
error = ktls_try_ifnet(so, tls, false);
if (error)
- error = ktls_try_sw(so, tls);
+ error = ktls_try_sw(so, tls, KTLS_TX);
if (error) {
ktls_cleanup(tls);
@@ -1102,7 +1181,7 @@ ktls_set_tx_mode(struct socket *so, int mode)
if (mode == TCP_TLS_MODE_IFNET)
error = ktls_try_ifnet(so, tls_new, true);
else
- error = ktls_try_sw(so, tls_new);
+ error = ktls_try_sw(so, tls_new, KTLS_TX);
if (error) {
counter_u64_add(ktls_switch_failed, 1);
ktls_free(tls_new);
@@ -1430,6 +1509,371 @@ ktls_frame(struct mbuf *top, struct ktls_session *tls,
}
void
+ktls_check_rx(struct sockbuf *sb)
+{
+ struct tls_record_layer hdr;
+ struct ktls_wq *wq;
+ struct socket *so;
+ bool running;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ KASSERT(sb->sb_flags & SB_TLS_RX, ("%s: sockbuf %p isn't TLS RX",
+ __func__, sb));
+ so = __containerof(sb, struct socket, so_rcv);
+
+ if (sb->sb_flags & SB_TLS_RX_RUNNING)
+ return;
+
+ /* Is there enough queued for a TLS header? */
+ if (sb->sb_tlscc < sizeof(hdr)) {
+ if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc != 0)
+ so->so_error = EMSGSIZE;
+ return;
+ }
+
+ m_copydata(sb->sb_mtls, 0, sizeof(hdr), (void *)&hdr);
+
+ /* Is the entire record queued? */
+ if (sb->sb_tlscc < sizeof(hdr) + ntohs(hdr.tls_length)) {
+ if ((sb->sb_state & SBS_CANTRCVMORE) != 0)
+ so->so_error = EMSGSIZE;
+ return;
+ }
+
+ sb->sb_flags |= SB_TLS_RX_RUNNING;
+
+ soref(so);
+ wq = &ktls_wq[so->so_rcv.sb_tls_info->wq_index];
+ mtx_lock(&wq->mtx);
+ STAILQ_INSERT_TAIL(&wq->so_head, so, so_ktls_rx_list);
+ running = wq->running;
+ mtx_unlock(&wq->mtx);
+ if (!running)
+ wakeup(wq);
+ counter_u64_add(ktls_cnt_rx_queued, 1);
+}
+
+static struct mbuf *
+ktls_detach_record(struct sockbuf *sb, int len)
+{
+ struct mbuf *m, *n, *top;
+ int remain;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ MPASS(len <= sb->sb_tlscc);
+
+ /*
+ * If TLS chain is the exact size of the record,
+ * just grab the whole record.
+ */
+ top = sb->sb_mtls;
+ if (sb->sb_tlscc == len) {
+ sb->sb_mtls = NULL;
+ sb->sb_mtlstail = NULL;
+ goto out;
+ }
+
+ /*
+ * While it would be nice to use m_split() here, we need
+ * to know exactly what m_split() allocates to update the
+ * accounting, so do it inline instead.
+ */
+ remain = len;
+ for (m = top; remain > m->m_len; m = m->m_next)
+ remain -= m->m_len;
+
+ /* Easy case: don't have to split 'm'. */
+ if (remain == m->m_len) {
+ sb->sb_mtls = m->m_next;
+ if (sb->sb_mtls == NULL)
+ sb->sb_mtlstail = NULL;
+ m->m_next = NULL;
+ goto out;
+ }
+
+ /*
+ * Need to allocate an mbuf to hold the remainder of 'm'. Try
+ * with M_NOWAIT first.
+ */
+ n = m_get(M_NOWAIT, MT_DATA);
+ if (n == NULL) {
+ /*
+ * Use M_WAITOK with socket buffer unlocked. If
+ * 'sb_mtls' changes while the lock is dropped, return
+ * NULL to force the caller to retry.
+ */
+ SOCKBUF_UNLOCK(sb);
+
+ n = m_get(M_WAITOK, MT_DATA);
+
+ SOCKBUF_LOCK(sb);
+ if (sb->sb_mtls != top) {
+ m_free(n);
+ return (NULL);
+ }
+ }
+ n->m_flags |= M_NOTREADY;
+
+ /* Store remainder in 'n'. */
+ n->m_len = m->m_len - remain;
+ if (m->m_flags & M_EXT) {
+ n->m_data = m->m_data + remain;
+ mb_dupcl(n, m);
+ } else {
+ bcopy(mtod(m, caddr_t) + remain, mtod(n, caddr_t), n->m_len);
+ }
+
+ /* Trim 'm' and update accounting. */
+ m->m_len -= n->m_len;
+ sb->sb_tlscc -= n->m_len;
+ sb->sb_ccc -= n->m_len;
+
+ /* Account for 'n'. */
+ sballoc_ktls_rx(sb, n);
+
+ /* Insert 'n' into the TLS chain. */
+ sb->sb_mtls = n;
+ n->m_next = m->m_next;
+ if (sb->sb_mtlstail == m)
+ sb->sb_mtlstail = n;
+
+ /* Detach the record from the TLS chain. */
+ m->m_next = NULL;
+
+out:
+ MPASS(m_length(top, NULL) == len);
+ for (m = top; m != NULL; m = m->m_next)
+ sbfree_ktls_rx(sb, m);
+ sb->sb_tlsdcc = len;
+ sb->sb_ccc += len;
+ SBCHECK(sb);
+ return (top);
+}
+
+static int
+m_segments(struct mbuf *m, int skip)
+{
+ int count;
+
+ while (skip >= m->m_len) {
+ skip -= m->m_len;
+ m = m->m_next;
+ }
+
+ for (count = 0; m != NULL; count++)
+ m = m->m_next;
+ return (count);
+}
+
+static void
+ktls_decrypt(struct socket *so)
+{
+ char tls_header[MBUF_PEXT_HDR_LEN];
+ struct ktls_session *tls;
+ struct sockbuf *sb;
+ struct tls_record_layer *hdr;
+ struct iovec *iov;
+ struct tls_get_record tgr;
+ struct mbuf *control, *data, *m;
+ uint64_t seqno;
+ int error, i, iov_cap, iov_count, remain, tls_len, trail_len;
+
+ hdr = (struct tls_record_layer *)tls_header;
+ sb = &so->so_rcv;
+ SOCKBUF_LOCK(sb);
+ KASSERT(sb->sb_flags & SB_TLS_RX_RUNNING,
+ ("%s: socket %p not running", __func__, so));
+
+ tls = sb->sb_tls_info;
+ MPASS(tls != NULL);
+
+ iov = NULL;
+ iov_cap = 0;
+ for (;;) {
+ /* Is there enough queued for a TLS header? */
+ if (sb->sb_tlscc < tls->params.tls_hlen)
+ break;
+
+ m_copydata(sb->sb_mtls, 0, tls->params.tls_hlen, tls_header);
+ tls_len = sizeof(*hdr) + ntohs(hdr->tls_length);
+
+ if (hdr->tls_vmajor != tls->params.tls_vmajor ||
+ hdr->tls_vminor != tls->params.tls_vminor)
+ error = EINVAL;
+ else if (tls_len < tls->params.tls_hlen || tls_len >
+ tls->params.tls_hlen + TLS_MAX_MSG_SIZE_V10_2 +
+ tls->params.tls_tlen)
+ error = EMSGSIZE;
+ else
+ error = 0;
+ if (__predict_false(error != 0)) {
+ /*
+ * We have a corrupted record and are likely
+ * out of sync. The connection isn't
+ * recoverable at this point, so abort it.
+ */
+ SOCKBUF_UNLOCK(sb);
+ counter_u64_add(ktls_offload_corrupted_records, 1);
+
+ CURVNET_SET(so->so_vnet);
+ so->so_proto->pr_usrreqs->pru_abort(so);
+ so->so_error = error;
+ CURVNET_RESTORE();
+ goto deref;
+ }
+
+ /* Is the entire record queued? */
+ if (sb->sb_tlscc < tls_len)
+ break;
+
+ /*
+ * Split out the portion of the mbuf chain containing
+ * this TLS record.
+ */
+ data = ktls_detach_record(sb, tls_len);
+ if (data == NULL)
+ continue;
+ MPASS(sb->sb_tlsdcc == tls_len);
+
+ seqno = sb->sb_tls_seqno;
+ sb->sb_tls_seqno++;
+ SBCHECK(sb);
+ SOCKBUF_UNLOCK(sb);
+
+ /*
+ * Build an I/O vector spanning the TLS record payload
+ * and trailer but skipping the header.
+ */
+ iov_count = m_segments(data, tls->params.tls_hlen);
+ if (iov_count > iov_cap) {
+ free(iov, M_KTLS);
+ iov = malloc(sizeof(*iov) * iov_count, M_KTLS,
+ M_WAITOK);
+ iov_cap = iov_count;
+ }
+ remain = tls->params.tls_hlen;
+ for (m = data; remain >= m->m_len; m = m->m_next)
+ remain -= m->m_len;
+ iov[0].iov_base = m->m_data + remain;
+ iov[0].iov_len = m->m_len - remain;
+ for (m = m->m_next, i = 1; m != NULL; m = m->m_next, i++) {
+ iov[i].iov_base = m->m_data;
+ iov[i].iov_len = m->m_len;
+ }
+ MPASS(i == iov_count);
+
+ error = tls->sw_decrypt(tls, hdr, iov, iov_count, seqno,
+ &trail_len);
+ if (error) {
+ counter_u64_add(ktls_offload_failed_crypto, 1);
+
+ SOCKBUF_LOCK(sb);
+ if (sb->sb_tlsdcc == 0) {
+ /*
+ * sbcut/drop/flush discarded these
+ * mbufs.
+ */
+ m_freem(data);
+ break;
+ }
+
+ /*
+ * Drop this TLS record's data, but keep
+ * decrypting subsequent records.
+ */
+ sb->sb_ccc -= tls_len;
+ sb->sb_tlsdcc = 0;
+
+ CURVNET_SET(so->so_vnet);
+ so->so_error = EBADMSG;
+ sorwakeup_locked(so);
+ CURVNET_RESTORE();
+
+ m_freem(data);
+
+ SOCKBUF_LOCK(sb);
+ continue;
+ }
+
+ /* Allocate the control mbuf. */
+ tgr.tls_type = hdr->tls_type;
+ tgr.tls_vmajor = hdr->tls_vmajor;
+ tgr.tls_vminor = hdr->tls_vminor;
+ tgr.tls_length = htobe16(tls_len - tls->params.tls_hlen -
+ trail_len);
+ control = sbcreatecontrol_how(&tgr, sizeof(tgr),
+ TLS_GET_RECORD, IPPROTO_TCP, M_WAITOK);
+
+ SOCKBUF_LOCK(sb);
+ if (sb->sb_tlsdcc == 0) {
+ /* sbcut/drop/flush discarded these mbufs. */
+ MPASS(sb->sb_tlscc == 0);
+ m_freem(data);
+ m_freem(control);
+ break;
+ }
+
+ /*
+ * Clear the 'dcc' accounting in preparation for
+ * adding the decrypted record.
+ */
+ sb->sb_ccc -= tls_len;
+ sb->sb_tlsdcc = 0;
+ SBCHECK(sb);
+
+ /* If there is no payload, drop all of the data. */
+ if (tgr.tls_length == htobe16(0)) {
+ m_freem(data);
+ data = NULL;
+ } else {
+ /* Trim header. */
+ remain = tls->params.tls_hlen;
+ while (remain > 0) {
+ if (data->m_len > remain) {
+ data->m_data += remain;
+ data->m_len -= remain;
+ break;
+ }
+ remain -= data->m_len;
+ data = m_free(data);
+ }
+
+ /* Trim trailer and clear M_NOTREADY. */
+ remain = be16toh(tgr.tls_length);
+ m = data;
+ for (m = data; remain > m->m_len; m = m->m_next) {
+ m->m_flags &= ~M_NOTREADY;
+ remain -= m->m_len;
+ }
+ m->m_len = remain;
+ m_freem(m->m_next);
+ m->m_next = NULL;
+ m->m_flags &= ~M_NOTREADY;
+
+ /* Set EOR on the final mbuf. */
+ m->m_flags |= M_EOR;
+ }
+
+ sbappendcontrol_locked(sb, data, control, 0);
+ }
+
+ sb->sb_flags &= ~SB_TLS_RX_RUNNING;
+
+ if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc > 0)
+ so->so_error = EMSGSIZE;
+
+ sorwakeup_locked(so);
+
+deref:
+ SOCKBUF_UNLOCK_ASSERT(sb);
+
+ CURVNET_SET(so->so_vnet);
+ SOCK_LOCK(so);
+ sorele(so);
+ CURVNET_RESTORE();
+}
+
+void
ktls_enqueue_to_free(struct mbuf_ext_pgs *pgs)
{
struct ktls_wq *wq;
@@ -1439,7 +1883,7 @@ ktls_enqueue_to_free(struct mbuf_ext_pgs *pgs)
pgs->mbuf = NULL;
wq = &ktls_wq[pgs->tls->wq_index];
mtx_lock(&wq->mtx);
- STAILQ_INSERT_TAIL(&wq->head, pgs, stailq);
+ STAILQ_INSERT_TAIL(&wq->pgs_head, pgs, stailq);
running = wq->running;
mtx_unlock(&wq->mtx);
if (!running)
@@ -1473,12 +1917,12 @@ ktls_enqueue(struct mbuf *m, struct socket *so, int pa
wq = &ktls_wq[pgs->tls->wq_index];
mtx_lock(&wq->mtx);
- STAILQ_INSERT_TAIL(&wq->head, pgs, stailq);
+ STAILQ_INSERT_TAIL(&wq->pgs_head, pgs, stailq);
running = wq->running;
mtx_unlock(&wq->mtx);
if (!running)
wakeup(wq);
- counter_u64_add(ktls_cnt_on, 1);
+ counter_u64_add(ktls_cnt_tx_queued, 1);
}
static __noinline void
@@ -1633,36 +2077,46 @@ static void
ktls_work_thread(void *ctx)
{
struct ktls_wq *wq = ctx;
- struct mbuf_ext_pgs *p, *n;
+ struct mbuf_ext_pgs *p, *pn;
+ struct socket *so, *son;
struct ktls_session *tls;
struct mbuf *m;
- STAILQ_HEAD(, mbuf_ext_pgs) local_head;
+ STAILQ_HEAD(, mbuf_ext_pgs) local_pgs_head;
+ STAILQ_HEAD(, socket) local_so_head;
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
fpu_kern_thread(0);
#endif
for (;;) {
mtx_lock(&wq->mtx);
- while (STAILQ_EMPTY(&wq->head)) {
+ while (STAILQ_EMPTY(&wq->pgs_head) &&
+ STAILQ_EMPTY(&wq->so_head)) {
wq->running = false;
mtx_sleep(wq, &wq->mtx, 0, "-", 0);
wq->running = true;
}
- STAILQ_INIT(&local_head);
- STAILQ_CONCAT(&local_head, &wq->head);
+ STAILQ_INIT(&local_pgs_head);
+ STAILQ_CONCAT(&local_pgs_head, &wq->pgs_head);
+ STAILQ_INIT(&local_so_head);
+ STAILQ_CONCAT(&local_so_head, &wq->so_head);
mtx_unlock(&wq->mtx);
- STAILQ_FOREACH_SAFE(p, &local_head, stailq, n) {
+ STAILQ_FOREACH_SAFE(p, &local_pgs_head, stailq, pn) {
if (p->mbuf != NULL) {
ktls_encrypt(p);
- counter_u64_add(ktls_cnt_on, -1);
+ counter_u64_add(ktls_cnt_tx_queued, -1);
} else {
tls = p->tls;
ktls_free(tls);
m = __containerof(p, struct mbuf, m_ext_pgs);
uma_zfree(zone_mbuf, m);
}
+ }
+
+ STAILQ_FOREACH_SAFE(so, &local_so_head, so_ktls_rx_list, son) {
+ ktls_decrypt(so);
+ counter_u64_add(ktls_cnt_rx_queued, -1);
}
}
}
Modified: projects/nfs-over-tls/sys/kern/uipc_sockbuf.c
==============================================================================
--- projects/nfs-over-tls/sys/kern/uipc_sockbuf.c Sun May 3 00:37:16 2020 (r360583)
+++ projects/nfs-over-tls/sys/kern/uipc_sockbuf.c Sun May 3 00:46:14 2020 (r360584)
@@ -70,6 +70,8 @@ u_long sb_max_adj =
static u_long sb_efficiency = 8; /* parameter for sbreserve() */
+static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m,
+ struct mbuf *n);
static struct mbuf *sbcut_internal(struct sockbuf *sb, int len);
static void sbflush_internal(struct sockbuf *sb);
@@ -339,7 +341,52 @@ sbfree(struct sockbuf *sb, struct mbuf *m)
sb->sb_sndptroff -= m->m_len;
}
+#ifdef KERN_TLS
/*
+ * Similar to sballoc/sbfree but does not adjust state associated with
+ * the sb_mb chain such as sb_fnrdy or sb_sndptr*. Also assumes mbufs
+ * are not ready.
+ */
+void
+sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ sb->sb_ccc += m->m_len;
+ sb->sb_tlscc += m->m_len;
+
+ sb->sb_mbcnt += MSIZE;
+ sb->sb_mcnt += 1;
+
+ if (m->m_flags & M_EXT) {
+ sb->sb_mbcnt += m->m_ext.ext_size;
+ sb->sb_ccnt += 1;
+ }
+}
+
+void
+sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m)
+{
+
+#if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */
+ SOCKBUF_LOCK_ASSERT(sb);
+#endif
+
+ sb->sb_ccc -= m->m_len;
+ sb->sb_tlscc -= m->m_len;
+
+ sb->sb_mbcnt -= MSIZE;
+ sb->sb_mcnt -= 1;
+
+ if (m->m_flags & M_EXT) {
+ sb->sb_mbcnt -= m->m_ext.ext_size;
+ sb->sb_ccnt -= 1;
+ }
+}
+#endif
+
+/*
* Socantsendmore indicates that no more data will be sent on the socket; it
* would normally be applied to a socket when the user informs the system
* that no more data is to be sent, by the protocol code (in case
@@ -375,6 +422,10 @@ socantrcvmore_locked(struct socket *so)
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
so->so_rcv.sb_state |= SBS_CANTRCVMORE;
+#ifdef KERN_TLS
+ if (so->so_rcv.sb_flags & SB_TLS_RX)
+ ktls_check_rx(&so->so_rcv);
+#endif
sorwakeup_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
@@ -775,6 +826,24 @@ sblastmbufchk(struct sockbuf *sb, const char *file, in
}
panic("%s from %s:%u", __func__, file, line);
}
+
+#ifdef KERN_TLS
+ m = sb->sb_mtls;
+ while (m && m->m_next)
+ m = m->m_next;
+
+ if (m != sb->sb_mtlstail) {
+ printf("%s: sb_mtls %p sb_mtlstail %p last %p\n",
+ __func__, sb->sb_mtls, sb->sb_mtlstail, m);
+ printf("TLS packet tree:\n");
+ printf("\t");
+ for (m = sb->sb_mtls; m != NULL; m = m->m_next) {
+ printf("%p ", m);
+ }
+ printf("\n");
+ panic("%s from %s:%u", __func__, file, line);
+ }
+#endif
}
#endif /* SOCKBUF_DEBUG */
@@ -852,7 +921,30 @@ sbappend(struct sockbuf *sb, struct mbuf *m, int flags
SOCKBUF_UNLOCK(sb);
}
+#ifdef KERN_TLS
/*
+ * Append an mbuf containing encrypted TLS data. The data
+ * is marked M_NOTREADY until it has been decrypted and
+ * stored as a TLS record.
+ */
+static void
+sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m)
+{
+ struct mbuf *n;
+
+ SBLASTMBUFCHK(sb);
+
+ /* Remove all packet headers and mbuf tags to get a pure data chain. */
+ m_demote(m, 1, 0);
+
+ for (n = m; n != NULL; n = n->m_next)
+ n->m_flags |= M_NOTREADY;
+ sbcompress_ktls_rx(sb, m, sb->sb_mtlstail);
+ ktls_check_rx(sb);
+}
+#endif
+
+/*
* This version of sbappend() should only be used when the caller absolutely
* knows that there will never be more than one record in the socket buffer,
* that is, a stream protocol (such as TCP).
@@ -863,6 +955,19 @@ sbappendstream_locked(struct sockbuf *sb, struct mbuf
SOCKBUF_LOCK_ASSERT(sb);
KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
+
+#ifdef KERN_TLS
+ /*
+ * Decrypted TLS records are appended as records via
+ * sbappendrecord(). TCP passes encrypted TLS records to this
+ * function which must be scheduled for decryption.
+ */
+ if (sb->sb_flags & SB_TLS_RX) {
+ sbappend_ktls_rx(sb, m);
+ return;
+ }
+#endif
+
KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
SBLASTMBUFCHK(sb);
@@ -901,6 +1006,9 @@ sbcheck(struct sockbuf *sb, const char *file, int line
{
struct mbuf *m, *n, *fnrdy;
u_long acc, ccc, mbcnt;
+#ifdef KERN_TLS
+ u_long tlscc;
+#endif
SOCKBUF_LOCK_ASSERT(sb);
@@ -936,9 +1044,46 @@ sbcheck(struct sockbuf *sb, const char *file, int line
mbcnt += m->m_ext.ext_size;
}
}
+#ifdef KERN_TLS
+ /*
+ * Account for mbufs "detached" by ktls_detach_record() while
+ * they are decrypted by ktls_decrypt(). tlsdcc gives a count
+ * of the detached bytes that are included in ccc. The mbufs
+ * and clusters are not included in the socket buffer
+ * accounting.
+ */
+ ccc += sb->sb_tlsdcc;
+
+ tlscc = 0;
+ for (m = sb->sb_mtls; m; m = m->m_next) {
+ if (m->m_nextpkt != NULL) {
+ printf("sb %p TLS mbuf %p with nextpkt\n", sb, m);
+ goto fail;
+ }
+ if ((m->m_flags & M_NOTREADY) == 0) {
+ printf("sb %p TLS mbuf %p ready\n", sb, m);
+ goto fail;
+ }
+ tlscc += m->m_len;
+ ccc += m->m_len;
+ mbcnt += MSIZE;
+ if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
+ mbcnt += m->m_ext.ext_size;
+ }
+
+ if (sb->sb_tlscc != tlscc) {
+ printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
+ sb->sb_tlsdcc);
+ goto fail;
+ }
+#endif
if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
+#ifdef KERN_TLS
+ printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
+ sb->sb_tlsdcc);
+#endif
goto fail;
}
return;
@@ -1214,14 +1359,72 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct
SBLASTMBUFCHK(sb);
}
+#ifdef KERN_TLS
/*
+ * A version of sbcompress() for encrypted TLS RX mbufs. These mbufs
+ * are appended to the 'sb_mtls' chain instead of 'sb_mb' and are also
+ * a bit simpler (no EOR markers, always MT_DATA, etc.).
+ */
+static void
+sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ while (m) {
+ KASSERT((m->m_flags & M_EOR) == 0,
+ ("TLS RX mbuf %p with EOR", m));
+ KASSERT(m->m_type == MT_DATA,
+ ("TLS RX mbuf %p is not MT_DATA", m));
+ KASSERT((m->m_flags & M_NOTREADY) != 0,
+ ("TLS RX mbuf %p ready", m));
+ KASSERT((m->m_flags & M_NOMAP) == 0,
+ ("TLS RX mbuf %p unmapped", m));
+
+ if (m->m_len == 0) {
+ m = m_free(m);
+ continue;
+ }
+
+ /*
+ * Even though both 'n' and 'm' are NOTREADY, it's ok
+ * to coalesce the data.
+ */
+ if (n &&
+ M_WRITABLE(n) &&
+ ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
+ !(n->m_flags & (M_NOMAP)) &&
+ m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
+ m->m_len <= M_TRAILINGSPACE(n)) {
+ m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
+ n->m_len += m->m_len;
+ sb->sb_ccc += m->m_len;
+ sb->sb_tlscc += m->m_len;
+ m = m_free(m);
+ continue;
+ }
+ if (n)
+ n->m_next = m;
+ else
+ sb->sb_mtls = m;
+ sb->sb_mtlstail = m;
+ sballoc_ktls_rx(sb, m);
+ n = m;
+ m = m->m_next;
+ n->m_next = NULL;
+ }
+ SBLASTMBUFCHK(sb);
+}
+#endif
+
+/*
* Free all mbufs in a sockbuf. Check that all resources are reclaimed.
*/
static void
sbflush_internal(struct sockbuf *sb)
{
- while (sb->sb_mbcnt) {
+ while (sb->sb_mbcnt || sb->sb_tlsdcc) {
/*
* Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
* we would loop forever. Panic instead.
@@ -1259,6 +1462,7 @@ static struct mbuf *
sbcut_internal(struct sockbuf *sb, int len)
{
struct mbuf *m, *next, *mfree;
+ bool is_tls;
KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
__func__, len));
@@ -1266,10 +1470,25 @@ sbcut_internal(struct sockbuf *sb, int len)
__func__, len, sb->sb_ccc));
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+ is_tls = false;
mfree = NULL;
while (len > 0) {
if (m == NULL) {
+#ifdef KERN_TLS
+ if (next == NULL && !is_tls) {
+ if (sb->sb_tlsdcc != 0) {
+ MPASS(len >= sb->sb_tlsdcc);
+ len -= sb->sb_tlsdcc;
+ sb->sb_ccc -= sb->sb_tlsdcc;
+ sb->sb_tlsdcc = 0;
+ if (len == 0)
+ break;
+ }
+ next = sb->sb_mtls;
+ is_tls = true;
+ }
+#endif
KASSERT(next, ("%s: no next, len %d", __func__, len));
m = next;
next = m->m_nextpkt;
@@ -1288,12 +1507,17 @@ sbcut_internal(struct sockbuf *sb, int len)
break;
}
len -= m->m_len;
- sbfree(sb, m);
+#ifdef KERN_TLS
+ if (is_tls)
+ sbfree_ktls_rx(sb, m);
+ else
+#endif
+ sbfree(sb, m);
/*
* Do not put M_NOTREADY buffers to the free list, they
* are referenced from outside.
*/
- if (m->m_flags & M_NOTREADY)
+ if (m->m_flags & M_NOTREADY && !is_tls)
m = m->m_next;
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-projects
mailing list