git: dfbbcea39b73 - stable/13 - wg: Trim compat shims for versions older than current stable/13.

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Fri, 11 Nov 2022 22:04:25 UTC

The branch stable/13 has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=dfbbcea39b73e141cb72886ce5e5b2b170331f59

commit dfbbcea39b73e141cb72886ce5e5b2b170331f59
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2022-10-28 20:36:13 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2022-11-11 21:50:09 +0000

    wg: Trim compat shims for versions older than current stable/13.
    
    Reviewed by:    kevans, markj, emaste
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D36913
    
    (cherry picked from commit 854d066251a9e1731993416b1934e2002a29d395)
---
 sys/dev/wg/compat.h    |  101 ---
 sys/dev/wg/crypto.h    |   67 +-
 sys/dev/wg/wg_crypto.c | 1645 ++----------------------------------------------
 3 files changed, 52 insertions(+), 1761 deletions(-)

diff --git a/sys/dev/wg/compat.h b/sys/dev/wg/compat.h
index 101a771579d9..29f6ea92a50e 100644
--- a/sys/dev/wg/compat.h
+++ b/sys/dev/wg/compat.h
@@ -9,110 +9,9 @@
 
 #include <sys/param.h>
 
-#if (__FreeBSD_version < 1400036 && __FreeBSD_version >= 1400000) || __FreeBSD_version < 1300519
-#define COMPAT_NEED_CHACHA20POLY1305_MBUF
-#endif
-
-#if __FreeBSD_version < 1400048
-#define COMPAT_NEED_CHACHA20POLY1305
-#endif
-
-#if __FreeBSD_version < 1400049
-#define COMPAT_NEED_CURVE25519
-#endif
-
-#if __FreeBSD_version < 0x7fffffff /* TODO: update this when implemented */
 #define COMPAT_NEED_BLAKE2S
-#endif
 
 #if __FreeBSD_version < 1400059
 #include <sys/sockbuf.h>
 #define sbcreatecontrol(a, b, c, d, e) sbcreatecontrol(a, b, c, d)
 #endif
-
-#if __FreeBSD_version < 1300507
-#include <sys/smp.h>
-#include <sys/gtaskqueue.h>
-
-struct taskqgroup_cpu {
-	LIST_HEAD(, grouptask)  tgc_tasks;
-	struct gtaskqueue       *tgc_taskq;
-	int     tgc_cnt;
-	int     tgc_cpu;
-};
-
-struct taskqgroup {
-	struct taskqgroup_cpu tqg_queue[MAXCPU];
-	/* Other members trimmed from compat. */
-};
-
-static inline void taskqgroup_drain_all(struct taskqgroup *tqg)
-{
-	struct gtaskqueue *q;
-
-	for (int i = 0; i < mp_ncpus; i++) {
-		q = tqg->tqg_queue[i].tgc_taskq;
-		if (q == NULL)
-			continue;
-		gtaskqueue_drain_all(q);
-	}
-}
-#endif
-
-#if __FreeBSD_version < 1300000
-#define VIMAGE
-
-#include <sys/types.h>
-#include <sys/limits.h>
-#include <sys/endian.h>
-#include <sys/socket.h>
-#include <sys/libkern.h>
-#include <sys/malloc.h>
-#include <sys/proc.h>
-#include <sys/lock.h>
-#include <sys/socketvar.h>
-#include <sys/protosw.h>
-#include <net/vnet.h>
-#include <net/if.h>
-#include <net/if_var.h>
-#include <vm/uma.h>
-
-#define taskqgroup_attach(a, b, c, d, e, f) taskqgroup_attach((a), (b), (c), -1, (f))
-#define taskqgroup_attach_cpu(a, b, c, d, e, f, g) taskqgroup_attach_cpu((a), (b), (c), (d), -1, (g))
-
-#undef NET_EPOCH_ENTER
-#define NET_EPOCH_ENTER(et) NET_EPOCH_ENTER_ET(et)
-#undef NET_EPOCH_EXIT
-#define NET_EPOCH_EXIT(et) NET_EPOCH_EXIT_ET(et)
-#define NET_EPOCH_CALL(f, c) epoch_call(net_epoch_preempt, (c), (f))
-#define NET_EPOCH_ASSERT() MPASS(in_epoch(net_epoch_preempt))
-
-#undef atomic_load_ptr
-#define atomic_load_ptr(p) (*(volatile __typeof(*p) *)(p))
-
-#endif
-
-#if __FreeBSD_version < 1202000
-static inline uint32_t arc4random_uniform(uint32_t bound)
-{
-	uint32_t ret, max_mod_bound;
-
-	if (bound < 2)
-		return 0;
-
-	max_mod_bound = (1 + ~bound) % bound;
-
-	do {
-		ret = arc4random();
-	} while (ret < max_mod_bound);
-
-	return ret % bound;
-}
-
-typedef void callout_func_t(void *);
-
-#ifndef CSUM_SND_TAG
-#define CSUM_SND_TAG 0x80000000
-#endif
-
-#endif
diff --git a/sys/dev/wg/crypto.h b/sys/dev/wg/crypto.h
index 2115039321b1..ff7b39354749 100644
--- a/sys/dev/wg/crypto.h
+++ b/sys/dev/wg/crypto.h
@@ -8,6 +8,9 @@
 #define _WG_CRYPTO
 
 #include <sys/param.h>
+#include <sys/endian.h>
+#include <crypto/chacha20_poly1305.h>
+#include <crypto/curve25519.h>
 
 struct mbuf;
 
@@ -20,36 +23,6 @@ enum chacha20poly1305_lengths {
 	CHACHA20POLY1305_AUTHTAG_SIZE = 16
 };
 
-#ifdef COMPAT_NEED_CHACHA20POLY1305
-void
-chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
-			 const uint8_t *ad, const size_t ad_len,
-			 const uint64_t nonce,
-			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
-
-bool
-chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
-			 const uint8_t *ad, const size_t ad_len,
-			 const uint64_t nonce,
-			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
-
-void
-xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src,
-			  const size_t src_len, const uint8_t *ad,
-			  const size_t ad_len,
-			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
-			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
-
-bool
-xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src,
-			  const size_t src_len,  const uint8_t *ad,
-			  const size_t ad_len,
-			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
-			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE]);
-#else
-#include <sys/endian.h>
-#include <crypto/chacha20_poly1305.h>
-
 static inline void
 chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
 			 const uint8_t *ad, const size_t ad_len,
@@ -95,7 +68,6 @@ xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src,
 {
 	return (xchacha20_poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key));
 }
-#endif
 
 int
 chacha20poly1305_encrypt_mbuf(struct mbuf *, const uint64_t nonce,
@@ -146,37 +118,4 @@ static inline void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key,
 }
 #endif
 
-#ifdef COMPAT_NEED_CURVE25519
-enum curve25519_lengths {
-        CURVE25519_KEY_SIZE = 32
-};
-
-bool curve25519(uint8_t mypublic[static CURVE25519_KEY_SIZE],
-		const uint8_t secret[static CURVE25519_KEY_SIZE],
-		const uint8_t basepoint[static CURVE25519_KEY_SIZE]);
-
-static inline bool
-curve25519_generate_public(uint8_t pub[static CURVE25519_KEY_SIZE],
-			   const uint8_t secret[static CURVE25519_KEY_SIZE])
-{
-	static const uint8_t basepoint[CURVE25519_KEY_SIZE] = { 9 };
-
-	return curve25519(pub, secret, basepoint);
-}
-
-static inline void curve25519_clamp_secret(uint8_t secret[static CURVE25519_KEY_SIZE])
-{
-        secret[0] &= 248;
-        secret[31] = (secret[31] & 127) | 64;
-}
-
-static inline void curve25519_generate_secret(uint8_t secret[CURVE25519_KEY_SIZE])
-{
-	arc4random_buf(secret, CURVE25519_KEY_SIZE);
-	curve25519_clamp_secret(secret);
-}
-#else
-#include <crypto/curve25519.h>
-#endif
-
 #endif
diff --git a/sys/dev/wg/wg_crypto.c b/sys/dev/wg/wg_crypto.c
index 29d9487d647f..53441ef25b40 100644
--- a/sys/dev/wg/wg_crypto.c
+++ b/sys/dev/wg/wg_crypto.c
@@ -12,776 +12,38 @@
 
 #include "crypto.h"
 
-#ifndef COMPAT_NEED_CHACHA20POLY1305_MBUF
 static crypto_session_t chacha20_poly1305_sid;
-#endif
 
+#ifdef COMPAT_NEED_BLAKE2S
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif
-#ifndef noinline
-#define noinline __attribute__((noinline))
-#endif
-#ifndef __aligned
-#define __aligned(x) __attribute__((aligned(x)))
-#endif
 #ifndef DIV_ROUND_UP
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #endif
 
 #define le32_to_cpup(a) le32toh(*(a))
-#define le64_to_cpup(a) le64toh(*(a))
 #define cpu_to_le32(a) htole32(a)
-#define cpu_to_le64(a) htole64(a)
 
-static inline __unused uint32_t get_unaligned_le32(const uint8_t *a)
-{
-	uint32_t l;
-	__builtin_memcpy(&l, a, sizeof(l));
-	return le32_to_cpup(&l);
-}
-static inline __unused uint64_t get_unaligned_le64(const uint8_t *a)
-{
-	uint64_t l;
-	__builtin_memcpy(&l, a, sizeof(l));
-	return le64_to_cpup(&l);
-}
-static inline __unused void put_unaligned_le32(uint32_t s, uint8_t *d)
-{
-	uint32_t l = cpu_to_le32(s);
-	__builtin_memcpy(d, &l, sizeof(l));
-}
-static inline __unused void cpu_to_le32_array(uint32_t *buf, unsigned int words)
+static inline void cpu_to_le32_array(uint32_t *buf, unsigned int words)
 {
         while (words--) {
 		*buf = cpu_to_le32(*buf);
 		++buf;
 	}
 }
-static inline __unused void le32_to_cpu_array(uint32_t *buf, unsigned int words)
+static inline void le32_to_cpu_array(uint32_t *buf, unsigned int words)
 {
         while (words--) {
 		*buf = le32_to_cpup(buf);
 		++buf;
         }
 }
-static inline __unused uint32_t rol32(uint32_t word, unsigned int shift)
-{
-        return (word << (shift & 31)) | (word >> ((-shift) & 31));
-}
-static inline __unused uint32_t ror32(uint32_t word, unsigned int shift)
+static inline uint32_t ror32(uint32_t word, unsigned int shift)
 {
 	return (word >> (shift & 31)) | (word << ((-shift) & 31));
 }
 
-#if defined(COMPAT_NEED_CHACHA20POLY1305) || defined(COMPAT_NEED_CHACHA20POLY1305_MBUF)
-static void xor_cpy(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t len)
-{
-	size_t i;
-
-	for (i = 0; i < len; ++i)
-		dst[i] = src1[i] ^ src2[i];
-}
-
-#define QUARTER_ROUND(x, a, b, c, d) ( \
-	x[a] += x[b], \
-	x[d] = rol32((x[d] ^ x[a]), 16), \
-	x[c] += x[d], \
-	x[b] = rol32((x[b] ^ x[c]), 12), \
-	x[a] += x[b], \
-	x[d] = rol32((x[d] ^ x[a]), 8), \
-	x[c] += x[d], \
-	x[b] = rol32((x[b] ^ x[c]), 7) \
-)
-
-#define C(i, j) (i * 4 + j)
-
-#define DOUBLE_ROUND(x) ( \
-	/* Column Round */ \
-	QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
-	QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
-	QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
-	QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
-	/* Diagonal Round */ \
-	QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
-	QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
-	QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
-	QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
-)
-
-#define TWENTY_ROUNDS(x) ( \
-	DOUBLE_ROUND(x), \
-	DOUBLE_ROUND(x), \
-	DOUBLE_ROUND(x), \
-	DOUBLE_ROUND(x), \
-	DOUBLE_ROUND(x), \
-	DOUBLE_ROUND(x), \
-	DOUBLE_ROUND(x), \
-	DOUBLE_ROUND(x), \
-	DOUBLE_ROUND(x), \
-	DOUBLE_ROUND(x) \
-)
-
-enum chacha20_lengths {
-	CHACHA20_NONCE_SIZE = 16,
-	CHACHA20_KEY_SIZE = 32,
-	CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(uint32_t),
-	CHACHA20_BLOCK_SIZE = 64,
-	CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(uint32_t),
-	HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
-	HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
-};
-
-enum chacha20_constants { /* expand 32-byte k */
-	CHACHA20_CONSTANT_EXPA = 0x61707865U,
-	CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
-	CHACHA20_CONSTANT_2_BY = 0x79622d32U,
-	CHACHA20_CONSTANT_TE_K = 0x6b206574U
-};
-
-struct chacha20_ctx {
-	union {
-		uint32_t state[16];
-		struct {
-			uint32_t constant[4];
-			uint32_t key[8];
-			uint32_t counter[4];
-		};
-	};
-};
-
-static void chacha20_init(struct chacha20_ctx *ctx,
-			  const uint8_t key[CHACHA20_KEY_SIZE],
-			  const uint64_t nonce)
-{
-	ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
-	ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
-	ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
-	ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
-	ctx->key[0] = get_unaligned_le32(key + 0);
-	ctx->key[1] = get_unaligned_le32(key + 4);
-	ctx->key[2] = get_unaligned_le32(key + 8);
-	ctx->key[3] = get_unaligned_le32(key + 12);
-	ctx->key[4] = get_unaligned_le32(key + 16);
-	ctx->key[5] = get_unaligned_le32(key + 20);
-	ctx->key[6] = get_unaligned_le32(key + 24);
-	ctx->key[7] = get_unaligned_le32(key + 28);
-	ctx->counter[0] = 0;
-	ctx->counter[1] = 0;
-	ctx->counter[2] = nonce & 0xffffffffU;
-	ctx->counter[3] = nonce >> 32;
-}
-
-static void chacha20_block(struct chacha20_ctx *ctx, uint32_t *stream)
-{
-	uint32_t x[CHACHA20_BLOCK_WORDS];
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(x); ++i)
-		x[i] = ctx->state[i];
-
-	TWENTY_ROUNDS(x);
-
-	for (i = 0; i < ARRAY_SIZE(x); ++i)
-		stream[i] = cpu_to_le32(x[i] + ctx->state[i]);
-
-	ctx->counter[0] += 1;
-}
-
-static void chacha20(struct chacha20_ctx *ctx, uint8_t *out, const uint8_t *in,
-		     uint32_t len)
-{
-	uint32_t buf[CHACHA20_BLOCK_WORDS];
-
-	while (len >= CHACHA20_BLOCK_SIZE) {
-		chacha20_block(ctx, buf);
-		xor_cpy(out, in, (uint8_t *)buf, CHACHA20_BLOCK_SIZE);
-		len -= CHACHA20_BLOCK_SIZE;
-		out += CHACHA20_BLOCK_SIZE;
-		in += CHACHA20_BLOCK_SIZE;
-	}
-	if (len) {
-		chacha20_block(ctx, buf);
-		xor_cpy(out, in, (uint8_t *)buf, len);
-	}
-}
-
-static void hchacha20(uint32_t derived_key[CHACHA20_KEY_WORDS],
-		      const uint8_t nonce[HCHACHA20_NONCE_SIZE],
-		      const uint8_t key[HCHACHA20_KEY_SIZE])
-{
-	uint32_t x[] = { CHACHA20_CONSTANT_EXPA,
-		    CHACHA20_CONSTANT_ND_3,
-		    CHACHA20_CONSTANT_2_BY,
-		    CHACHA20_CONSTANT_TE_K,
-		    get_unaligned_le32(key +  0),
-		    get_unaligned_le32(key +  4),
-		    get_unaligned_le32(key +  8),
-		    get_unaligned_le32(key + 12),
-		    get_unaligned_le32(key + 16),
-		    get_unaligned_le32(key + 20),
-		    get_unaligned_le32(key + 24),
-		    get_unaligned_le32(key + 28),
-		    get_unaligned_le32(nonce +  0),
-		    get_unaligned_le32(nonce +  4),
-		    get_unaligned_le32(nonce +  8),
-		    get_unaligned_le32(nonce + 12)
-	};
-
-	TWENTY_ROUNDS(x);
-
-	memcpy(derived_key + 0, x +  0, sizeof(uint32_t) * 4);
-	memcpy(derived_key + 4, x + 12, sizeof(uint32_t) * 4);
-}
-
-enum poly1305_lengths {
-	POLY1305_BLOCK_SIZE = 16,
-	POLY1305_KEY_SIZE = 32,
-	POLY1305_MAC_SIZE = 16
-};
-
-struct poly1305_internal {
-	uint32_t h[5];
-	uint32_t r[5];
-	uint32_t s[4];
-};
-
-struct poly1305_ctx {
-	struct poly1305_internal state;
-	uint32_t nonce[4];
-	uint8_t data[POLY1305_BLOCK_SIZE];
-	size_t num;
-};
-
-static void poly1305_init_core(struct poly1305_internal *st,
-			       const uint8_t key[16])
-{
-	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-	st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
-	st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
-	st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
-	st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
-	st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;
-
-	/* s = 5*r */
-	st->s[0] = st->r[1] * 5;
-	st->s[1] = st->r[2] * 5;
-	st->s[2] = st->r[3] * 5;
-	st->s[3] = st->r[4] * 5;
-
-	/* h = 0 */
-	st->h[0] = 0;
-	st->h[1] = 0;
-	st->h[2] = 0;
-	st->h[3] = 0;
-	st->h[4] = 0;
-}
-
-static void poly1305_blocks_core(struct poly1305_internal *st,
-				 const uint8_t *input, size_t len,
-				 const uint32_t padbit)
-{
-	const uint32_t hibit = padbit << 24;
-	uint32_t r0, r1, r2, r3, r4;
-	uint32_t s1, s2, s3, s4;
-	uint32_t h0, h1, h2, h3, h4;
-	uint64_t d0, d1, d2, d3, d4;
-	uint32_t c;
-
-	r0 = st->r[0];
-	r1 = st->r[1];
-	r2 = st->r[2];
-	r3 = st->r[3];
-	r4 = st->r[4];
-
-	s1 = st->s[0];
-	s2 = st->s[1];
-	s3 = st->s[2];
-	s4 = st->s[3];
-
-	h0 = st->h[0];
-	h1 = st->h[1];
-	h2 = st->h[2];
-	h3 = st->h[3];
-	h4 = st->h[4];
-
-	while (len >= POLY1305_BLOCK_SIZE) {
-		/* h += m[i] */
-		h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
-		h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
-		h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
-		h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
-		h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
-
-		/* h *= r */
-		d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) +
-		     ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) +
-		     ((uint64_t)h4 * s1);
-		d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) +
-		     ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) +
-		     ((uint64_t)h4 * s2);
-		d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) +
-		     ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) +
-		     ((uint64_t)h4 * s3);
-		d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) +
-		     ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) +
-		     ((uint64_t)h4 * s4);
-		d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) +
-		     ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) +
-		     ((uint64_t)h4 * r0);
-
-		/* (partial) h %= p */
-		c = (uint32_t)(d0 >> 26);
-		h0 = (uint32_t)d0 & 0x3ffffff;
-		d1 += c;
-		c = (uint32_t)(d1 >> 26);
-		h1 = (uint32_t)d1 & 0x3ffffff;
-		d2 += c;
-		c = (uint32_t)(d2 >> 26);
-		h2 = (uint32_t)d2 & 0x3ffffff;
-		d3 += c;
-		c = (uint32_t)(d3 >> 26);
-		h3 = (uint32_t)d3 & 0x3ffffff;
-		d4 += c;
-		c = (uint32_t)(d4 >> 26);
-		h4 = (uint32_t)d4 & 0x3ffffff;
-		h0 += c * 5;
-		c = (h0 >> 26);
-		h0 = h0 & 0x3ffffff;
-		h1 += c;
-
-		input += POLY1305_BLOCK_SIZE;
-		len -= POLY1305_BLOCK_SIZE;
-	}
-
-	st->h[0] = h0;
-	st->h[1] = h1;
-	st->h[2] = h2;
-	st->h[3] = h3;
-	st->h[4] = h4;
-}
-
-static void poly1305_emit_core(struct poly1305_internal *st, uint8_t mac[16],
-			       const uint32_t nonce[4])
-{
-	uint32_t h0, h1, h2, h3, h4, c;
-	uint32_t g0, g1, g2, g3, g4;
-	uint64_t f;
-	uint32_t mask;
-
-	/* fully carry h */
-	h0 = st->h[0];
-	h1 = st->h[1];
-	h2 = st->h[2];
-	h3 = st->h[3];
-	h4 = st->h[4];
-
-	c = h1 >> 26;
-	h1 = h1 & 0x3ffffff;
-	h2 += c;
-	c = h2 >> 26;
-	h2 = h2 & 0x3ffffff;
-	h3 += c;
-	c = h3 >> 26;
-	h3 = h3 & 0x3ffffff;
-	h4 += c;
-	c = h4 >> 26;
-	h4 = h4 & 0x3ffffff;
-	h0 += c * 5;
-	c = h0 >> 26;
-	h0 = h0 & 0x3ffffff;
-	h1 += c;
-
-	/* compute h + -p */
-	g0 = h0 + 5;
-	c = g0 >> 26;
-	g0 &= 0x3ffffff;
-	g1 = h1 + c;
-	c = g1 >> 26;
-	g1 &= 0x3ffffff;
-	g2 = h2 + c;
-	c = g2 >> 26;
-	g2 &= 0x3ffffff;
-	g3 = h3 + c;
-	c = g3 >> 26;
-	g3 &= 0x3ffffff;
-	g4 = h4 + c - (1UL << 26);
-
-	/* select h if h < p, or h + -p if h >= p */
-	mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1;
-	g0 &= mask;
-	g1 &= mask;
-	g2 &= mask;
-	g3 &= mask;
-	g4 &= mask;
-	mask = ~mask;
-
-	h0 = (h0 & mask) | g0;
-	h1 = (h1 & mask) | g1;
-	h2 = (h2 & mask) | g2;
-	h3 = (h3 & mask) | g3;
-	h4 = (h4 & mask) | g4;
-
-	/* h = h % (2^128) */
-	h0 = ((h0) | (h1 << 26)) & 0xffffffff;
-	h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
-	h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
-	h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
-
-	/* mac = (h + nonce) % (2^128) */
-	f = (uint64_t)h0 + nonce[0];
-	h0 = (uint32_t)f;
-	f = (uint64_t)h1 + nonce[1] + (f >> 32);
-	h1 = (uint32_t)f;
-	f = (uint64_t)h2 + nonce[2] + (f >> 32);
-	h2 = (uint32_t)f;
-	f = (uint64_t)h3 + nonce[3] + (f >> 32);
-	h3 = (uint32_t)f;
-
-	put_unaligned_le32(h0, &mac[0]);
-	put_unaligned_le32(h1, &mac[4]);
-	put_unaligned_le32(h2, &mac[8]);
-	put_unaligned_le32(h3, &mac[12]);
-}
-
-static void poly1305_init(struct poly1305_ctx *ctx,
-			  const uint8_t key[POLY1305_KEY_SIZE])
-{
-	ctx->nonce[0] = get_unaligned_le32(&key[16]);
-	ctx->nonce[1] = get_unaligned_le32(&key[20]);
-	ctx->nonce[2] = get_unaligned_le32(&key[24]);
-	ctx->nonce[3] = get_unaligned_le32(&key[28]);
-
-	poly1305_init_core(&ctx->state, key);
-
-	ctx->num = 0;
-}
-
-static void poly1305_update(struct poly1305_ctx *ctx, const uint8_t *input,
-			    size_t len)
-{
-	const size_t num = ctx->num;
-	size_t rem;
-
-	if (num) {
-		rem = POLY1305_BLOCK_SIZE - num;
-		if (len < rem) {
-			memcpy(ctx->data + num, input, len);
-			ctx->num = num + len;
-			return;
-		}
-		memcpy(ctx->data + num, input, rem);
-		poly1305_blocks_core(&ctx->state, ctx->data,
-				     POLY1305_BLOCK_SIZE, 1);
-		input += rem;
-		len -= rem;
-	}
-
-	rem = len % POLY1305_BLOCK_SIZE;
-	len -= rem;
-
-	if (len >= POLY1305_BLOCK_SIZE) {
-		poly1305_blocks_core(&ctx->state, input, len, 1);
-		input += len;
-	}
-
-	if (rem)
-		memcpy(ctx->data, input, rem);
-
-	ctx->num = rem;
-}
-
-static void poly1305_final(struct poly1305_ctx *ctx,
-			   uint8_t mac[POLY1305_MAC_SIZE])
-{
-	size_t num = ctx->num;
-
-	if (num) {
-		ctx->data[num++] = 1;
-		while (num < POLY1305_BLOCK_SIZE)
-			ctx->data[num++] = 0;
-		poly1305_blocks_core(&ctx->state, ctx->data,
-				     POLY1305_BLOCK_SIZE, 0);
-	}
-
-	poly1305_emit_core(&ctx->state, mac, ctx->nonce);
-
-	explicit_bzero(ctx, sizeof(*ctx));
-}
-#endif
-
-#ifdef COMPAT_NEED_CHACHA20POLY1305
-static const uint8_t pad0[16] = { 0 };
-
-void
-chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
-			 const uint8_t *ad, const size_t ad_len,
-			 const uint64_t nonce,
-			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-	struct poly1305_ctx poly1305_state;
-	struct chacha20_ctx chacha20_state;
-	union {
-		uint8_t block0[POLY1305_KEY_SIZE];
-		uint64_t lens[2];
-	} b = { { 0 } };
-
-	chacha20_init(&chacha20_state, key, nonce);
-	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
-	poly1305_init(&poly1305_state, b.block0);
-
-	poly1305_update(&poly1305_state, ad, ad_len);
-	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
-
-	chacha20(&chacha20_state, dst, src, src_len);
-
-	poly1305_update(&poly1305_state, dst, src_len);
-	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf);
-
-	b.lens[0] = cpu_to_le64(ad_len);
-	b.lens[1] = cpu_to_le64(src_len);
-	poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));
-
-	poly1305_final(&poly1305_state, dst + src_len);
-
-	explicit_bzero(&chacha20_state, sizeof(chacha20_state));
-	explicit_bzero(&b, sizeof(b));
-}
-
-bool
-chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
-			 const uint8_t *ad, const size_t ad_len,
-			 const uint64_t nonce,
-			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-	struct poly1305_ctx poly1305_state;
-	struct chacha20_ctx chacha20_state;
-	bool ret;
-	size_t dst_len;
-	union {
-		uint8_t block0[POLY1305_KEY_SIZE];
-		uint8_t mac[POLY1305_MAC_SIZE];
-		uint64_t lens[2];
-	} b = { { 0 } };
-
-	if (src_len < POLY1305_MAC_SIZE)
-		return false;
-
-	chacha20_init(&chacha20_state, key, nonce);
-	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
-	poly1305_init(&poly1305_state, b.block0);
-
-	poly1305_update(&poly1305_state, ad, ad_len);
-	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
-
-	dst_len = src_len - POLY1305_MAC_SIZE;
-	poly1305_update(&poly1305_state, src, dst_len);
-	poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf);
-
-	b.lens[0] = cpu_to_le64(ad_len);
-	b.lens[1] = cpu_to_le64(dst_len);
-	poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));
-
-	poly1305_final(&poly1305_state, b.mac);
-
-	ret = timingsafe_bcmp(b.mac, src + dst_len, POLY1305_MAC_SIZE) == 0;
-	if (ret)
-		chacha20(&chacha20_state, dst, src, dst_len);
-
-	explicit_bzero(&chacha20_state, sizeof(chacha20_state));
-	explicit_bzero(&b, sizeof(b));
-
-	return ret;
-}
-
-void
-xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src,
-			  const size_t src_len, const uint8_t *ad,
-			  const size_t ad_len,
-			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
-			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-	uint32_t derived_key[CHACHA20_KEY_WORDS];
-
-	hchacha20(derived_key, nonce, key);
-	cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
-	chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
-				 get_unaligned_le64(nonce + 16),
-				 (uint8_t *)derived_key);
-	explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE);
-}
-
-bool
-xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src,
-			  const size_t src_len,  const uint8_t *ad,
-			  const size_t ad_len,
-			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
-			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-	bool ret;
-	uint32_t derived_key[CHACHA20_KEY_WORDS];
-
-	hchacha20(derived_key, nonce, key);
-	cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
-	ret = chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
-				       get_unaligned_le64(nonce + 16),
-				       (uint8_t *)derived_key);
-	explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE);
-	return ret;
-}
-#endif
-
-#ifdef COMPAT_NEED_CHACHA20POLY1305_MBUF
-static inline int
-chacha20poly1305_crypt_mbuf(struct mbuf *m0, uint64_t nonce,
-			    const uint8_t key[CHACHA20POLY1305_KEY_SIZE], bool encrypt)
-{
-	struct poly1305_ctx poly1305_state;
-	struct chacha20_ctx chacha20_state;
-	uint8_t *buf, mbuf_mac[POLY1305_MAC_SIZE];
-	size_t len, leftover = 0;
-	struct mbuf *m;
-	int ret;
-	union {
-		uint32_t stream[CHACHA20_BLOCK_WORDS];
-		uint8_t block0[POLY1305_KEY_SIZE];
-		uint8_t mac[POLY1305_MAC_SIZE];
-		uint64_t lens[2];
-	} b = { { 0 } };
-
-	if (!encrypt) {
-		if (m0->m_pkthdr.len < POLY1305_MAC_SIZE)
-			return EMSGSIZE;
-		m_copydata(m0, m0->m_pkthdr.len - POLY1305_MAC_SIZE, POLY1305_MAC_SIZE, mbuf_mac);
-		m_adj(m0, -POLY1305_MAC_SIZE);
-	}
-
-	chacha20_init(&chacha20_state, key, nonce);
-	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
-	poly1305_init(&poly1305_state, b.block0);
-
-	for (m = m0; m; m = m->m_next) {
-		len = m->m_len;
-		buf = m->m_data;
-
-		if (!encrypt)
-			poly1305_update(&poly1305_state, m->m_data, m->m_len);
-
-		if (leftover != 0) {
-			size_t l = min(len, leftover);
-			xor_cpy(buf, buf, ((uint8_t *)b.stream) + (CHACHA20_BLOCK_SIZE - leftover), l);
-			leftover -= l;
-			buf += l;
-			len -= l;
-		}
-
-		while (len >= CHACHA20_BLOCK_SIZE) {
-			chacha20_block(&chacha20_state, b.stream);
-			xor_cpy(buf, buf, (uint8_t *)b.stream, CHACHA20_BLOCK_SIZE);
-			buf += CHACHA20_BLOCK_SIZE;
-			len -= CHACHA20_BLOCK_SIZE;
-		}
-
-		if (len) {
-			chacha20_block(&chacha20_state, b.stream);
-			xor_cpy(buf, buf, (uint8_t *)b.stream, len);
-			leftover = CHACHA20_BLOCK_SIZE - len;
-		}
-
-		if (encrypt)
-			poly1305_update(&poly1305_state, m->m_data, m->m_len);
-	}
-	poly1305_update(&poly1305_state, pad0, (0x10 - m0->m_pkthdr.len) & 0xf);
-
-	b.lens[0] = 0;
-	b.lens[1] = cpu_to_le64(m0->m_pkthdr.len);
-	poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));
-
-	poly1305_final(&poly1305_state, b.mac);
-
-	if (encrypt)
-		ret = m_append(m0, POLY1305_MAC_SIZE, b.mac) ? 0 : ENOMEM;
-	else
-		ret = timingsafe_bcmp(b.mac, mbuf_mac, POLY1305_MAC_SIZE) == 0 ? 0 : EBADMSG;
-
-	explicit_bzero(&chacha20_state, sizeof(chacha20_state));
-	explicit_bzero(&b, sizeof(b));
-
-	return ret;
-}
-
-int
-chacha20poly1305_encrypt_mbuf(struct mbuf *m, const uint64_t nonce,
-			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-	return chacha20poly1305_crypt_mbuf(m, nonce, key, true);
-}
-
-int
-chacha20poly1305_decrypt_mbuf(struct mbuf *m, const uint64_t nonce,
-			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-	return chacha20poly1305_crypt_mbuf(m, nonce, key, false);
-}
-#else
-static int
-crypto_callback(struct cryptop *crp)
-{
-	return (0);
-}
-
-int
-chacha20poly1305_encrypt_mbuf(struct mbuf *m, const uint64_t nonce,
-			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
-{
-	static const char blank_tag[POLY1305_HASH_LEN];
-	struct cryptop crp;
-	int ret;
-
-	if (!m_append(m, POLY1305_HASH_LEN, blank_tag))
-		return (ENOMEM);
-	crypto_initreq(&crp, chacha20_poly1305_sid);
-	crp.crp_op = CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST;
-	crp.crp_flags = CRYPTO_F_IV_SEPARATE | CRYPTO_F_CBIMM;
-	crypto_use_mbuf(&crp, m);
-	crp.crp_payload_length = m->m_pkthdr.len - POLY1305_HASH_LEN;
-	crp.crp_digest_start = crp.crp_payload_length;
-	le64enc(crp.crp_iv, nonce);
-	crp.crp_cipher_key = key;
-	crp.crp_callback = crypto_callback;
-	ret = crypto_dispatch(&crp);
-	crypto_destroyreq(&crp);
-	return (ret);
*** 965 LINES SKIPPED ***