git: bc36a0d1df83 - stable/12 - Fortuna: Add Chacha20 as an alternative stream cipher

From: David E. O'Brien <obrien_at_FreeBSD.org>
Date: Sat, 12 Feb 2022 22:58:35 UTC
The branch stable/12 has been updated by obrien:

URL: https://cgit.FreeBSD.org/src/commit/?id=bc36a0d1df83e42659e7224e3838ef1a0ab4bfcd

commit bc36a0d1df83e42659e7224e3838ef1a0ab4bfcd
Author:     Conrad Meyer <cem@FreeBSD.org>
AuthorDate: 2019-03-08 01:17:20 +0000
Commit:     David E. O'Brien <obrien@FreeBSD.org>
CommitDate: 2022-02-12 22:28:06 +0000

    Fortuna: Add Chacha20 as an alternative stream cipher
    
    ChaCha20 with a 256-bit key and a 128-bit counter is a good match as an
    AES256-ICM replacement.
    
    In userspace, Chacha20 is typically marginally slower than AES-ICM on
    machines with AESNI intrinsics, but typically much faster than AES on
    machines without special intrinsics.  ChaCha20 does well on typical modern
    architectures with SIMD instructions, which includes most types of machines
    FreeBSD runs on.
    
    In the kernel, we can't (or don't) make use of AESNI intrinsics for
    random(4) anyway.  So even on amd64, using Chacha provides a modest
    performance improvement in random device throughput today.
    
    This change makes the stream cipher used by random(4) configurable at boot
    time with the 'kern.random.use_chacha20_cipher' tunable.
    
    Very rough, non-scientific measurements at the /dev/random device, on a
    GENERIC-NODEBUG amd64 VM with 'pv', show a factor of 2.2x higher throughput
    for Chacha20 over the existing AES-ICM mode.
    
    (cherry picked from commit ab69c4858cb7237623cb8dddb8cfe101f50ad3d8)
---
 sys/dev/random/fortuna.c |  17 ++++--
 sys/dev/random/hash.c    | 133 ++++++++++++++++++++++++++++++++++++++++++-----
 sys/dev/random/hash.h    |  18 +++++--
 sys/dev/random/uint128.h |  42 ++++++++++++++-
 4 files changed, 188 insertions(+), 22 deletions(-)

diff --git a/sys/dev/random/fortuna.c b/sys/dev/random/fortuna.c
index 31fcab9e89ff..e8a407525138 100644
--- a/sys/dev/random/fortuna.c
+++ b/sys/dev/random/fortuna.c
@@ -109,7 +109,7 @@ static struct fortuna_state {
 	} fs_pool[RANDOM_FORTUNA_NPOOLS];
 	u_int fs_reseedcount;		/* ReseedCnt */
 	uint128_t fs_counter;		/* C */
-	struct randomdev_key fs_key;	/* K */
+	union randomdev_key fs_key;	/* K */
 	u_int fs_minpoolsize;		/* Extras */
 	/* Extras for the OS */
 #ifdef _KERNEL
@@ -271,16 +271,27 @@ random_fortuna_reseed_internal(uint32_t *entropy_data, u_int blockcount)
 {
 	struct randomdev_hash context;
 	uint8_t hash[RANDOM_KEYSIZE];
+	const void *keymaterial;
+	size_t keysz;
+	bool seeded;
 
 	RANDOM_RESEED_ASSERT_LOCK_OWNED();
+
+	seeded = random_fortuna_seeded();
+	if (seeded) {
+		randomdev_getkey(&fortuna_state.fs_key, &keymaterial, &keysz);
+		KASSERT(keysz == RANDOM_KEYSIZE, ("%s: key size %zu not %u",
+			__func__, keysz, (unsigned)RANDOM_KEYSIZE));
+	}
+
 	/*-
 	 * FS&K - K = Hd(K|s) where Hd(m) is H(H(0^512|m))
 	 *      - C = C + 1
 	 */
 	randomdev_hash_init(&context);
 	randomdev_hash_iterate(&context, zero_region, RANDOM_ZERO_BLOCKSIZE);
-	randomdev_hash_iterate(&context, &fortuna_state.fs_key.key.keyMaterial,
-	    fortuna_state.fs_key.key.keyLen / 8);
+	if (seeded)
+		randomdev_hash_iterate(&context, keymaterial, keysz);
 	randomdev_hash_iterate(&context, entropy_data, RANDOM_KEYSIZE*blockcount);
 	randomdev_hash_finish(&context, hash);
 	randomdev_hash_init(&context);
diff --git a/sys/dev/random/hash.c b/sys/dev/random/hash.c
index 0bad46519f50..2f3cb4738bc6 100644
--- a/sys/dev/random/hash.c
+++ b/sys/dev/random/hash.c
@@ -30,6 +30,9 @@ __FBSDID("$FreeBSD$");
 
 #ifdef _KERNEL
 #include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/random.h>
+#include <sys/sysctl.h>
 #include <sys/systm.h>
 #else /* !_KERNEL */
 #include <sys/param.h>
@@ -42,14 +45,39 @@ __FBSDID("$FreeBSD$");
 #include "unit_test.h"
 #endif /* _KERNEL */
 
+#define CHACHA_EMBED
+#define KEYSTREAM_ONLY
+#define CHACHA_NONCE0_CTR128
+#include <crypto/chacha20/chacha.c>
 #include <crypto/rijndael/rijndael-api-fst.h>
 #include <crypto/sha2/sha256.h>
 
 #include <dev/random/hash.h>
+#ifdef _KERNEL
+#include <dev/random/randomdev.h>
+#endif
 
 /* This code presumes that RANDOM_KEYSIZE is twice as large as RANDOM_BLOCKSIZE */
 CTASSERT(RANDOM_KEYSIZE == 2*RANDOM_BLOCKSIZE);
 
+/* Validate that full Chacha IV is as large as the 128-bit counter */
+_Static_assert(CHACHA_STATELEN == RANDOM_BLOCKSIZE, "");
+
+/*
+ * Experimental Chacha20-based PRF for Fortuna keystream primitive.  For now,
+ * disabled by default.  But we may enable it in the future.
+ *
+ * Benefits include somewhat faster keystream generation compared with
+ * unaccelerated AES-ICM.
+ */
+bool random_chachamode = false;
+#ifdef _KERNEL
+SYSCTL_BOOL(_kern_random, OID_AUTO, use_chacha20_cipher, CTLFLAG_RDTUN,
+    &random_chachamode, 0,
+    "If non-zero, use the ChaCha20 cipher for randomdev PRF.  "
+    "If zero, use AES-ICM cipher for randomdev PRF (default).");
+#endif
+
 /* Initialise the hash */
 void
 randomdev_hash_init(struct randomdev_hash *context)
@@ -81,11 +109,15 @@ randomdev_hash_finish(struct randomdev_hash *context, void *buf)
  * data.
  */
 void
-randomdev_encrypt_init(struct randomdev_key *context, const void *data)
+randomdev_encrypt_init(union randomdev_key *context, const void *data)
 {
 
-	rijndael_cipherInit(&context->cipher, MODE_ECB, NULL);
-	rijndael_makeKey(&context->key, DIR_ENCRYPT, RANDOM_KEYSIZE*8, data);
+	if (random_chachamode) {
+		chacha_keysetup(&context->chacha, data, RANDOM_KEYSIZE * 8);
+	} else {
+		rijndael_cipherInit(&context->cipher, MODE_ECB, NULL);
+		rijndael_makeKey(&context->key, DIR_ENCRYPT, RANDOM_KEYSIZE*8, data);
+	}
 }
 
 /*
@@ -95,19 +127,96 @@ randomdev_encrypt_init(struct randomdev_key *context, const void *data)
  * bytes are generated.
  */
 void
-randomdev_keystream(struct randomdev_key *context, uint128_t *ctr,
+randomdev_keystream(union randomdev_key *context, uint128_t *ctr,
     void *d_out, u_int blockcount)
 {
 	u_int i;
 
-	for (i = 0; i < blockcount; i++) {
-		/*-
-		 * FS&K - r = r|E(K,C)
-		 *      - C = C + 1
+	if (random_chachamode) {
+		uint128_t lectr;
+
+		/*
+		 * Chacha always encodes and increments the counter little
+		 * endian.  So on BE machines, we must provide a swapped
+		 * counter to chacha, and swap the output too.
+		 */
+		le128enc(&lectr, *ctr);
+
+		chacha_ivsetup(&context->chacha, NULL, (const void *)&lectr);
+		chacha_encrypt_bytes(&context->chacha, NULL, d_out,
+		    RANDOM_BLOCKSIZE * blockcount);
+
+		/*
+		 * Decode Chacha-updated LE counter to native endian and store
+		 * it back in the caller's in-out parameter.
 		 */
-		rijndael_blockEncrypt(&context->cipher, &context->key,
-		    (void *)ctr, RANDOM_BLOCKSIZE * 8, d_out);
-		d_out = (char *)d_out + RANDOM_BLOCKSIZE;
-		uint128_increment(ctr);
+		chacha_ctrsave(&context->chacha, (void *)&lectr);
+		*ctr = le128dec(&lectr);
+	} else {
+		for (i = 0; i < blockcount; i++) {
+			/*-
+			 * FS&K - r = r|E(K,C)
+			 *      - C = C + 1
+			 */
+			rijndael_blockEncrypt(&context->cipher, &context->key,
+			    (void *)ctr, RANDOM_BLOCKSIZE * 8, d_out);
+			d_out = (char *)d_out + RANDOM_BLOCKSIZE;
+			uint128_increment(ctr);
+		}
 	}
 }
+
+/*
+ * Fetch a pointer to the relevant key material and its size.
+ *
+ * This API is expected to be used only for reseeding, where the
+ * endianness does not matter; the goal is simply to incorporate the key
+ * material into the hash iterator that will produce key'.
+ *
+ * On return, *keyp points into 'context' and *szp holds the key size in
+ * bytes.  Do not expect that buffer to match the exact endianness, etc,
+ * of the key material that was supplied to randomdev_encrypt_init().
+ */
+void
+randomdev_getkey(union randomdev_key *context, const void **keyp, size_t *szp)
+{
+
+	if (!random_chachamode) {
+		*keyp = &context->key.keyMaterial;
+		*szp = context->key.keyLen / 8;
+		return;
+	}
+
+	/* Chacha20 mode: key words follow the four constant words in input[] */
+	*keyp = (const void *)&context->chacha.input[4];
+
+	/* Sanity check keysize: the sigma constant marks a 256-bit key */
+	if (context->chacha.input[0] == U8TO32_LITTLE(sigma) &&
+	    context->chacha.input[1] == U8TO32_LITTLE(&sigma[4]) &&
+	    context->chacha.input[2] == U8TO32_LITTLE(&sigma[8]) &&
+	    context->chacha.input[3] == U8TO32_LITTLE(&sigma[12])) {
+		*szp = 32;
+		return;
+	}
+
+#if 0
+	/*
+	 * Included for the sake of completeness; as-implemented, Fortuna
+	 * doesn't need or use 128-bit Chacha20.
+	 */
+	if (context->chacha.input[0] == U8TO32_LITTLE(tau) &&
+	    context->chacha.input[1] == U8TO32_LITTLE(&tau[4]) &&
+	    context->chacha.input[2] == U8TO32_LITTLE(&tau[8]) &&
+	    context->chacha.input[3] == U8TO32_LITTLE(&tau[12])) {
+		*szp = 16;
+		return;
+	}
+#endif
+
+#ifdef _KERNEL
+	panic("%s: Invalid chacha20 keysize: %16D\n", __func__,
+	    (void *)context->chacha.input, " ");
+#else
+	raise(SIGKILL);
+#endif
+}
diff --git a/sys/dev/random/hash.h b/sys/dev/random/hash.h
index ff24d3fb802d..bcc7035a0e4f 100644
--- a/sys/dev/random/hash.h
+++ b/sys/dev/random/hash.h
@@ -29,6 +29,7 @@
 #ifndef SYS_DEV_RANDOM_HASH_H_INCLUDED
 #define	SYS_DEV_RANDOM_HASH_H_INCLUDED
 
+#include <crypto/chacha20/_chacha.h>
 #include <dev/random/uint128.h>
 
 /* Keys are formed from cipher blocks */
@@ -45,15 +46,22 @@ struct randomdev_hash {
 	SHA256_CTX	sha;
 };
 
-struct randomdev_key {
-	keyInstance key;	/* Key schedule */
-	cipherInstance cipher;	/* Rijndael internal */
+union randomdev_key {
+	struct {
+		keyInstance key;	/* Key schedule */
+		cipherInstance cipher;	/* Rijndael internal */
+	};
+	struct chacha_ctx chacha;
 };
 
+extern bool random_chachamode;	/* hash.c: ChaCha20 if true, else AES-ICM */
+
 void randomdev_hash_init(struct randomdev_hash *);
 void randomdev_hash_iterate(struct randomdev_hash *, const void *, size_t);
 void randomdev_hash_finish(struct randomdev_hash *, void *);
-void randomdev_encrypt_init(struct randomdev_key *, const void *);
-void randomdev_keystream(struct randomdev_key *context, uint128_t *, void *, u_int);
+
+void randomdev_encrypt_init(union randomdev_key *, const void *);
+void randomdev_keystream(union randomdev_key *context, uint128_t *, void *, u_int);
+void randomdev_getkey(union randomdev_key *, const void **, size_t *);
 
 #endif /* SYS_DEV_RANDOM_HASH_H_INCLUDED */
diff --git a/sys/dev/random/uint128.h b/sys/dev/random/uint128.h
index 22380b2f6a3b..63de28a3864a 100644
--- a/sys/dev/random/uint128.h
+++ b/sys/dev/random/uint128.h
@@ -29,6 +29,8 @@
 #ifndef SYS_DEV_RANDOM_UINT128_H_INCLUDED
 #define	SYS_DEV_RANDOM_UINT128_H_INCLUDED
 
+#include <sys/endian.h>
+
 /* This whole thing is a crock :-(
  *
  * Everyone knows you always need the __uint128_t types!
@@ -63,13 +65,49 @@ uint128_increment(uint128_t *big_uintp)
 #endif
 }
 
+static __inline bool
+uint128_equals(uint128_t a, uint128_t b)
+{
+#ifdef USE_REAL_UINT128_T
+	return (a == b);
+#else
+	return (a.u128t_word0 == b.u128t_word0 &&
+	    a.u128t_word1 == b.u128t_word1);
+#endif
+}
+
 static __inline int
 uint128_is_zero(uint128_t big_uint)
 {
+	return (uint128_equals(big_uint, UINT128_ZERO));
+}
+
+static __inline uint128_t
+le128dec(const void *pp)
+{
+	const uint8_t *p = pp;
+
+#ifdef USE_REAL_UINT128_T
+	return (((uint128_t)le64dec(p + 8) << 64) | le64dec(p));
+#else
+	return ((uint128_t){
+	    .u128t_word0 = le64dec(p),
+	    .u128t_word1 = le64dec(p + 8),
+	    });
+#endif
+}
+
+static __inline void
+le128enc(void *pp, uint128_t u)
+{
+	uint8_t *p = pp;
+
 #ifdef USE_REAL_UINT128_T
-	return (big_uint == UINT128_ZERO);
+	le64enc(p, (uint64_t)(u & UINT64_MAX));
+	le64enc(p + 8, (uint64_t)(u >> 64));
 #else
-	return (big_uint.u128t_word0 == 0UL && big_uint.u128t_word1 == 0UL);
+	le64enc(p, u.u128t_word0);
+	le64enc(p + 8, u.u128t_word1);
 #endif
 }