kern/182610: arc4random(): replace RC4 with ChaCha20, follow OpenBSD

Christian Weisgerber naddy at FreeBSD.org
Thu Oct 3 19:00:00 UTC 2013


>Number:         182610
>Category:       kern
>Synopsis:       arc4random(): replace RC4 with ChaCha20, follow OpenBSD
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Thu Oct 03 19:00:00 UTC 2013
>Closed-Date:
>Last-Modified:
>Originator:     Christian Weisgerber
>Release:        FreeBSD 9.2-STABLE amd64
>Organization:
>Environment:
System: FreeBSD lorvorc.mips.inka.de 9.2-STABLE FreeBSD 9.2-STABLE #0 r255924: Sat Sep 28 14:16:39 CEST 2013 naddy at lorvorc.mips.inka.de:/usr/obj/usr/src/sys/GENERIC amd64
	
>Description:

The appended patch (against head) syncs FreeBSD's arc4random(3)
implementation with that of OpenBSD, in particular Markus Friedl's
latest work:

It replaces RC4 in arc4random() with the more secure stream cipher
ChaCha20 (http://cr.yp.to/chacha.html) by Bernstein.

It's about as fast as the original code. The main difference is
that it creates a 1024 byte key stream and returns randomness from
this buffer before rekeying and refilling the buffer.

The choice of ChaCha and the overall design is heavily influenced by
Nick Mathewson's libottery.

References:
http://www.openbsd.org/cgi-bin/cvsweb/src/lib/libc/crypt/
https://github.com/nmathewson/libottery

>How-To-Repeat:

>Fix:

Index: arc4random.c
===================================================================
--- arc4random.c	(revision 256022)
+++ arc4random.c	(working copy)
@@ -1,8 +1,9 @@
-/*	$OpenBSD: arc4random.c,v 1.22 2010/12/22 08:23:42 otto Exp $	*/
+/*	$OpenBSD: arc4random.c,v 1.25 2013/10/01 18:34:57 markus Exp $	*/
 
 /*
  * Copyright (c) 1996, David Mazieres <dm at uun.org>
  * Copyright (c) 2008, Damien Miller <djm at openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus at openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -18,15 +19,7 @@
  */
 
 /*
- * Arc4 random number generator for OpenBSD.
- *
- * This code is derived from section 17.1 of Applied Cryptography,
- * second edition, which describes a stream cipher allegedly
- * compatible with RSA Labs "RC4" cipher (the actual description of
- * which is a trade secret).  The same algorithm is used as a stream
- * cipher called "arcfour" in Tatu Ylonen's ssh package.
- *
- * RC4 is a registered trademark of RSA Laboratories.
+ * ChaCha based random number generator for OpenBSD.
  */
 
 #include <sys/cdefs.h>
@@ -36,6 +29,7 @@
 #include <fcntl.h>
 #include <limits.h>
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/param.h>
@@ -46,6 +40,9 @@
 #include "libc_private.h"
 #include "un-namespace.h"
 
+#define KEYSTREAM_ONLY
+#include "chacha_private.h"
+
 #ifdef __GNUC__
 #define inline __inline
 #else				/* !__GNUC__ */
@@ -52,16 +49,13 @@
 #define inline
 #endif				/* !__GNUC__ */
 
-struct arc4_stream {
-	u_int8_t i;
-	u_int8_t j;
-	u_int8_t s[256];
-};
-
 static pthread_mutex_t	arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
 
 #define	RANDOMDEV	"/dev/random"
-#define	KEYSIZE		128
+#define	KEYSZ		32
+#define	IVSZ		8
+#define	BLOCKSZ		64
+#define	RSBUFSZ		(16*BLOCKSZ)
 #define	_ARC4_LOCK()						\
 	do {							\
 		if (__isthreaded)				\
@@ -75,46 +69,28 @@
 	} while (0)
 
 static int rs_initialized;
-static struct arc4_stream rs;
-static pid_t arc4_stir_pid;
-static int arc4_count;
+static pid_t rs_stir_pid;
+static chacha_ctx rs;		/* chacha context for random keystream */
+static u_char rs_buf[RSBUFSZ];	/* keystream blocks */
+static size_t rs_have;		/* valid bytes at end of rs_buf */
+static size_t rs_count;		/* bytes till reseed */
 
 extern int __sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen);
 
-static inline u_int8_t arc4_getbyte(void);
-static void arc4_stir(void);
+static inline void _rs_rekey(u_char *dat, size_t datlen);
 
 static inline void
-arc4_init(void)
+_rs_init(u_char *buf, size_t n)
 {
-	int     n;
-
-	for (n = 0; n < 256; n++)
-		rs.s[n] = n;
-	rs.i = 0;
-	rs.j = 0;
+	if (n < KEYSZ + IVSZ)
+		return;
+	chacha_keysetup(&rs, buf, KEYSZ * 8, 0);
+	chacha_ivsetup(&rs, buf + KEYSZ);
 }
 
-static inline void
-arc4_addrandom(u_char *dat, int datlen)
-{
-	int     n;
-	u_int8_t si;
-
-	rs.i--;
-	for (n = 0; n < 256; n++) {
-		rs.i = (rs.i + 1);
-		si = rs.s[rs.i];
-		rs.j = (rs.j + si + dat[n % datlen]);
-		rs.s[rs.i] = rs.s[rs.j];
-		rs.s[rs.j] = si;
-	}
-	rs.j = rs.i;
-}
-
 static size_t
-arc4_sysctl(u_char *buf, size_t size)
+_rs_sysctl(u_char *buf, size_t size)
 {
 	int mib[2];
 	size_t len, done;
@@ -136,26 +112,22 @@
 }
 
 static void
-arc4_stir(void)
+_rs_stir(void)
 {
-	int done, fd, i;
+	int done, fd;
 	struct {
 		struct timeval	tv;
 		pid_t		pid;
-		u_char	 	rnd[KEYSIZE];
+		u_char	 	rnd[KEYSZ + IVSZ];
 	} rdat;
 
-	if (!rs_initialized) {
-		arc4_init();
-		rs_initialized = 1;
-	}
 	done = 0;
-	if (arc4_sysctl((u_char *)&rdat, KEYSIZE) == KEYSIZE)
+	if (_rs_sysctl((u_char *)&rdat, KEYSZ + IVSZ) == KEYSZ + IVSZ)
 		done = 1;
 	if (!done) {
 		fd = _open(RANDOMDEV, O_RDONLY | O_CLOEXEC, 0);
 		if (fd >= 0) {
-			if (_read(fd, &rdat, KEYSIZE) == KEYSIZE)
+			if (_read(fd, &rdat, KEYSZ + IVSZ) == KEYSZ + IVSZ)
 				done = 1;
 			(void)_close(fd);
 		}
@@ -166,52 +138,85 @@
 		/* We'll just take whatever was on the stack too... */
 	}
 
-	arc4_addrandom((u_char *)&rdat, KEYSIZE);
+	if (!rs_initialized) {
+		rs_initialized = 1;
+		_rs_init((u_char *)&rdat, KEYSZ + IVSZ);
+	} else
+		_rs_rekey((u_char *)&rdat, KEYSZ + IVSZ);
+	memset((u_char *)&rdat, 0, sizeof(rdat));
 
-	/*
-	 * Discard early keystream, as per recommendations in:
-	 * "(Not So) Random Shuffles of RC4" by Ilya Mironov.
-	 */
-	for (i = 0; i < 1024; i++)
-		(void)arc4_getbyte();
-	arc4_count = 1600000;
+	/* invalidate rs_buf */
+	rs_have = 0;
+	memset(rs_buf, 0, RSBUFSZ);
+
+	rs_count = 1600000;
 }
 
-static void
-arc4_stir_if_needed(void)
+static inline void
+_rs_stir_if_needed(size_t len)
 {
 	pid_t pid = getpid();
 
-	if (arc4_count <= 0 || !rs_initialized || arc4_stir_pid != pid)
-	{
-		arc4_stir_pid = pid;
-		arc4_stir();
+	if (rs_count <= len || !rs_initialized || rs_stir_pid != pid) {
+		rs_stir_pid = pid;
+		_rs_stir();
+	} else
+		rs_count -= len;
+}
+
+static inline void
+_rs_rekey(u_char *dat, size_t datlen)
+{
+#ifndef KEYSTREAM_ONLY
+	memset(rs_buf, 0,RSBUFSZ);
+#endif
+	/* fill rs_buf with the keystream */
+	chacha_encrypt_bytes(&rs, rs_buf, rs_buf, RSBUFSZ);
+	/* mix in optional user provided data */
+	if (dat) {
+		size_t i, m;
+
+		m = MIN(datlen, KEYSZ + IVSZ);
+		for (i = 0; i < m; i++)
+			rs_buf[i] ^= dat[i];
 	}
+	/* immediately reinit for backtracking resistance */
+	_rs_init(rs_buf, KEYSZ + IVSZ);
+	memset(rs_buf, 0, KEYSZ + IVSZ);
+	rs_have = RSBUFSZ - KEYSZ - IVSZ;
 }
 
-static inline u_int8_t
-arc4_getbyte(void)
+static inline void
+_rs_random_buf(void *_buf, size_t n)
 {
-	u_int8_t si, sj;
+	u_char *buf = (u_char *)_buf;
+	size_t m;
 
-	rs.i = (rs.i + 1);
-	si = rs.s[rs.i];
-	rs.j = (rs.j + si);
-	sj = rs.s[rs.j];
-	rs.s[rs.i] = sj;
-	rs.s[rs.j] = si;
-	return (rs.s[(si + sj) & 0xff]);
+	_rs_stir_if_needed(n);
+	while (n > 0) {
+		if (rs_have > 0) {
+			m = MIN(n, rs_have);
+			memcpy(buf, rs_buf + RSBUFSZ - rs_have, m);
+			memset(rs_buf + RSBUFSZ - rs_have, 0, m);
+			buf += m;
+			n -= m;
+			rs_have -= m;
+		}
+		if (rs_have == 0)
+			_rs_rekey(NULL, 0);
+	}
 }
 
-static inline u_int32_t
-arc4_getword(void)
+static inline void
+_rs_random_u32(u_int32_t *val)
 {
-	u_int32_t val;
-	val = arc4_getbyte() << 24;
-	val |= arc4_getbyte() << 16;
-	val |= arc4_getbyte() << 8;
-	val |= arc4_getbyte();
-	return val;
+	_rs_stir_if_needed(sizeof(*val));
+	if (rs_have < sizeof(*val))
+		_rs_rekey(NULL, 0);
+	memcpy(val, rs_buf + RSBUFSZ - rs_have, sizeof(*val));
+	memset(rs_buf + RSBUFSZ - rs_have, 0, sizeof(*val));
+	rs_have -= sizeof(*val);
+	return;
 }
 
 void
@@ -218,7 +223,7 @@
 arc4random_stir(void)
 {
 	_ARC4_LOCK();
-	arc4_stir();
+	_rs_stir();
 	_ARC4_UNLOCK();
 }
 
@@ -225,10 +230,17 @@
 void
 arc4random_addrandom(u_char *dat, int datlen)
 {
+	int m;
+
 	_ARC4_LOCK();
 	if (!rs_initialized)
-		arc4_stir();
-	arc4_addrandom(dat, datlen);
+		_rs_stir();
+	while (datlen > 0) {
+		m = MIN(datlen, KEYSZ + IVSZ);
+		_rs_rekey(dat, m);
+		dat += m;
+		datlen -= m;
+	}
 	_ARC4_UNLOCK();
 }
 
@@ -236,25 +248,18 @@
 arc4random(void)
 {
 	u_int32_t val;
+
 	_ARC4_LOCK();
-	arc4_count -= 4;
-	arc4_stir_if_needed();
-	val = arc4_getword();
+	_rs_random_u32(&val);
 	_ARC4_UNLOCK();
 	return val;
 }
 
 void
-arc4random_buf(void *_buf, size_t n)
+arc4random_buf(void *buf, size_t n)
 {
-	u_char *buf = (u_char *)_buf;
 	_ARC4_LOCK();
-	arc4_stir_if_needed();
-	while (n--) {
-		if (--arc4_count <= 0)
-			arc4_stir();
-		buf[n] = arc4_getbyte();
-	}
+	_rs_random_buf(buf, n);
 	_ARC4_UNLOCK();
 }
 
@@ -276,17 +281,8 @@
 	if (upper_bound < 2)
 		return 0;
 
-#if (ULONG_MAX > 0xffffffffUL)
-	min = 0x100000000UL % upper_bound;
-#else
-	/* Calculate (2**32 % upper_bound) avoiding 64-bit math */
-	if (upper_bound > 0x80000000)
-		min = 1 + ~upper_bound;		/* 2**32 - upper_bound */
-	else {
-		/* (2**32 - (x * 2)) % x == 2**32 % x when x <= 2**31 */
-		min = ((0xffffffff - (upper_bound * 2)) + 1) % upper_bound;
-	}
-#endif
+	/* 2**32 % x == (2**32 - x) % x */
+	min = -upper_bound % upper_bound;
 
 	/*
 	 * This could theoretically loop forever but each retry has
@@ -321,5 +317,6 @@
 	v /= iter;
 
 	printf("%qd cycles\n", v);
+	exit(0);
 }
 #endif
Index: chacha_private.h
===================================================================
--- chacha_private.h	(revision 0)
+++ chacha_private.h	(working copy)
@@ -0,0 +1,220 @@
+/*
+chacha-merged.c version 20080118
+D. J. Bernstein
+Public domain.
+*/
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+
+typedef struct
+{
+  u32 input[16]; /* could be compressed */
+} chacha_ctx;
+
+#define U8C(v) (v##U)
+#define U32C(v) (v##U)
+
+#define U8V(v) ((u8)(v) & U8C(0xFF))
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+#define ROTL32(v, n) \
+  (U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define U8TO32_LITTLE(p) \
+  (((u32)((p)[0])      ) | \
+   ((u32)((p)[1]) <<  8) | \
+   ((u32)((p)[2]) << 16) | \
+   ((u32)((p)[3]) << 24))
+
+#define U32TO8_LITTLE(p, v) \
+  do { \
+    (p)[0] = U8V((v)      ); \
+    (p)[1] = U8V((v) >>  8); \
+    (p)[2] = U8V((v) >> 16); \
+    (p)[3] = U8V((v) >> 24); \
+  } while (0)
+
+#define ROTATE(v,c) (ROTL32(v,c))
+#define XOR(v,w) ((v) ^ (w))
+#define PLUS(v,w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v),1))
+
+#define QUARTERROUND(a,b,c,d) \
+  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
+
+static const char sigma[16] = "expand 32-byte k";
+static const char tau[16] = "expand 16-byte k";
+
+static void
+chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
+{
+  const char *constants;
+
+  x->input[4] = U8TO32_LITTLE(k + 0);
+  x->input[5] = U8TO32_LITTLE(k + 4);
+  x->input[6] = U8TO32_LITTLE(k + 8);
+  x->input[7] = U8TO32_LITTLE(k + 12);
+  if (kbits == 256) { /* recommended */
+    k += 16;
+    constants = sigma;
+  } else { /* kbits == 128 */
+    constants = tau;
+  }
+  x->input[8] = U8TO32_LITTLE(k + 0);
+  x->input[9] = U8TO32_LITTLE(k + 4);
+  x->input[10] = U8TO32_LITTLE(k + 8);
+  x->input[11] = U8TO32_LITTLE(k + 12);
+  x->input[0] = U8TO32_LITTLE(constants + 0);
+  x->input[1] = U8TO32_LITTLE(constants + 4);
+  x->input[2] = U8TO32_LITTLE(constants + 8);
+  x->input[3] = U8TO32_LITTLE(constants + 12);
+}
+
+static void
+chacha_ivsetup(chacha_ctx *x,const u8 *iv)
+{
+  x->input[12] = 0;
+  x->input[13] = 0;
+  x->input[14] = U8TO32_LITTLE(iv + 0);
+  x->input[15] = U8TO32_LITTLE(iv + 4);
+}
+
+static void
+chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
+{
+  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+  u8 *ctarget;
+  u8 tmp[64];
+  int i;
+
+  if (!bytes) return;
+
+  j0 = x->input[0];
+  j1 = x->input[1];
+  j2 = x->input[2];
+  j3 = x->input[3];
+  j4 = x->input[4];
+  j5 = x->input[5];
+  j6 = x->input[6];
+  j7 = x->input[7];
+  j8 = x->input[8];
+  j9 = x->input[9];
+  j10 = x->input[10];
+  j11 = x->input[11];
+  j12 = x->input[12];
+  j13 = x->input[13];
+  j14 = x->input[14];
+  j15 = x->input[15];
+
+  for (;;) {
+    if (bytes < 64) {
+      for (i = 0;i < bytes;++i) tmp[i] = m[i];
+      m = tmp;
+      ctarget = c;
+      c = tmp;
+    }
+    x0 = j0;
+    x1 = j1;
+    x2 = j2;
+    x3 = j3;
+    x4 = j4;
+    x5 = j5;
+    x6 = j6;
+    x7 = j7;
+    x8 = j8;
+    x9 = j9;
+    x10 = j10;
+    x11 = j11;
+    x12 = j12;
+    x13 = j13;
+    x14 = j14;
+    x15 = j15;
+    for (i = 20;i > 0;i -= 2) {
+      QUARTERROUND( x0, x4, x8,x12)
+      QUARTERROUND( x1, x5, x9,x13)
+      QUARTERROUND( x2, x6,x10,x14)
+      QUARTERROUND( x3, x7,x11,x15)
+      QUARTERROUND( x0, x5,x10,x15)
+      QUARTERROUND( x1, x6,x11,x12)
+      QUARTERROUND( x2, x7, x8,x13)
+      QUARTERROUND( x3, x4, x9,x14)
+    }
+    x0 = PLUS(x0,j0);
+    x1 = PLUS(x1,j1);
+    x2 = PLUS(x2,j2);
+    x3 = PLUS(x3,j3);
+    x4 = PLUS(x4,j4);
+    x5 = PLUS(x5,j5);
+    x6 = PLUS(x6,j6);
+    x7 = PLUS(x7,j7);
+    x8 = PLUS(x8,j8);
+    x9 = PLUS(x9,j9);
+    x10 = PLUS(x10,j10);
+    x11 = PLUS(x11,j11);
+    x12 = PLUS(x12,j12);
+    x13 = PLUS(x13,j13);
+    x14 = PLUS(x14,j14);
+    x15 = PLUS(x15,j15);
+
+#ifndef KEYSTREAM_ONLY
+    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
+    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
+    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
+    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
+    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
+    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
+    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
+    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
+    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
+    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
+    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
+    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
+    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
+    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
+    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
+    x15 = XOR(x15,U8TO32_LITTLE(m + 60));
+#endif
+
+    j12 = PLUSONE(j12);
+    if (!j12) {
+      j13 = PLUSONE(j13);
+      /* stopping at 2^70 bytes per nonce is user's responsibility */
+    }
+
+    U32TO8_LITTLE(c + 0,x0);
+    U32TO8_LITTLE(c + 4,x1);
+    U32TO8_LITTLE(c + 8,x2);
+    U32TO8_LITTLE(c + 12,x3);
+    U32TO8_LITTLE(c + 16,x4);
+    U32TO8_LITTLE(c + 20,x5);
+    U32TO8_LITTLE(c + 24,x6);
+    U32TO8_LITTLE(c + 28,x7);
+    U32TO8_LITTLE(c + 32,x8);
+    U32TO8_LITTLE(c + 36,x9);
+    U32TO8_LITTLE(c + 40,x10);
+    U32TO8_LITTLE(c + 44,x11);
+    U32TO8_LITTLE(c + 48,x12);
+    U32TO8_LITTLE(c + 52,x13);
+    U32TO8_LITTLE(c + 56,x14);
+    U32TO8_LITTLE(c + 60,x15);
+
+    if (bytes <= 64) {
+      if (bytes < 64) {
+        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
+      }
+      x->input[12] = j12;
+      x->input[13] = j13;
+      return;
+    }
+    bytes -= 64;
+    c += 64;
+#ifndef KEYSTREAM_ONLY
+    m += 64;
+#endif
+  }
+}

Property changes on: chacha_private.h
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property

>Release-Note:
>Audit-Trail:
>Unformatted:


More information about the freebsd-bugs mailing list