svn commit: r357975 - head/usr.bin/random

Conrad Meyer cem at FreeBSD.org
Sat Feb 15 19:13:38 UTC 2020


Author: cem
Date: Sat Feb 15 19:13:37 2020
New Revision: 357975
URL: https://svnweb.freebsd.org/changeset/base/357975

Log:
  random(6): Re-add undocumented support for floating point denominators
  
  And document it in the manual page.
  
  PR:		244139
  Submitted by:	Keith White <kwhite AT site.uottawa.ca> (earlier version)

Modified:
  head/usr.bin/random/random.6
  head/usr.bin/random/random.c
  head/usr.bin/random/randomize_fd.c
  head/usr.bin/random/randomize_fd.h

Modified: head/usr.bin/random/random.6
==============================================================================
--- head/usr.bin/random/random.6	Sat Feb 15 18:57:49 2020	(r357974)
+++ head/usr.bin/random/random.6	Sat Feb 15 19:13:37 2020	(r357975)
@@ -28,7 +28,7 @@
 .\"     @(#)random.6	8.2 (Berkeley) 3/31/94
 .\" $FreeBSD$
 .\"
-.Dd December 12, 2019
+.Dd February 15, 2020
 .Dt RANDOM 6
 .Os
 .Sh NAME
@@ -42,27 +42,26 @@
 .Sh DESCRIPTION
 .Nm Random
 has two distinct modes of operations.
-The default is to read in lines
-from the standard input and randomly write them out
-to the standard output with a probability of
-1 /
+The default is to read lines from standard input and write them to standard
+output with a probability of 1.0 /
 .Ar denominator .
+.Ar ( denominator
+is a real number greater than or equal to 1.0.)
 The default
 .Ar denominator
-for this mode of operation is 2, giving each line a 50/50 chance of
+for this mode of operation is 2.0, giving each line a 50% chance of
 being displayed.
 .Pp
-The second mode of operation is to read in a file from
-.Ar filename
-and randomize the contents of the file and send it back out to
+The second mode of operation, selected with the
+.Fl f Ar filename
+option, reads the specified file and outputs the randomized contents to
 standard output.
-The contents can be randomized based off of newlines or based off of
-space characters as determined by
-.Xr isspace 3 .
+The contents can be randomized in units of lines (split on newline characters)
+or in units of words (split on space characters as determined by
+.Xr isspace 3 . )
 The default
 .Ar denominator
-for this mode of operation is 1, which gives each line a chance to be
-displayed, but in a random order.
+for this mode of operation is 1.0, which displays every line.
 .Pp
 The options are as follows:
 .Bl -tag -width Ds
@@ -75,6 +74,9 @@ does not read or write anything, and simply exits with
 exit value of 0 to
 .Ar denominator
 \&- 1, inclusive.
+In this mode,
+.Ar denominator
+must be less than or equal to 256.
 .It Fl f Ar filename
 The
 .Fl f
@@ -83,27 +85,18 @@ option is used to specify the
 to read from.
 Standard input is used if
 .Ar filename
-is set to
-.Sq Fl .
+is
+.Sq - .
 .It Fl l
 Randomize the input via newlines (the default).
 .It Fl r
-The
-.Fl r
-option guarantees that the output is unbuffered.
+Do not buffer output.
 .It Fl U
-Tells
-.Xr random 6
-that it is okay for it to reuse any given line or word when creating a
-randomized output.
+Reuse any given line or word when creating a randomized output.
 .It Fl u
-Tells
-.Xr random 6
-not to select the same line or word from a file more than once (the
-default).
+Do not select the same line or word from a file more than once (the default).
 This does not guarantee uniqueness if there are two of the
-same tokens from the input, but it does prevent selecting the same
-token more than once.
+same tokens in the input.
 .It Fl w
 Randomize words separated by
 .Xr isspace 3
@@ -116,7 +109,12 @@ The
 functionality to randomizing lines and words was added in 2003 by
 .An Sean Chittenden Aq Mt seanc at FreeBSD.org .
 .Sh BUGS
+This tool is a remnant of the "games" collection formerly part of
+.Fx
+base.
+It probably should have been removed to ports with the rest of that collection.
+It does not have a coherent purpose and the motivation for it to be a core base
+utility is nonobvious.
+.Pp
 No index is used when printing out tokens from the list which
 makes it rather slow for large files (10MB+).
-For smaller
-files, however, it should still be quite fast and efficient.

Modified: head/usr.bin/random/random.c
==============================================================================
--- head/usr.bin/random/random.c	Sat Feb 15 18:57:49 2020	(r357974)
+++ head/usr.bin/random/random.c	Sat Feb 15 19:13:37 2020	(r357975)
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
 #include <fcntl.h>
 #include <limits.h>
 #include <locale.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -67,11 +68,12 @@ main(int argc, char *argv[])
 {
 	double denom;
 	int ch, fd, random_exit, randomize_lines, random_type, ret,
-		selected, unique_output, unbuffer_output;
+	    unique_output, unbuffer_output;
+	bool selected;
 	char *ep;
 	const char *filename;
 
-	denom = 0;
+	denom = 0.;
 	filename = "/dev/fd/0";
 	random_type = RANDOM_TYPE_UNSET;
 	random_exit = randomize_lines = unbuffer_output = 0;
@@ -119,16 +121,16 @@ main(int argc, char *argv[])
 
 	switch (argc) {
 	case 0:
-		denom = (randomize_lines ? 1 : 2);
+		denom = (randomize_lines ? 1. : 2.);
 		break;
 	case 1:
 		errno = 0;
 		denom = strtod(*argv, &ep);
 		if (errno == ERANGE)
 			err(1, "%s", *argv);
-		if (denom <= 0 || *ep != '\0')
+		if (denom < 1. || *ep != '\0')
 			errx(1, "denominator is not valid.");
-		if (random_exit && denom > 256)
+		if (random_exit && denom > 256.)
 			errx(1, "denominator must be <= 256 for random exit.");
 		break;
 	default:
@@ -160,24 +162,25 @@ main(int argc, char *argv[])
 		return (arc4random_uniform(denom));
 
 	/*
-	 * Select whether to print the first line.  (Prime the pump.)
-	 * We find a random number between 0 and denom - 1 and, if it's
-	 * 0 (which has a 1 / denom chance of being true), we select the
-	 * line.
+	 * Filter stdin, selecting lines with probability 1/denom, one
+	 * character at a time.
 	 */
-	selected = (arc4random_uniform(denom) == 0);
-	while ((ch = getchar()) != EOF) {
-		if (selected)
-			(void)putchar(ch);
-		if (ch == '\n') {
-			/* End of that line.  See if we got an error. */
-			if (ferror(stdout))
-				err(2, "stdout");
-
-			/* Now see if the next line is to be printed. */
-			selected = (arc4random_uniform(denom) == 0);
+	do {
+		selected = random_uniform_denom(denom);
+		if (selected) {
+			while ((ch = getchar()) != EOF) {
+				putchar(ch);
+				if (ch == '\n')
+					break;
+			}
+		} else {
+			while ((ch = getchar()) != EOF)
+				if (ch == '\n')
+					break;
 		}
-	}
+		if (ferror(stdout))
+			err(2, "stdout");
+	} while (ch != EOF);
 	if (ferror(stdin))
 		err(2, "stdin");
 	exit (0);

Modified: head/usr.bin/random/randomize_fd.c
==============================================================================
--- head/usr.bin/random/randomize_fd.c	Sat Feb 15 18:57:49 2020	(r357974)
+++ head/usr.bin/random/randomize_fd.c	Sat Feb 15 19:13:37 2020	(r357975)
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -218,7 +219,7 @@ make_token:
 				if (n->cp == NULL)
 					break;
 
-				if (arc4random_uniform(denom) == 0) {
+				if (random_uniform_denom(denom)) {
 					ret = printf("%.*s",
 						(int)n->len - 1, n->cp);
 					if (ret < 0)

Modified: head/usr.bin/random/randomize_fd.h
==============================================================================
--- head/usr.bin/random/randomize_fd.h	Sat Feb 15 18:57:49 2020	(r357974)
+++ head/usr.bin/random/randomize_fd.h	Sat Feb 15 19:13:37 2020	(r357975)
@@ -42,4 +42,40 @@ struct rand_node {
 
 int randomize_fd(int fd, int type, int unique, double denom);
 
+/*
+ * Generates a random number uniformly in the range [0.0, 1.0).
+ */
+static inline double
+random_unit_float(void)
+{
+	static const uint64_t denom = (1ull << 53);
+	static const uint64_t mask = denom - 1;
+
+	uint64_t rand64;
+
+	/*
+	 * arc4random_buf(...) in this use generates integer outputs in [0,
+	 * UINT64_MAX].
+	 *
+	 * The double mantissa only has 53 bits, so we uniformly mask off the
+	 * high 11 bits and then floating-point divide by 2^53 to achieve a
+	 * result in [0, 1).
+	 *
+	 * We are not allowed to emit 1.0, so denom must be one greater than
+	 * the possible range of the preceding step.
+	 */
+	arc4random_buf(&rand64, sizeof(rand64));
+	rand64 &= mask;
+	return ((double)rand64 / denom);
+}
+
+/*
+ * Returns true with probability 1 / denom (a floating point number >= 1).
+ * Otherwise, returns false.
+ */
+static inline bool
+random_uniform_denom(double denom)
+{
+	return ((uint64_t)(denom * random_unit_float()) == 0);
+}
 #endif


More information about the svn-src-all mailing list