Page MenuHomeFreeBSD

D47659.id147068.diff
No OneTemporary

D47659.id147068.diff

diff --git a/lib/libc/gen/arc4random.3 b/lib/libc/gen/arc4random.3
--- a/lib/libc/gen/arc4random.3
+++ b/lib/libc/gen/arc4random.3
@@ -30,7 +30,7 @@
.\"
.\" Manual page, using -mandoc macros
.\"
-.Dd April 13, 2020
+.Dd November 18, 2024
.Dt ARC4RANDOM 3
.Os
.Sh NAME
@@ -129,6 +129,17 @@
.%O Document ID: 4027b5256e17b9796842e6d0f68b0b5e
.%U http://cr.yp.to/papers.html#chacha
.Re
+.Rs
+.%A Daniel Lemire
+.%T Fast Random Integer Generation in an Interval
+.%D January 2019
+.%J ACM Trans. Model. Comput. Simul.
+.%I Association for Computing Machinery
+.%C New York, NY, USA
+.%V vol. 29
+.%N no. 1
+.%P pp. 1\(en12
+.Re
.Sh HISTORY
These functions first appeared in
.Ox 2.1 .
diff --git a/lib/libc/gen/arc4random_uniform.c b/lib/libc/gen/arc4random_uniform.c
--- a/lib/libc/gen/arc4random_uniform.c
+++ b/lib/libc/gen/arc4random_uniform.c
@@ -1,56 +1,47 @@
-/* $OpenBSD: arc4random_uniform.c,v 1.3 2019/01/20 02:59:07 bcook Exp $ */
-
-/*
- * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+/*-
+ * SPDX-License-Identifier: 0BSD
*
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
+ * Copyright (c) Robert Clausecker <fuz@FreeBSD.org>
+ * Based on a publication by Daniel Lemire.
+ * Public domain where applicable.
*
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ * Daniel Lemire, "Fast Random Integer Generation in an Interval",
+ * Association for Computing Machinery, ACM Trans. Model. Comput. Simul.,
+ * no. 1, vol. 29, pp. 1--12, New York, NY, USA, January 2019.
*/
#include <stdint.h>
#include <stdlib.h>
-/*
- * Calculate a uniformly distributed random number less than upper_bound
- * avoiding "modulo bias".
- *
- * Uniformity is achieved by generating new random numbers until the one
- * returned is outside the range [0, 2**32 % upper_bound). This
- * guarantees the selected random number will be inside
- * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
- * after reduction modulo upper_bound.
- */
uint32_t
arc4random_uniform(uint32_t upper_bound)
{
- uint32_t r, min;
-
- if (upper_bound < 2)
- return 0;
-
- /* 2**32 % x == (2**32 - x) % x */
- min = -upper_bound % upper_bound;
+ uint64_t product;
/*
- * This could theoretically loop forever but each retry has
- * p > 0.5 (worst case, usually far better) of selecting a
- * number inside the range we need, so it should rarely need
- * to re-roll.
+ * The paper uses these variable names:
+ *
+ * L -- log2(UINT32_MAX+1)
+ * s -- upper_bound
+ * x -- arc4random() return value
+ * m -- product
+ * l -- (uint32_t)product
+ * t -- threshold
*/
- for (;;) {
- r = arc4random();
- if (r >= min)
- break;
+
+ if (upper_bound <= 1)
+ return (0);
+
+ product = upper_bound * (uint64_t)arc4random();
+
+ if ((uint32_t)product < upper_bound) {
+ uint32_t threshold;
+
+ /* threshold = (2**32 - upper_bound) % upper_bound */
+ threshold = -upper_bound % upper_bound;
+ while ((uint32_t)product < threshold)
+ product = upper_bound * (uint64_t)arc4random();
}
- return r % upper_bound;
+ return (product >> 32);
}
diff --git a/lib/libc/tests/gen/arc4random_test.c b/lib/libc/tests/gen/arc4random_test.c
--- a/lib/libc/tests/gen/arc4random_test.c
+++ b/lib/libc/tests/gen/arc4random_test.c
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2011 David Schultz
+ * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,6 +30,7 @@
#include <sys/wait.h>
#include <errno.h>
#include <stdio.h>
+#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -77,10 +79,34 @@
"sequences are the same");
}
+/*
+ * Test whether arc4random_uniform() returns a number below the given threshold.
+ * Test with various thresholds.
+ */
+ATF_TC_WITHOUT_HEAD(test_arc4random_uniform);
+ATF_TC_BODY(test_arc4random_uniform, tc)
+{
+ size_t i, j;
+ static const uint32_t thresholds[] = {
+ 1, 2, 3, 4, 5, 10, 100, 1000,
+ INT32_MAX, (uint32_t)INT32_MAX + 1,
+ UINT32_MAX - 1000000000, UINT32_MAX - 1000000, UINT32_MAX - 1, 0
+ };
+
+ for (i = 0; thresholds[i] != 0; i++)
+ for (j = 0; j < 10000; j++)
+ ATF_CHECK(arc4random_uniform(thresholds[i]) < thresholds[i]);
+
+ /* for a threshold of zero, just check that we get zero every time */
+ for (j = 0; j < 1000; j++)
+ ATF_CHECK_EQ(0, arc4random_uniform(0));
+}
+
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, test_arc4random);
+ ATF_TP_ADD_TC(tp, test_arc4random_uniform);
return (atf_no_error());
}
diff --git a/tools/test/README b/tools/test/README
--- a/tools/test/README
+++ b/tools/test/README
@@ -1,4 +1,3 @@
-
This directory is for standalone test programs. For the FreeBSD
Test Suite, which uses Kyua, please see /usr/src/tests/
@@ -7,6 +6,7 @@
Please make a subdir per program, and add a brief description to this file.
+arc4random Bias test for arc4random_uniform()
auxinfo Return information on page sizes, CPUs, and OS release date.
devrandom Programs to test /dev/*random.
gpioevents Test delivery of gpio pin-change events to userland.
diff --git a/tools/test/arc4random/biastest.c b/tools/test/arc4random/biastest.c
new file mode 100644
--- /dev/null
+++ b/tools/test/arc4random/biastest.c
@@ -0,0 +1,216 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
+ *
+ * biastest.c -- bias test for arc4random_uniform().
+ *
+ * The default configuration of this test has an upper bound of
+ * (3/4) * UINT32_MAX, which should give a high amount of bias in
+ * an incorrect implementation. If the range reduction is
+ * implemented correctly, the parameters of the statistic should
+ * closely match the expected values. If not, they'll differ.
+ *
+ * For memory usage reasons, we use an uchar to track the number of
+ * observations per bucket. If the number of tries is much larger
+ * than upper_bound, the buckets likely overflow. This is detected
+ * by the test, but will lead to incorrect results.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <signal.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static void collect_sample(unsigned char *, long long, uint32_t);
+static void analyze_sample(const unsigned char *, long long, uint32_t);
+
+static atomic_bool complete = false;
+static long long tries = 5ULL << 32;
+static atomic_llong tries_done = 0;
+
+static void
+usage(const char *argv0)
+{
+ fprintf(stderr, "usage: %s [-n tries] [-t upper_bound]\n", argv0);
+ exit(EXIT_FAILURE);
+}
+
+int
+main(int argc, char *argv[])
+{
+ uint32_t threshold = 3UL << 30;
+ int ch;
+ unsigned char *sample;
+
+ while (ch = getopt(argc, argv, "n:t:"), ch != EOF)
+ switch (ch) {
+ case 'n':
+ tries = atoll(optarg);
+ break;
+
+ case 't':
+ threshold = (uint32_t)atoll(optarg);
+ break;
+
+ default:
+ usage(argv[0]);
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (threshold == 0) {
+ fprintf(stderr, "threshold must be between 1 and %lu\n", (unsigned long)UINT32_MAX);
+ exit(EXIT_FAILURE);
+ }
+
+ sample = calloc(threshold, 1);
+ if (sample == NULL) {
+ perror("calloc(threshold, 1)");
+ return (EXIT_FAILURE);
+ }
+
+ collect_sample(sample, tries, threshold);
+ analyze_sample(sample, tries, threshold);
+}
+
+static void
+progress(int signo)
+{
+ (void)signo;
+
+ if (!complete) {
+ fprintf(stderr, "\r%10lld of %10lld samples taken (%5.2f%% done)",
+ tries_done, tries, (tries_done * 100.0) / tries);
+
+ signal(SIGALRM, progress);
+ alarm(1);
+ }
+}
+
+static void
+collect_sample(unsigned char *sample, long long tries, uint32_t threshold)
+{
+ long long i;
+ uint32_t x;
+ bool overflowed = false;
+
+ progress(SIGALRM);
+
+ for (i = 0; i < tries; i++) {
+ x = arc4random_uniform(threshold);
+ tries_done++;
+ assert(x < threshold);
+
+ if (sample[x] == UCHAR_MAX) {
+ if (!overflowed) {
+ printf("sample table overflow, results will be incorrect\n");
+ overflowed = true;
+ }
+ } else
+ sample[x]++;
+ }
+
+ progress(SIGALRM);
+ complete = true;
+ fputc('\n', stderr);
+}
+
+static void
+analyze_sample(const unsigned char *sample, long long tries, uint32_t threshold)
+{
+ double discrepancy, average, variance, total;
+ long long histogram[UCHAR_MAX + 1] = { 0 }, sum, n, median;
+ uint32_t i, i_min, i_max;
+ int min, max;
+
+ printf("distribution properties:\n");
+
+ /* find median, average, deviation, smallest, and largest bucket */
+ total = 0.0;
+ for (i = 0; i < threshold; i++) {
+ histogram[sample[i]]++;
+ total += (double)i * sample[i];
+ }
+
+ average = total / tries;
+
+ variance = 0.0;
+ median = threshold;
+ n = 0;
+ i_min = 0;
+ i_max = 0;
+ min = sample[i_min];
+ max = sample[i_max];
+
+ for (i = 0; i < threshold; i++) {
+ discrepancy = i - average;
+ variance += sample[i] * discrepancy * discrepancy;
+
+ n += sample[i];
+ if (median == threshold && n > tries / 2)
+ median = i;
+
+ if (sample[i] < min) {
+ i_min = i;
+ min = sample[i_min];
+ } else if (sample[i] > max) {
+ i_max = i;
+ max = sample[i_max];
+ }
+ }
+
+ variance /= tries;
+ assert(median < threshold);
+
+ printf("\tthreshold: %lu\n", (unsigned long)threshold);
+ printf("\tobservations: %lld\n", tries);
+ printf("\tleast common: %lu (%d observations)\n", (unsigned long)i_min, min);
+ printf("\tmost common: %lu (%d observations)\n", (unsigned long)i_max, max);
+ printf("\tmedian: %lld (expected %lu)\n", median, (unsigned long)threshold / 2);
+ printf("\taverage: %f (expected %f)\n", average, 0.5 * (threshold - 1));
+ printf("\tdeviation: %f (expected %f)\n\n", sqrt(variance),
+ sqrt(((double)threshold * threshold - 1.0) / 12));
+
+ /* build histogram and analyze it */
+ printf("sample properties:\n");
+
+ /* find median, average, and deviation */
+ average = (double)tries / threshold;
+
+ variance = 0.0;
+ for (i = 0; i < UCHAR_MAX; i++) {
+ discrepancy = i - average;
+ variance += histogram[i] * discrepancy * discrepancy;
+ }
+
+ variance /= threshold;
+
+ n = 0;
+ median = UCHAR_MAX + 1;
+ for (i = 0; i <= UCHAR_MAX; i++) {
+ n += histogram[i];
+ if (n >= threshold / 2) {
+ median = i;
+ break;
+ }
+ }
+
+ assert(median <= UCHAR_MAX); /* unreachable */
+
+ printf("\tmedian: %lld\n", median);
+ printf("\taverage: %f\n", average);
+ printf("\tdeviation: %f (expected %f)\n\n", sqrt(variance), sqrt(average * (1.0 - 1.0 / threshold)));
+
+ printf("histogram:\n");
+ for (i = 0; i < 256; i++)
+ if (histogram[i] != 0)
+ printf("\t%3d:\t%lld\n", (int)i, histogram[i]);
+}

File Metadata

Mime Type
text/plain
Expires
Sat, Feb 8, 4:21 PM (7 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16528876
Default Alt Text
D47659.id147068.diff (10 KB)

Event Timeline