Page MenuHomeFreeBSD
Authored By
olce
Nov 30 2023, 5:50 PM
Size
4 KB
Referenced Files
None
Subscribers
None

D42345-olce.diff

diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
index 9353b1264449..331a0155b410 100644
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -2830,46 +2830,90 @@ linux_compat_init(void *arg)
* Generate a single-CPU cpumask_t for each CPU (possibly) in the system.
* CPUs are indexed from 0..(mp_maxid). The entry for cpuid 0 will only
* have itself in the cpumask, cupid 1 only itself on entry 1, and so on.
- * If we have more than (2 * _BITSET_BITS) CPUs we do magic to save a lot
- * of memory given the sets are static and we shift them out.
* This is used by cpumask_of() (and possibly others in the future) for,
* e.g., drivers to pass hints to irq_set_affinity_hint().
*/
static_single_cpu_mask = mallocarray(mp_maxid + 1,
sizeof(static_single_cpu_mask), M_KMALLOC, M_WAITOK | M_ZERO);
- if (mp_maxid < (2 * _BITSET_BITS)) {
+ /*
+ * When the CPU count reaches this threshold, we start to save memory
+ * given the sets are static by overlapping those having their single
+ * bit set at same position in a bitset word. Asymptotically, this
+ * regular scheme is in O(n²) whereas the overlapping one is in O(n)
+ * only with n being the maximum number of CPUs, so the gain will become
+ * huge quite quickly. The threshold for 64-bit architectures is 128
+ * cores/CPUs.
+ */
+ if (mp_ncpus < (2 * _BITSET_BITS)) {
+ cpumask_t *sscm_ptr;
+
+ /*
+ * This represents 'mp_ncpus * __bitset_words(CPU_SETSIZE) *
+ * (_BITSET_BITS / 8)' bytes (for comparison with the
+ * overlapping scheme).
+ */
static_single_cpu_mask_lcs = mallocarray(mp_ncpus,
- sizeof(**static_single_cpu_mask), M_KMALLOC, M_WAITOK | M_ZERO);
+ sizeof(*static_single_cpu_mask_lcs),
+ M_KMALLOC, M_WAITOK | M_ZERO);
+
+ sscm_ptr = static_single_cpu_mask_lcs;
CPU_FOREACH(i) {
- static_single_cpu_mask[i] = static_single_cpu_mask_lcs++;
+ static_single_cpu_mask[i] = sscm_ptr++;
CPU_SET(i, static_single_cpu_mask[i]);
}
} else {
- cpumask_t *cs;
+ /* Pointer to a bitset word. */
+ __typeof(((cpuset_t *)NULL)->__bits[0]) *bwp;
/*
- * [[63..0]{n}[63..0]{n}] = { cpuset_t->__bits[], zeros }
- * We double allocate (in theory one _BITSET_BITS too much).
+ * Allocate memory for (static) spans of 'cpumask_t' ('cpuset_t'
+ * really) with a single bit set that can be reused for all
+ * single CPU masks by starting them at different offsets. We
+ * need '__bitset_words(CPU_SETSIZE) - 1' bitset words before
+ * the word having its single bit set, and the same amount
+ * after.
*/
static_single_cpu_mask_lcs = mallocarray(_BITSET_BITS,
- 2 * sizeof(**static_single_cpu_mask), M_KMALLOC, M_WAITOK | M_ZERO);
+ (2 * __bitset_words(CPU_SETSIZE) - 1) * (_BITSET_BITS / 8),
+ M_KMALLOC, M_WAITOK | M_ZERO);
+
/*
- * Initialize the first static _BITSET_BITS sets,
- * leaving an entire "__bits[]" empty after so we can shift.
+ * We rely below on cpuset_t and the bitset generic
+ * implementation assigning words in the '__bits' array in the
+ * same order of bits (i.e., little-endian ordering, not to be
+ * confused with machine endianness, which concerns bits in
+ * words and other integers). This is an imperfect test, but it
+ * will detect a change to big-endian ordering.
*/
- for (i = 0; i < _BITSET_BITS; i++) {
- cs = static_single_cpu_mask_lcs + (2 * i);
- CPU_SET(i, cs);
- }
+ _Static_assert(
+ __bitset_word(_BITSET_BITS + 1, _BITSET_BITS) == 1,
+ "Assumes a bitset implementation that is little-endian "
+ "on his words");
+
/*
- * Now shift the sets through every _BITSET_BITS until we have
- * enough static sets for mp_maxid + 1.
+ * Initialize the single bit of each static span.
+ */
+ bwp = (__typeof(bwp))static_single_cpu_mask_lcs +
+ (__bitset_words(CPU_SETSIZE) - 1);
+ for (i = 0; i < _BITSET_BITS; i++) {
+ CPU_SET(i, (cpuset_t *)bwp);
+ bwp += (2 * __bitset_words(CPU_SETSIZE) - 1);
+ }
+
+ /*
+ * Finally set all CPU masks to the proper word in their
+ * relevant span.
*/
CPU_FOREACH(i) {
- cs = static_single_cpu_mask_lcs + ((i % _BITSET_BITS) * 2);
- cs = (cpuset_t *)(&cs->__bits[0] - (i / _BITSET_BITS));
- static_single_cpu_mask[i] = cs;
+ bwp = (__typeof(bwp))static_single_cpu_mask_lcs;
+ /* Find the non-zero word of the relevant span. */
+ bwp += (2 * __bitset_words(CPU_SETSIZE) - 1) *
+ (i % _BITSET_BITS) +
+ __bitset_words(CPU_SETSIZE) - 1;
+ /* Shift to find the CPU mask start. */
+ bwp -= (i / _BITSET_BITS);
+ static_single_cpu_mask[i] = (cpuset_t *)bwp;
}
}

File Metadata

Mime Type
text/x-diff
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
6830072
Default Alt Text
D42345-olce.diff (4 KB)

Event Timeline