Page MenuHomeFreeBSD

D45393.diff
No OneTemporary

D45393.diff

diff --git a/sys/amd64/include/runq.h b/sys/amd64/include/runq.h
deleted file mode 100644
--- a/sys/amd64/include/runq.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (1) /* Number of priority status words. */
-#define RQB_L2BPW (6) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1ul << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (bsfq(word))
-
-/*
- * Type of run queue status word.
- */
-typedef u_int64_t rqb_word_t;
-
-#endif
diff --git a/sys/arm/include/runq.h b/sys/arm/include/runq.h
deleted file mode 100644
--- a/sys/arm/include/runq.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (2) /* Number of priority status words. */
-#define RQB_L2BPW (5) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1 << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffs(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-typedef u_int32_t rqb_word_t;
-
-#endif
diff --git a/sys/arm64/include/runq.h b/sys/arm64/include/runq.h
deleted file mode 100644
--- a/sys/arm64/include/runq.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*-
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifdef __arm__
-#include <arm/runq.h>
-#else /* !__arm__ */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (1) /* Number of priority status words. */
-#define RQB_L2BPW (6) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1ul << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffsl(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-typedef unsigned long rqb_word_t;
-
-#endif
-
-#endif /* !__arm__ */
diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c
--- a/sys/cam/ctl/ctl.c
+++ b/sys/cam/ctl/ctl.c
@@ -14307,7 +14307,7 @@
CTL_DEBUG_PRINT(("ctl_work_thread starting\n"));
thread_lock(curthread);
- sched_prio(curthread, PUSER - 1);
+ sched_prio(curthread, PRI_MAX_KERN);
thread_unlock(curthread);
while (!softc->shutdown) {
@@ -14399,7 +14399,7 @@
CTL_DEBUG_PRINT(("ctl_thresh_thread starting\n"));
thread_lock(curthread);
- sched_prio(curthread, PUSER - 1);
+ sched_prio(curthread, PRI_MAX_KERN);
thread_unlock(curthread);
while (!softc->shutdown) {
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h
@@ -44,7 +44,9 @@
#ifdef _KERNEL
#define CPU curcpu
#define minclsyspri PRIBIO
-#define defclsyspri minclsyspri
+#define defclsyspri minclsyspri
+/* Write issue taskq priority. */
+#define wtqclsyspri ((PVM + PRIBIO) / 2)
#define maxclsyspri PVM
#define max_ncpus (mp_maxid + 1)
#define boot_max_ncpus (mp_maxid + 1)
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h b/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h
@@ -91,8 +91,10 @@
* Treat shim tasks as SCHED_NORMAL tasks
*/
#define minclsyspri (MAX_PRIO-1)
-#define maxclsyspri (MAX_RT_PRIO)
#define defclsyspri (DEFAULT_PRIO)
+/* Write issue taskq priority. */
+#define wtqclsyspri (MAX_RT_PRIO + 1)
+#define maxclsyspri (MAX_RT_PRIO)
#ifndef NICE_TO_PRIO
#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
diff --git a/sys/contrib/openzfs/include/sys/zfs_context.h b/sys/contrib/openzfs/include/sys/zfs_context.h
--- a/sys/contrib/openzfs/include/sys/zfs_context.h
+++ b/sys/contrib/openzfs/include/sys/zfs_context.h
@@ -622,8 +622,10 @@
* Process priorities as defined by setpriority(2) and getpriority(2).
*/
#define minclsyspri 19
-#define maxclsyspri -20
#define defclsyspri 0
+/* Write issue taskq priority. */
+#define wtqclsyspri -19
+#define maxclsyspri -20
#define CPU_SEQID ((uintptr_t)pthread_self() & (max_ncpus - 1))
#define CPU_SEQID_UNSTABLE CPU_SEQID
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -1158,29 +1158,14 @@
spa->spa_proc, zio_taskq_basedc, flags);
} else {
#endif
- pri_t pri = maxclsyspri;
/*
* The write issue taskq can be extremely CPU
* intensive. Run it at slightly less important
* priority than the other taskqs.
- *
- * Under Linux and FreeBSD this means incrementing
- * the priority value as opposed to platforms like
- * illumos where it should be decremented.
- *
- * On FreeBSD, if priorities divided by four (RQ_PPQ)
- * are equal then a difference between them is
- * insignificant.
*/
- if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) {
-#if defined(__linux__)
- pri++;
-#elif defined(__FreeBSD__)
- pri += 4;
-#else
-#error "unknown OS"
-#endif
- }
+ const pri_t pri = (t == ZIO_TYPE_WRITE &&
+ q == ZIO_TASKQ_ISSUE) ?
+ wtqclsyspri : maxclsyspri;
tq = taskq_create_proc(name, value, pri, 50,
INT_MAX, spa->spa_proc, flags);
#ifdef HAVE_SYSDC
diff --git a/sys/dev/beri/beri_ring.c b/sys/dev/beri/beri_ring.c
--- a/sys/dev/beri/beri_ring.c
+++ b/sys/dev/beri/beri_ring.c
@@ -170,7 +170,7 @@
}
mtx_lock(&sc->beri_mtx);
- selwakeuppri(&sc->beri_rsel, PZERO + 1);
+ selwakeuppri(&sc->beri_rsel, PZERO);
KNOTE_LOCKED(&sc->beri_rsel.si_note, 0);
mtx_unlock(&sc->beri_mtx);
}
@@ -190,7 +190,7 @@
}
mtx_lock(&sc->beri_mtx);
- selwakeuppri(&sc->beri_rsel, PZERO + 1);
+ selwakeuppri(&sc->beri_rsel, PZERO);
KNOTE_LOCKED(&sc->beri_rsel.si_note, 0);
mtx_unlock(&sc->beri_mtx);
}
diff --git a/sys/dev/firewire/firewirereg.h b/sys/dev/firewire/firewirereg.h
--- a/sys/dev/firewire/firewirereg.h
+++ b/sys/dev/firewire/firewirereg.h
@@ -293,7 +293,7 @@
extern devclass_t firewire_devclass;
extern int firewire_phydma_enable;
-#define FWPRI ((PZERO + 8) | PCATCH)
+#define FWPRI (PWAIT | PCATCH)
#define CALLOUT_INIT(x) callout_init(x, 1 /* mpsafe */)
diff --git a/sys/dev/syscons/syscons.c b/sys/dev/syscons/syscons.c
--- a/sys/dev/syscons/syscons.c
+++ b/sys/dev/syscons/syscons.c
@@ -1310,7 +1310,7 @@
if (i == sc->cur_scp->index)
return 0;
error =
- tsleep(VTY_WCHAN(sc, i), (PZERO + 1) | PCATCH, "waitvt", 0);
+ tsleep(VTY_WCHAN(sc, i), PZERO | PCATCH, "waitvt", 0);
return error;
case VT_GETACTIVE: /* get active vty # */
diff --git a/sys/dev/usb/usb_process.h b/sys/dev/usb/usb_process.h
--- a/sys/dev/usb/usb_process.h
+++ b/sys/dev/usb/usb_process.h
@@ -31,7 +31,6 @@
#ifndef USB_GLOBAL_INCLUDE_FILE
#include <sys/interrupt.h>
#include <sys/priority.h>
-#include <sys/runq.h>
#endif
/* defines */
diff --git a/sys/dev/vkbd/vkbd.c b/sys/dev/vkbd/vkbd.c
--- a/sys/dev/vkbd/vkbd.c
+++ b/sys/dev/vkbd/vkbd.c
@@ -82,7 +82,7 @@
#define VKBD_UNLOCK(s) mtx_unlock(&(s)->ks_lock)
#define VKBD_LOCK_ASSERT(s, w) mtx_assert(&(s)->ks_lock, w)
#define VKBD_SLEEP(s, f, d, t) \
- msleep(&(s)->f, &(s)->ks_lock, PCATCH | (PZERO + 1), d, t)
+ msleep(&(s)->f, &(s)->ks_lock, PCATCH | PZERO, d, t)
#else
#define VKBD_LOCK_DECL
#define VKBD_LOCK_INIT(s)
@@ -90,7 +90,7 @@
#define VKBD_LOCK(s)
#define VKBD_UNLOCK(s)
#define VKBD_LOCK_ASSERT(s, w)
-#define VKBD_SLEEP(s, f, d, t) tsleep(&(s)->f, PCATCH | (PZERO + 1), d, t)
+#define VKBD_SLEEP(s, f, d, t) tsleep(&(s)->f, PCATCH | PZERO, d, t)
#endif
#define VKBD_KEYBOARD(d) \
@@ -268,8 +268,8 @@
VKBD_SLEEP(state, ks_task, "vkbdc", 0);
/* wakeup poll()ers */
- selwakeuppri(&state->ks_rsel, PZERO + 1);
- selwakeuppri(&state->ks_wsel, PZERO + 1);
+ selwakeuppri(&state->ks_rsel, PZERO);
+ selwakeuppri(&state->ks_wsel, PZERO);
state->ks_flags &= ~OPEN;
state->ks_dev = NULL;
@@ -498,7 +498,7 @@
if (!(state->ks_flags & STATUS)) {
state->ks_flags |= STATUS;
- selwakeuppri(&state->ks_rsel, PZERO + 1);
+ selwakeuppri(&state->ks_rsel, PZERO);
wakeup(&state->ks_flags);
}
}
@@ -531,7 +531,7 @@
q->head = 0;
/* wakeup ks_inq writers/poll()ers */
- selwakeuppri(&state->ks_wsel, PZERO + 1);
+ selwakeuppri(&state->ks_wsel, PZERO);
wakeup(q);
return (c);
@@ -1246,7 +1246,7 @@
/* flush ks_inq and wakeup writers/poll()ers */
state->ks_inq.head = state->ks_inq.tail = state->ks_inq.cc = 0;
- selwakeuppri(&state->ks_wsel, PZERO + 1);
+ selwakeuppri(&state->ks_wsel, PZERO);
wakeup(&state->ks_inq);
}
diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c
--- a/sys/fs/fuse/fuse_device.c
+++ b/sys/fs/fuse/fuse_device.c
@@ -152,7 +152,7 @@
FUSE_LOCK();
fuse_lck_mtx_lock(fdata->aw_mtx);
/* wakup poll()ers */
- selwakeuppri(&fdata->ks_rsel, PZERO + 1);
+ selwakeuppri(&fdata->ks_rsel, PZERO);
/* Don't let syscall handlers wait in vain */
while ((tick = fuse_aw_pop(fdata))) {
fuse_lck_mtx_lock(tick->tk_aw_mtx);
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -932,7 +932,7 @@
if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)
return EIO;
fvdat->flag |= FN_FLUSHWANT;
- tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz);
+ tsleep(&fvdat->flag, PRIBIO, "fusevinv", 2 * hz);
error = 0;
if (p != NULL) {
PROC_LOCK(p);
diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c
--- a/sys/fs/fuse/fuse_ipc.c
+++ b/sys/fs/fuse/fuse_ipc.c
@@ -593,7 +593,7 @@
fuse_lck_mtx_lock(data->ms_mtx);
data->dataflags |= FSESS_DEAD;
wakeup_one(data);
- selwakeuppri(&data->ks_rsel, PZERO + 1);
+ selwakeuppri(&data->ks_rsel, PZERO);
wakeup(&data->ticketer);
fuse_lck_mtx_unlock(data->ms_mtx);
FUSE_UNLOCK();
@@ -669,7 +669,7 @@
else
fuse_ms_push(ftick);
wakeup_one(ftick->tk_data);
- selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
+ selwakeuppri(&ftick->tk_data->ks_rsel, PZERO);
KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
}
diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c
--- a/sys/fs/nfs/nfs_commonsubs.c
+++ b/sys/fs/nfs/nfs_commonsubs.c
@@ -4644,7 +4644,7 @@
ts.tv_sec = 0;
ts.tv_nsec = 0;
(void) nfsmsleep((caddr_t)flagp, NFSSOCKMUTEXPTR,
- PZERO - 1, "nfsndlck", &ts);
+ PVFS, "nfsndlck", &ts);
}
*flagp |= NFSR_SNDLOCK;
NFSUNLOCKSOCK();
diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c b/sys/fs/nfsserver/nfs_nfsdcache.c
--- a/sys/fs/nfsserver/nfs_nfsdcache.c
+++ b/sys/fs/nfsserver/nfs_nfsdcache.c
@@ -392,7 +392,7 @@
nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+ (void)mtx_sleep(rp, mutex, PVFS | PDROP,
"nfsrc", 10 * hz);
goto loop;
}
@@ -678,7 +678,7 @@
rp = hitrp;
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+ (void)mtx_sleep(rp, mutex, PVFS | PDROP,
"nfsrc", 10 * hz);
goto tryagain;
}
@@ -750,7 +750,7 @@
mtx_assert(mutex, MA_OWNED);
while ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
+ (void)mtx_sleep(rp, mutex, PVFS, "nfsrc", 0);
}
rp->rc_flag |= RC_LOCKED;
}
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -507,7 +507,7 @@
NFSLOCKSTATE();
while (clp->lc_cbref) {
clp->lc_flags |= LCL_WAKEUPWANTED;
- (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
+ (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
"nfsd clp", 10 * hz);
}
NFSUNLOCKSTATE();
@@ -574,7 +574,7 @@
NFSLOCKSTATE();
while (clp->lc_cbref) {
clp->lc_flags |= LCL_WAKEUPWANTED;
- (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
+ (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
"nfsdclp", 10 * hz);
}
NFSUNLOCKSTATE();
diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c
--- a/sys/fs/smbfs/smbfs_io.c
+++ b/sys/fs/smbfs/smbfs_io.c
@@ -629,7 +629,7 @@
while (np->n_flag & NFLUSHINPROG) {
np->n_flag |= NFLUSHWANT;
- error = tsleep(&np->n_flag, PRIBIO + 2, "smfsvinv", 2 * hz);
+ error = tsleep(&np->n_flag, PRIBIO, "smfsvinv", 2 * hz);
error = smb_td_intr(td);
if (error == EINTR)
return EINTR;
diff --git a/sys/i386/include/runq.h b/sys/i386/include/runq.h
deleted file mode 100644
--- a/sys/i386/include/runq.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (2) /* Number of priority status words. */
-#define RQB_L2BPW (5) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1 << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffs(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-typedef u_int32_t rqb_word_t;
-
-#endif
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -1010,7 +1010,8 @@
mtx_lock(&rms->mtx);
while (rms->writers > 0)
- msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0);
+ msleep(&rms->readers, &rms->mtx, PRI_MAX_KERN,
+ mtx_name(&rms->mtx), 0);
critical_enter();
rms_int_readers_inc(rms, rms_int_pcpu(rms));
mtx_unlock(&rms->mtx);
@@ -1197,7 +1198,7 @@
mtx_lock(&rms->mtx);
rms->writers++;
if (rms->writers > 1) {
- msleep(&rms->owner, &rms->mtx, (PUSER - 1),
+ msleep(&rms->owner, &rms->mtx, PRI_MAX_KERN,
mtx_name(&rms->mtx), 0);
MPASS(rms->readers == 0);
KASSERT(rms->owner == RMS_TRANSIENT,
@@ -1213,7 +1214,7 @@
rms_assert_no_pcpu_readers(rms);
if (rms->readers > 0) {
- msleep(&rms->writers, &rms->mtx, (PUSER - 1),
+ msleep(&rms->writers, &rms->mtx, PRI_MAX_KERN,
mtx_name(&rms->mtx), 0);
}
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -38,6 +38,7 @@
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/runq.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -57,8 +58,6 @@
#endif
#endif
-CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
-
/*
* kern.sched.preemption allows user space to determine if preemption support
* is compiled in or not. It is not currently a boot or runtime flag that
@@ -253,6 +252,35 @@
/************************************************************************
* SYSTEM RUN QUEUE manipulations and tests *
************************************************************************/
+_Static_assert(RQSW_BPW == (1 << RQSW_L2BPW),
+ "RQSW_L2BPW and RQSW_BPW / 'rqsw_t' mismatch");
+_Static_assert(RQ_NQS <= 256,
+ "'td_rqindex' must be turned into a bigger unsigned type");
+/* A macro instead of a function to get the proper calling function's name. */
+#define CHECK_IDX(idx) ({ \
+ __typeof(idx) _idx __unused = (idx); \
+ KASSERT(0 <= _idx && _idx < RQ_NQS, \
+ ("%s: %s out of range: %d", __func__, __STRING(idx), _idx)); \
+})
+
+/* Status words' individual bit manipulators' internals. */
+typedef uintptr_t runq_sw_op(int idx, int sw_idx, rqsw_t sw_bit,
+ rqsw_t *swp);
+static inline uintptr_t runq_sw_apply(struct runq *rq, int idx,
+ runq_sw_op *op);
+
+static inline uintptr_t runq_sw_set_not_empty_op(int idx, int sw_idx,
+ rqsw_t sw_bit, rqsw_t *swp);
+static inline uintptr_t runq_sw_set_empty_op(int idx, int sw_idx,
+ rqsw_t sw_bit, rqsw_t *swp);
+static inline uintptr_t runq_sw_is_empty_op(int idx, int sw_idx,
+ rqsw_t sw_bit, rqsw_t *swp);
+
+/* Status words' individual bit manipulators. */
+static inline void runq_sw_set_not_empty(struct runq *rq, int idx);
+static inline void runq_sw_set_empty(struct runq *rq, int idx);
+static inline bool runq_sw_is_empty(struct runq *rq, int idx);
+
/*
* Initialize a run structure.
*/
@@ -261,98 +289,96 @@
{
int i;
- bzero(rq, sizeof *rq);
+ bzero(rq, sizeof(*rq));
for (i = 0; i < RQ_NQS; i++)
TAILQ_INIT(&rq->rq_queues[i]);
}
/*
- * Clear the status bit of the queue corresponding to priority level pri,
- * indicating that it is empty.
+ * Helper to implement functions operating on a particular status word bit.
+ *
+ * The operator is passed the initial 'idx', the corresponding status word index
+ * in 'rq_status' in 'sw_idx', a status word with only that bit set in 'sw_bit'
+ * and a pointer to the corresponding status word in 'swp'.
*/
-static __inline void
-runq_clrbit(struct runq *rq, int pri)
+static inline uintptr_t
+runq_sw_apply(struct runq *rq, int idx, runq_sw_op *op)
{
- struct rqbits *rqb;
+ rqsw_t *swp;
+ rqsw_t sw_bit;
+ int sw_idx;
- rqb = &rq->rq_status;
- CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
- rqb->rqb_bits[RQB_WORD(pri)],
- rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
- RQB_BIT(pri), RQB_WORD(pri));
- rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
+ CHECK_IDX(idx);
+
+ sw_idx = RQSW_IDX(idx);
+ sw_bit = RQSW_BIT(idx);
+ swp = &rq->rq_status.rq_sw[sw_idx];
+
+ return (op(idx, sw_idx, sw_bit, swp));
}
/*
- * Find the index of the first non-empty run queue. This is done by
- * scanning the status bits, a set bit indicates a non-empty queue.
+ * Modify the status words to indicate that some queue is not empty.
+ *
+ * Sets the status bit corresponding to the queue at index 'idx'.
*/
-static __inline int
-runq_findbit(struct runq *rq)
+static inline uintptr_t
+runq_sw_set_not_empty_op(int idx, int sw_idx, rqsw_t sw_bit, rqsw_t *swp)
{
- struct rqbits *rqb;
- int pri;
- int i;
+ rqsw_t old_sw __unused = *swp;
- rqb = &rq->rq_status;
- for (i = 0; i < RQB_LEN; i++)
- if (rqb->rqb_bits[i]) {
- pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
- CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
- rqb->rqb_bits[i], i, pri);
- return (pri);
- }
-
- return (-1);
+ *swp |= sw_bit;
+ CTR4(KTR_RUNQ, "runq_sw_set_not_empty: idx=%d sw_idx=%d bits=%#x->%#x",
+ idx, sw_idx, old_sw, *swp);
+ return (0);
}
-
-static __inline int
-runq_findbit_from(struct runq *rq, u_char pri)
+static inline void
+runq_sw_set_not_empty(struct runq *rq, int idx)
{
- struct rqbits *rqb;
- rqb_word_t mask;
- int i;
-
- /*
- * Set the mask for the first word so we ignore priorities before 'pri'.
- */
- mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
- rqb = &rq->rq_status;
-again:
- for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
- mask = rqb->rqb_bits[i] & mask;
- if (mask == 0)
- continue;
- pri = RQB_FFS(mask) + (i << RQB_L2BPW);
- CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
- mask, i, pri);
- return (pri);
- }
- if (pri == 0)
- return (-1);
- /*
- * Wrap back around to the beginning of the list just once so we
- * scan the whole thing.
- */
- pri = 0;
- goto again;
+ (void)runq_sw_apply(rq, idx, &runq_sw_set_not_empty_op);
}
/*
- * Set the status bit of the queue corresponding to priority level pri,
- * indicating that it is non-empty.
+ * Modify the status words to indicate that some queue is empty.
+ *
+ * Clears the status bit corresponding to the queue at index 'idx'.
*/
-static __inline void
-runq_setbit(struct runq *rq, int pri)
+static inline uintptr_t
+runq_sw_set_empty_op(int idx, int sw_idx, rqsw_t sw_bit, rqsw_t *swp)
{
- struct rqbits *rqb;
+ rqsw_t old_sw __unused = *swp;
- rqb = &rq->rq_status;
- CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
- rqb->rqb_bits[RQB_WORD(pri)],
- rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
- RQB_BIT(pri), RQB_WORD(pri));
- rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
+ *swp &= ~sw_bit;
+ CTR4(KTR_RUNQ, "runq_sw_set_empty: idx=%d sw_idx=%d bits=%#x->%#x",
+ idx, sw_idx, old_sw, *swp);
+ return (0);
+}
+static inline void
+runq_sw_set_empty(struct runq *rq, int idx)
+{
+ (void)runq_sw_apply(rq, idx, &runq_sw_set_empty_op);
+}
+
+/*
+ * Returns whether the status words indicate that some queue is empty.
+ */
+static inline uintptr_t
+runq_sw_is_empty_op(int idx, int sw_idx, rqsw_t sw_bit, rqsw_t *swp)
+{
+ return ((*swp & sw_bit) == 0);
+}
+static inline bool
+runq_sw_is_empty(struct runq *rq, int idx)
+{
+ return (runq_sw_apply(rq, idx, &runq_sw_is_empty_op));
+}
+
+/*
+ * Returns whether a particular queue is empty.
+ */
+bool runq_is_queue_empty(struct runq *rq, int idx)
+{
+ return (runq_sw_is_empty(rq, idx));
}
/*
@@ -362,102 +388,183 @@
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
- struct rqhead *rqh;
- int pri;
- pri = td->td_priority / RQ_PPQ;
- td->td_rqindex = pri;
- runq_setbit(rq, pri);
- rqh = &rq->rq_queues[pri];
- CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
- td, td->td_priority, pri, rqh);
- if (flags & SRQ_PREEMPTED) {
- TAILQ_INSERT_HEAD(rqh, td, td_runq);
- } else {
- TAILQ_INSERT_TAIL(rqh, td, td_runq);
- }
+ runq_add_idx(rq, td, RQ_PRI_TO_QUEUE_IDX(td->td_priority), flags);
}
void
-runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
+runq_add_idx(struct runq *rq, struct thread *td, int idx, int flags)
{
- struct rqhead *rqh;
+ struct rq_queue *rqq;
- KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
- td->td_rqindex = pri;
- runq_setbit(rq, pri);
- rqh = &rq->rq_queues[pri];
- CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
- td, td->td_priority, pri, rqh);
- if (flags & SRQ_PREEMPTED) {
- TAILQ_INSERT_HEAD(rqh, td, td_runq);
- } else {
- TAILQ_INSERT_TAIL(rqh, td, td_runq);
- }
+ /*
+ * runq_sw_*() functions assert that 'idx' is non-negative and below
+ * 'RQ_NQS', and a static assert upper in this file ensures that
+ * 'RQ_NQS' is no more than 256.
+ */
+ td->td_rqindex = idx;
+ runq_sw_set_not_empty(rq, idx);
+ rqq = &rq->rq_queues[idx];
+ CTR4(KTR_RUNQ, "runq_add_idx: td=%p pri=%d idx=%d rqq=%p",
+ td, td->td_priority, idx, rqq);
+ if (flags & SRQ_PREEMPTED)
+ TAILQ_INSERT_HEAD(rqq, td, td_runq);
+ else
+ TAILQ_INSERT_TAIL(rqq, td, td_runq);
}
+
/*
- * Return true if there are runnable processes of any priority on the run
- * queue, false otherwise. Has no side effects, does not modify the run
- * queue structure.
+ * Remove the thread from the queue specified by its priority, and clear the
+ * corresponding status bit if the queue becomes empty.
+ *
+ * Returns whether the corresponding queue is empty after removal.
+ */
+bool
+runq_remove(struct runq *rq, struct thread *td)
+{
+ struct rq_queue *rqq;
+ int idx;
+
+ KASSERT(td->td_flags & TDF_INMEM, ("runq_remove: Thread swapped out"));
+ idx = td->td_rqindex;
+ CHECK_IDX(idx);
+ rqq = &rq->rq_queues[idx];
+ CTR4(KTR_RUNQ, "runq_remove: td=%p pri=%d idx=%d rqq=%p",
+ td, td->td_priority, idx, rqq);
+ TAILQ_REMOVE(rqq, td, td_runq);
+ if (TAILQ_EMPTY(rqq)) {
+ runq_sw_set_empty(rq, idx);
+ CTR1(KTR_RUNQ, "runq_remove: queue at idx=%d now empty", idx);
+ return (true);
+ }
+ return (false);
+}
+
+static inline int
+runq_findq_status_word(struct runq *const rq, const int w_idx,
+ const rqsw_t w, runq_pred_t *const pred, void *const pred_data)
+{
+ struct rq_queue *q;
+ rqsw_t tw = w;
+ int idx, b_idx;
+
+ while (tw != 0) {
+ b_idx = RQSW_BSF(tw);
+ idx = RQSW_TO_QUEUE_IDX(w_idx, b_idx);
+ q = &rq->rq_queues[idx];
+ KASSERT(!TAILQ_EMPTY(q),
+ ("runq_findq(): No thread on non-empty queue with idx=%d",
+ idx));
+ if (pred(idx, q, pred_data))
+ return (idx);
+ tw &= ~RQSW_BIT(idx);
+ }
+
+ return (-1);
+}
+
+/*
+ * Find in the passed range (bounds included) the index of the first (i.e.,
+ * having lower index) non-empty queue that passes pred().
+ *
+ * Considered queues are those with index 'lvl_min' up to 'lvl_max' (bounds
+ * included). If no queue matches, returns -1.
+ *
+ * This is done by scanning the status words (a set bit indicates a non-empty
+ * queue) and calling pred() with corresponding queue indices. pred() must
+ * return whether the corresponding queue is accepted. It is passed private
+ * data through 'pred_data', which can be used both for extra input and output.
*/
int
-runq_check(struct runq *rq)
+runq_findq(struct runq *const rq, const int lvl_min, const int lvl_max,
+ runq_pred_t *const pred, void *const pred_data)
{
- struct rqbits *rqb;
- int i;
+ rqsw_t const (*const rqsw)[RQSW_NB] = &rq->rq_status.rq_sw;
+ rqsw_t w;
+ int i, last, idx;
- rqb = &rq->rq_status;
- for (i = 0; i < RQB_LEN; i++)
- if (rqb->rqb_bits[i]) {
- CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
- rqb->rqb_bits[i], i);
- return (1);
- }
- CTR0(KTR_RUNQ, "runq_check: empty");
+ CHECK_IDX(lvl_min);
+ CHECK_IDX(lvl_max);
+ KASSERT(lvl_min <= lvl_max,
+ ("lvl_min: %d > lvl_max: %d!", lvl_min, lvl_max));
- return (0);
+ i = RQSW_IDX(lvl_min);
+ last = RQSW_IDX(lvl_max);
+ /* Clear bits for runqueues below 'lvl_min'. */
+ w = (*rqsw)[i] & ~(RQSW_BIT(lvl_min) - 1);
+ if (i == last)
+ goto last_mask;
+ idx = runq_findq_status_word(rq, i, w, pred, pred_data);
+ if (idx != -1)
+ goto return_idx;
+
+ for (++i; i < last; ++i) {
+ w = (*rqsw)[i];
+ idx = runq_findq_status_word(rq, i, w, pred, pred_data);
+ if (idx != -1)
+ goto return_idx;
+ }
+
+ MPASS(i == last);
+ w = (*rqsw)[i];
+last_mask:
+ /* Clear bits for runqueues above 'lvl_max'. */
+ w &= (RQSW_BIT(lvl_max) - 1) | RQSW_BIT(lvl_max);
+ idx = runq_findq_status_word(rq, i, w, pred, pred_data);
+ if (idx != -1)
+ goto return_idx;
+ return (-1);
+return_idx:
+ CTR4(KTR_RUNQ, "runq_findq: bits=%#x->%#x i=%d idx=%d",
+ (*rqsw)[i], w, i, idx);
+ return (idx);
+}
+
+static bool
+runq_first_thread_pred(const int idx, struct rq_queue *const q, void *const data)
+{
+ struct thread **const tdp = data;
+ struct thread *const td = TAILQ_FIRST(q);
+
+ *tdp = td;
+ return (true);
+}
+
+/* Make sure it has an external definition. */
+extern inline struct thread *
+runq_first_thread_range(struct runq *const rq, const int lvl_min,
+ const int lvl_max)
+{
+ struct thread *td = NULL;
+
+ (void)runq_findq(rq, lvl_min, lvl_max, runq_first_thread_pred, &td);
+ return (td);
+}
+
+static inline struct thread *
+runq_first_thread(struct runq *const rq)
+{
+
+ return (runq_first_thread_range(rq, 0, RQ_NQS - 1));
}
/*
- * Find the highest priority process on the run queue.
+ * Return true if there are some processes of any priority on the run queue,
+ * false otherwise. Has no side effects.
*/
-struct thread *
-runq_choose_fuzz(struct runq *rq, int fuzz)
+bool
+runq_not_empty(struct runq *rq)
{
- struct rqhead *rqh;
- struct thread *td;
- int pri;
+ struct thread *const td = runq_first_thread(rq);
- while ((pri = runq_findbit(rq)) != -1) {
- rqh = &rq->rq_queues[pri];
- /* fuzz == 1 is normal.. 0 or less are ignored */
- if (fuzz > 1) {
- /*
- * In the first couple of entries, check if
- * there is one for our CPU as a preference.
- */
- int count = fuzz;
- int cpu = PCPU_GET(cpuid);
- struct thread *td2;
- td2 = td = TAILQ_FIRST(rqh);
-
- while (count-- && td2) {
- if (td2->td_lastcpu == cpu) {
- td = td2;
- break;
- }
- td2 = TAILQ_NEXT(td2, td_runq);
- }
- } else
- td = TAILQ_FIRST(rqh);
- KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
- CTR3(KTR_RUNQ,
- "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
- return (td);
+ if (td != NULL) {
+ CTR2(KTR_RUNQ, "runq_not_empty: idx=%d, td=%p",
+ td->td_rqindex, td);
+ return (true);
}
- CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);
- return (NULL);
+ CTR0(KTR_RUNQ, "runq_not_empty: empty");
+ return (false);
}
/*
@@ -466,73 +573,74 @@
struct thread *
runq_choose(struct runq *rq)
{
- struct rqhead *rqh;
struct thread *td;
- int pri;
- while ((pri = runq_findbit(rq)) != -1) {
- rqh = &rq->rq_queues[pri];
- td = TAILQ_FIRST(rqh);
- KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
- CTR3(KTR_RUNQ,
- "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
+ td = runq_first_thread(rq);
+ if (td != NULL) {
+ CTR2(KTR_RUNQ, "runq_choose: idx=%d td=%p", td->td_rqindex, td);
return (td);
}
- CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);
+ CTR0(KTR_RUNQ, "runq_choose: idlethread");
return (NULL);
}
-struct thread *
-runq_choose_from(struct runq *rq, u_char idx)
+struct runq_fuzz_pred_data {
+ int fuzz;
+ struct thread *td;
+};
+
+static bool
+runq_fuzz_pred(const int idx, struct rq_queue *const q, void *const data)
{
- struct rqhead *rqh;
+ struct runq_fuzz_pred_data *const d = data;
+ const int fuzz = d->fuzz;
struct thread *td;
- int pri;
- if ((pri = runq_findbit_from(rq, idx)) != -1) {
- rqh = &rq->rq_queues[pri];
- td = TAILQ_FIRST(rqh);
- KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
- CTR4(KTR_RUNQ,
- "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
- pri, td, td->td_rqindex, rqh);
- return (td);
+ td = TAILQ_FIRST(q);
+
+ if (fuzz > 1) {
+ /*
+ * In the first couple of entries, check if
+ * there is one for our CPU as a preference.
+ */
+ struct thread *td2 = td;
+ int count = fuzz;
+ int cpu = PCPU_GET(cpuid);
+
+ while (count-- != 0 && td2 != NULL) {
+ if (td2->td_lastcpu == cpu) {
+ td = td2;
+ break;
+ }
+ td2 = TAILQ_NEXT(td2, td_runq);
+ }
}
- CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);
- return (NULL);
+ d->td = td;
+ return (true);
}
+
/*
- * Remove the thread from the queue specified by its priority, and clear the
- * corresponding status bit if the queue becomes empty.
- * Caller must set state afterwards.
+ * Find the highest priority process on the run queue.
*/
-void
-runq_remove(struct runq *rq, struct thread *td)
+struct thread *
+runq_choose_fuzz(struct runq *rq, int fuzz)
{
+ struct runq_fuzz_pred_data data = {
+ .fuzz = fuzz,
+ .td = NULL
+ };
+ int idx;
- runq_remove_idx(rq, td, NULL);
-}
-
-void
-runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
-{
- struct rqhead *rqh;
- u_char pri;
-
- KASSERT(td->td_flags & TDF_INMEM,
- ("runq_remove_idx: thread swapped out"));
- pri = td->td_rqindex;
- KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
- rqh = &rq->rq_queues[pri];
- CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
- td, td->td_priority, pri, rqh);
- TAILQ_REMOVE(rqh, td, td_runq);
- if (TAILQ_EMPTY(rqh)) {
- CTR0(KTR_RUNQ, "runq_remove_idx: empty");
- runq_clrbit(rq, pri);
- if (idx != NULL && *idx == pri)
- *idx = (pri + 1) % RQ_NQS;
+ idx = runq_findq(rq, 0, RQ_NQS - 1, runq_fuzz_pred, &data);
+ if (idx != -1) {
+ MPASS(data.td != NULL);
+ CTR2(KTR_RUNQ, "runq_choose_fuzz: idx=%d td=%p", idx, data.td);
+ return (data.td);
}
+
+ MPASS(data.td == NULL);
+ CTR0(KTR_RUNQ, "runq_choose_fuzz: idlethread");
+ return (NULL);
}
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -48,6 +48,7 @@
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/runq.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/smp.h>
@@ -72,15 +73,17 @@
* INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
* the range 100-256 Hz (approximately).
*/
-#define ESTCPULIM(e) \
- min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \
- RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1)
#ifdef SMP
#define INVERSE_ESTCPU_WEIGHT (8 * smp_cpus)
#else
#define INVERSE_ESTCPU_WEIGHT 8 /* 1 / (priorities per estcpu level). */
#endif
#define NICE_WEIGHT 1 /* Priorities per nice level. */
+#define ESTCPULIM(e) \
+ min((e), INVERSE_ESTCPU_WEIGHT * \
+ (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) + \
+ PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) \
+ + INVERSE_ESTCPU_WEIGHT - 1)
#define TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__XSTRING(UINT_MAX)))
@@ -683,13 +686,14 @@
/* Nothing needed. */
}
-int
+bool
sched_runnable(void)
{
#ifdef SMP
- return runq_check(&runq) + runq_check(&runq_pcpu[PCPU_GET(cpuid)]);
+ return (runq_not_empty(&runq) ||
+ runq_not_empty(&runq_pcpu[PCPU_GET(cpuid)]));
#else
- return runq_check(&runq);
+ return (runq_not_empty(&runq));
#endif
}
@@ -871,7 +875,7 @@
if (td->td_priority == prio)
return;
td->td_priority = prio;
- if (TD_ON_RUNQ(td) && td->td_rqindex != (prio / RQ_PPQ)) {
+ if (TD_ON_RUNQ(td) && td->td_rqindex != RQ_PRI_TO_QUEUE_IDX(prio)) {
sched_rem(td);
sched_add(td, SRQ_BORING | SRQ_HOLDTD);
}
@@ -1682,7 +1686,7 @@
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
- while (sched_runnable() == 0) {
+ while (!sched_runnable()) {
cpu_idle(stat->idlecalls + stat->oldidlecalls > 64);
stat->idlecalls++;
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -52,6 +52,7 @@
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
+#include <sys/runq.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/smp.h>
@@ -87,10 +88,9 @@
* Thread scheduler specific section. All fields are protected
* by the thread lock.
*/
-struct td_sched {
- struct runq *ts_runq; /* Run-queue we're queued on. */
+struct td_sched {
short ts_flags; /* TSF_* flags. */
- int ts_cpu; /* CPU that we have affinity for. */
+ int ts_cpu; /* CPU we are on, or were last on. */
int ts_rltick; /* Real last tick, for affinity. */
int ts_slice; /* Ticks of slice remaining. */
u_int ts_slptime; /* Number of ticks we vol. slept */
@@ -130,23 +130,6 @@
#define PRI_MIN_BATCH (PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE)
#define PRI_MAX_BATCH PRI_MAX_TIMESHARE
-/*
- * Cpu percentage computation macros and defines.
- *
- * SCHED_TICK_SECS: Number of seconds to average the cpu usage across.
- * SCHED_TICK_TARG: Number of hz ticks to average the cpu usage across.
- * SCHED_TICK_MAX: Maximum number of ticks before scaling back.
- * SCHED_TICK_SHIFT: Shift factor to avoid rounding away results.
- * SCHED_TICK_HZ: Compute the number of hz ticks for a given ticks count.
- * SCHED_TICK_TOTAL: Gives the amount of time we've been recording ticks.
- */
-#define SCHED_TICK_SECS 10
-#define SCHED_TICK_TARG (hz * SCHED_TICK_SECS)
-#define SCHED_TICK_MAX (SCHED_TICK_TARG + hz)
-#define SCHED_TICK_SHIFT 10
-#define SCHED_TICK_HZ(ts) ((ts)->ts_ticks >> SCHED_TICK_SHIFT)
-#define SCHED_TICK_TOTAL(ts) (max((ts)->ts_ltick - (ts)->ts_ftick, hz))
-
/*
* These macros determine priorities for non-interactive threads. They are
* assigned a priority based on their recent cpu utilization as expressed
@@ -169,6 +152,48 @@
(roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
#define SCHED_PRI_NICE(nice) (nice)
+/*
+ * Runqueue indices for the implemented scheduling policies' priority bounds.
+ *
+ * In ULE's implementation, realtime policy covers the ITHD, REALTIME and
+ * INTERACT (see above) ranges, timesharing the BATCH range (see above), and
+ * idle policy the IDLE range.
+ *
+ * Priorities from these ranges must not be assigned to the same runqueue's
+ * queue.
+ */
+#define RQ_RT_POL_MIN (RQ_PRI_TO_QUEUE_IDX(PRI_MIN_ITHD))
+#define RQ_RT_POL_MAX (RQ_PRI_TO_QUEUE_IDX(PRI_MAX_INTERACT))
+#define RQ_TS_POL_MIN (RQ_PRI_TO_QUEUE_IDX(PRI_MIN_BATCH))
+#define RQ_TS_POL_MAX (RQ_PRI_TO_QUEUE_IDX(PRI_MAX_BATCH))
+#define RQ_ID_POL_MIN (RQ_PRI_TO_QUEUE_IDX(PRI_MIN_IDLE))
+#define RQ_ID_POL_MAX (RQ_PRI_TO_QUEUE_IDX(PRI_MAX_IDLE))
+
+_Static_assert(RQ_RT_POL_MAX != RQ_TS_POL_MIN,
+ "ULE's realtime and timeshare policies' runqueue ranges overlap");
+_Static_assert(RQ_TS_POL_MAX != RQ_ID_POL_MIN,
+ "ULE's timeshare and idle policies' runqueue ranges overlap");
+
+/* Helper to treat the timeshare range as a circular group of queues. */
+#define RQ_TS_POL_MODULO (RQ_TS_POL_MAX - RQ_TS_POL_MIN + 1)
+
+/*
+ * Cpu percentage computation macros and defines.
+ *
+ * SCHED_TICK_SECS: Number of seconds to average the cpu usage across.
+ * SCHED_TICK_TARG: Number of hz ticks to average the cpu usage across.
+ * SCHED_TICK_MAX: Maximum number of ticks before scaling back.
+ * SCHED_TICK_SHIFT: Shift factor to avoid rounding away results.
+ * SCHED_TICK_HZ: Compute the number of hz ticks for a given ticks count.
+ * SCHED_TICK_TOTAL: Gives the amount of time we've been recording ticks.
+ */
+#define SCHED_TICK_SECS 10
+#define SCHED_TICK_TARG (hz * SCHED_TICK_SECS)
+#define SCHED_TICK_MAX (SCHED_TICK_TARG + hz)
+#define SCHED_TICK_SHIFT 10
+#define SCHED_TICK_HZ(ts) ((ts)->ts_ticks >> SCHED_TICK_SHIFT)
+#define SCHED_TICK_TOTAL(ts) (max((ts)->ts_ltick - (ts)->ts_ftick, hz))
+
/*
* These determine the interactivity of a process. Interactivity differs from
* cpu utilization in that it expresses the voluntary time slept vs time ran
@@ -252,12 +277,10 @@
short tdq_oldswitchcnt; /* (l) Switches last tick. */
u_char tdq_lowpri; /* (ts) Lowest priority thread. */
u_char tdq_owepreempt; /* (f) Remote preemption pending. */
- u_char tdq_idx; /* (t) Current insert index. */
- u_char tdq_ridx; /* (t) Current removal index. */
+ u_char tdq_ts_off; /* (t) TS insertion offset. */
+ u_char tdq_ts_deq_off; /* (t) TS dequeue offset. */
int tdq_id; /* (c) cpuid. */
- struct runq tdq_realtime; /* (t) real-time run queue. */
- struct runq tdq_timeshare; /* (t) timeshare run queue. */
- struct runq tdq_idle; /* (t) Queue of IDLE threads. */
+ struct runq tdq_runq; /* (t) Run queue. */
char tdq_name[TDQ_NAME_LEN];
#ifdef KTR
char tdq_loadname[TDQ_LOADNAME_LEN];
@@ -329,12 +352,17 @@
static void sched_pctcpu_update(struct td_sched *, int);
/* Operations on per processor queues */
+static inline struct thread *runq_choose_realtime(struct runq *const rq);
+static inline struct thread *runq_choose_timeshare(struct runq *const rq,
+ int off);
+static inline struct thread *runq_choose_idle(struct runq *const rq);
static struct thread *tdq_choose(struct tdq *);
+
static void tdq_setup(struct tdq *, int i);
static void tdq_load_add(struct tdq *, struct thread *);
static void tdq_load_rem(struct tdq *, struct thread *);
-static __inline void tdq_runq_add(struct tdq *, struct thread *, int);
-static __inline void tdq_runq_rem(struct tdq *, struct thread *);
+static inline void tdq_runq_add(struct tdq *, struct thread *, int);
+static inline void tdq_runq_rem(struct tdq *, struct thread *);
static inline int sched_shouldpreempt(int, int, int);
static void tdq_print(int cpu);
static void runq_print(struct runq *rq);
@@ -343,8 +371,19 @@
static int tdq_move(struct tdq *, struct tdq *);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct tdq *, int lowpri);
+
+static bool runq_steal_pred(const int idx, struct rq_queue *const q,
+ void *const data);
+static inline struct thread *runq_steal_range(struct runq *const rq,
+ const int lvl_min, const int lvl_max, int cpu);
+static inline struct thread *runq_steal_realtime(struct runq *const rq,
+ int cpu);
+static inline struct thread *runq_steal_timeshare(struct runq *const rq,
+ int cpu, int off);
+static inline struct thread *runq_steal_idle(struct runq *const rq,
+ int cpu);
static struct thread *tdq_steal(struct tdq *, int);
-static struct thread *runq_steal(struct runq *, int);
+
static int sched_pickcpu(struct thread *, int);
static void sched_balance(void);
static bool sched_balance_pair(struct tdq *, struct tdq *);
@@ -386,20 +425,20 @@
static void
runq_print(struct runq *rq)
{
- struct rqhead *rqh;
+ struct rq_queue *rqq;
struct thread *td;
int pri;
int j;
int i;
- for (i = 0; i < RQB_LEN; i++) {
+ for (i = 0; i < RQSW_NB; i++) {
printf("\t\trunq bits %d 0x%zx\n",
- i, rq->rq_status.rqb_bits[i]);
- for (j = 0; j < RQB_BPW; j++)
- if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
- pri = j + (i << RQB_L2BPW);
- rqh = &rq->rq_queues[pri];
- TAILQ_FOREACH(td, rqh, td_runq) {
+ i, rq->rq_status.rq_sw[i]);
+ for (j = 0; j < RQSW_BPW; j++)
+ if (rq->rq_status.rq_sw[i] & (1ul << j)) {
+ pri = RQSW_TO_QUEUE_IDX(i, j);
+ rqq = &rq->rq_queues[pri];
+ TAILQ_FOREACH(td, rqq, td_runq) {
printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
td, td->td_name, td->td_priority,
td->td_rqindex, pri);
@@ -419,21 +458,17 @@
tdq = TDQ_CPU(cpu);
printf("tdq %d:\n", TDQ_ID(tdq));
- printf("\tlock %p\n", TDQ_LOCKPTR(tdq));
- printf("\tLock name: %s\n", tdq->tdq_name);
- printf("\tload: %d\n", tdq->tdq_load);
- printf("\tswitch cnt: %d\n", tdq->tdq_switchcnt);
- printf("\told switch cnt: %d\n", tdq->tdq_oldswitchcnt);
- printf("\ttimeshare idx: %d\n", tdq->tdq_idx);
- printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
+ printf("\tlock %p\n", TDQ_LOCKPTR(tdq));
+ printf("\tLock name: %s\n", tdq->tdq_name);
+ printf("\tload: %d\n", tdq->tdq_load);
+ printf("\tswitch cnt: %d\n", tdq->tdq_switchcnt);
+ printf("\told switch cnt: %d\n", tdq->tdq_oldswitchcnt);
+ printf("\tTS insert offset: %d\n", tdq->tdq_ts_off);
+ printf("\tTS dequeue offset: %d\n", tdq->tdq_ts_deq_off);
printf("\tload transferable: %d\n", tdq->tdq_transferable);
printf("\tlowest priority: %d\n", tdq->tdq_lowpri);
- printf("\trealtime runq:\n");
- runq_print(&tdq->tdq_realtime);
- printf("\ttimeshare runq:\n");
- runq_print(&tdq->tdq_timeshare);
- printf("\tidle runq:\n");
- runq_print(&tdq->tdq_idle);
+ printf("\trunq:\n");
+ runq_print(&tdq->tdq_runq);
}
static inline int
@@ -474,11 +509,11 @@
* date with what is actually on the run-queue. Selects the correct
* queue position for timeshare threads.
*/
-static __inline void
+static inline void
tdq_runq_add(struct tdq *tdq, struct thread *td, int flags)
{
struct td_sched *ts;
- u_char pri;
+ u_char pri, idx;
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
@@ -490,62 +525,68 @@
tdq->tdq_transferable++;
ts->ts_flags |= TSF_XFERABLE;
}
- if (pri < PRI_MIN_BATCH) {
- ts->ts_runq = &tdq->tdq_realtime;
- } else if (pri <= PRI_MAX_BATCH) {
- ts->ts_runq = &tdq->tdq_timeshare;
- KASSERT(pri <= PRI_MAX_BATCH && pri >= PRI_MIN_BATCH,
- ("Invalid priority %d on timeshare runq", pri));
+ if (PRI_MIN_BATCH <= pri && pri <= PRI_MAX_BATCH) {
/*
- * This queue contains only priorities between MIN and MAX
- * batch. Use the whole queue to represent these values.
+ * The queues allocated to the batch range are not used as
+ * a simple array but as a "circular" one where the insertion
+ * index (derived from 'pri') is offset by 'tdq_ts_off'. 'idx'
+ * is first set to the offset of the wanted queue in the TS'
+ * selection policy range.
*/
- if ((flags & (SRQ_BORROWING|SRQ_PREEMPTED)) == 0) {
- pri = RQ_NQS * (pri - PRI_MIN_BATCH) / PRI_BATCH_RANGE;
- pri = (pri + tdq->tdq_idx) % RQ_NQS;
+ if ((flags & (SRQ_BORROWING|SRQ_PREEMPTED)) != 0)
+ /* Current queue from which processes are being run. */
+ idx = tdq->tdq_ts_deq_off;
+ else {
+ idx = (RQ_PRI_TO_QUEUE_IDX(pri) - RQ_TS_POL_MIN +
+ tdq->tdq_ts_off) % RQ_TS_POL_MODULO;
/*
- * This effectively shortens the queue by one so we
- * can have a one slot difference between idx and
- * ridx while we wait for threads to drain.
+ * We avoid enqueuing low priority threads in the queue
+ * that we are still draining, effectively shortening
+ * the runqueue by one queue.
*/
- if (tdq->tdq_ridx != tdq->tdq_idx &&
- pri == tdq->tdq_ridx)
- pri = (unsigned char)(pri - 1) % RQ_NQS;
- } else
- pri = tdq->tdq_ridx;
- runq_add_pri(ts->ts_runq, td, pri, flags);
- return;
+ if (tdq->tdq_ts_deq_off != tdq->tdq_ts_off &&
+ idx == tdq->tdq_ts_deq_off)
+ /* Ensure the dividend is positive. */
+ idx = (idx - 1 + RQ_TS_POL_MODULO) %
+ RQ_TS_POL_MODULO;
+ }
+ /* Absolute queue index. */
+ idx += RQ_TS_POL_MIN;
+ runq_add_idx(&tdq->tdq_runq, td, idx, flags);
} else
- ts->ts_runq = &tdq->tdq_idle;
- runq_add(ts->ts_runq, td, flags);
+ runq_add(&tdq->tdq_runq, td, flags);
}
-/*
+/*
* Remove a thread from a run-queue. This typically happens when a thread
* is selected to run. Running threads are not on the queue and the
* transferable count does not reflect them.
*/
-static __inline void
+static inline void
tdq_runq_rem(struct tdq *tdq, struct thread *td)
{
struct td_sched *ts;
+ bool queue_empty;
ts = td_get_sched(td);
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
- KASSERT(ts->ts_runq != NULL,
- ("tdq_runq_remove: thread %p null ts_runq", td));
if (ts->ts_flags & TSF_XFERABLE) {
tdq->tdq_transferable--;
ts->ts_flags &= ~TSF_XFERABLE;
}
- if (ts->ts_runq == &tdq->tdq_timeshare) {
- if (tdq->tdq_idx != tdq->tdq_ridx)
- runq_remove_idx(ts->ts_runq, td, &tdq->tdq_ridx);
- else
- runq_remove_idx(ts->ts_runq, td, NULL);
- } else
- runq_remove(ts->ts_runq, td);
+ queue_empty = runq_remove(&tdq->tdq_runq, td);
+ /*
+ * If thread has a batch priority and the queue from which it was
+ * removed is now empty, advance the batch's queue removal index if it
+ * lags with respect to the batch's queue insertion index.
+ */
+ if (queue_empty && PRI_MIN_BATCH <= td->td_priority &&
+ td->td_priority <= PRI_MAX_BATCH &&
+ tdq->tdq_ts_off != tdq->tdq_ts_deq_off &&
+ tdq->tdq_ts_deq_off == td->td_rqindex)
+ tdq->tdq_ts_deq_off = (tdq->tdq_ts_deq_off + 1) %
+ RQ_TS_POL_MODULO;
}
/*
@@ -1178,82 +1219,84 @@
ipi_cpu(cpu, IPI_PREEMPT);
}
+struct runq_steal_pred_data {
+ struct thread *td;
+ int cpu;
+};
+
+static bool
+runq_steal_pred(const int idx, struct rq_queue *const q, void *const data)
+{
+ struct runq_steal_pred_data *const d = data;
+ struct thread *td;
+
+ TAILQ_FOREACH(td, q, td_runq) {
+ if (THREAD_CAN_MIGRATE(td) && THREAD_CAN_SCHED(td, d->cpu)) {
+ d->td = td;
+ return (true);
+ }
+ }
+
+ return (false);
+}
+
+/*
+ * Steals load contained in queues with indices in the specified range.
+ */
+static inline struct thread *
+runq_steal_range(struct runq *const rq, const int lvl_min, const int lvl_max,
+ int cpu)
+{
+ struct runq_steal_pred_data data = {
+ .td = NULL,
+ .cpu = cpu,
+ };
+ int idx;
+
+ idx = runq_findq(rq, lvl_min, lvl_max, &runq_steal_pred, &data);
+ if (idx != -1) {
+ MPASS(data.td != NULL);
+ return (data.td);
+ }
+
+ MPASS(data.td == NULL);
+ return (NULL);
+}
+
+static inline struct thread *
+runq_steal_realtime(struct runq *const rq, int cpu)
+{
+
+ return (runq_steal_range(rq, RQ_RT_POL_MIN, RQ_RT_POL_MAX, cpu));
+}
+
/*
* Steals load from a timeshare queue. Honors the rotating queue head
* index.
*/
-static struct thread *
-runq_steal_from(struct runq *rq, int cpu, u_char start)
+static inline struct thread *
+runq_steal_timeshare(struct runq *const rq, int cpu, int off)
{
- struct rqbits *rqb;
- struct rqhead *rqh;
- struct thread *td, *first;
- int bit;
- int i;
-
- rqb = &rq->rq_status;
- bit = start & (RQB_BPW -1);
- first = NULL;
-again:
- for (i = RQB_WORD(start); i < RQB_LEN; bit = 0, i++) {
- if (rqb->rqb_bits[i] == 0)
- continue;
- if (bit == 0)
- bit = RQB_FFS(rqb->rqb_bits[i]);
- for (; bit < RQB_BPW; bit++) {
- if ((rqb->rqb_bits[i] & (1ul << bit)) == 0)
- continue;
- rqh = &rq->rq_queues[bit + (i << RQB_L2BPW)];
- TAILQ_FOREACH(td, rqh, td_runq) {
- if (first) {
- if (THREAD_CAN_MIGRATE(td) &&
- THREAD_CAN_SCHED(td, cpu))
- return (td);
- } else
- first = td;
- }
- }
- }
- if (start != 0) {
- start = 0;
- goto again;
- }
-
- if (first && THREAD_CAN_MIGRATE(first) &&
- THREAD_CAN_SCHED(first, cpu))
- return (first);
- return (NULL);
-}
-
-/*
- * Steals load from a standard linear queue.
- */
-static struct thread *
-runq_steal(struct runq *rq, int cpu)
-{
- struct rqhead *rqh;
- struct rqbits *rqb;
struct thread *td;
- int word;
- int bit;
- rqb = &rq->rq_status;
- for (word = 0; word < RQB_LEN; word++) {
- if (rqb->rqb_bits[word] == 0)
- continue;
- for (bit = 0; bit < RQB_BPW; bit++) {
- if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
- continue;
- rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
- TAILQ_FOREACH(td, rqh, td_runq)
- if (THREAD_CAN_MIGRATE(td) &&
- THREAD_CAN_SCHED(td, cpu))
- return (td);
- }
- }
- return (NULL);
+ MPASS(0 <= off && off < RQ_TS_POL_MODULO);
+
+ td = runq_steal_range(rq, RQ_TS_POL_MIN + off, RQ_TS_POL_MAX, cpu);
+ if (td != NULL || off == 0)
+ return (td);
+
+ td = runq_steal_range(rq, RQ_TS_POL_MIN, RQ_TS_POL_MIN + off - 1, cpu);
+ return (td);
}
+static inline struct thread *
+runq_steal_idle(struct runq *const rq, int cpu)
+{
+
+ return (runq_steal_range(rq, RQ_ID_POL_MIN, RQ_ID_POL_MAX, cpu));
+}
+
+
/*
* Attempt to steal a thread in priority order from a thread queue.
*/
@@ -1263,12 +1306,13 @@
struct thread *td;
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
- if ((td = runq_steal(&tdq->tdq_realtime, cpu)) != NULL)
+ td = runq_steal_realtime(&tdq->tdq_runq, cpu);
+ if (td != NULL)
return (td);
- if ((td = runq_steal_from(&tdq->tdq_timeshare,
- cpu, tdq->tdq_ridx)) != NULL)
+ td = runq_steal_timeshare(&tdq->tdq_runq, cpu, tdq->tdq_ts_deq_off);
+ if (td != NULL)
return (td);
- return (runq_steal(&tdq->tdq_idle, cpu));
+ return (runq_steal_idle(&tdq->tdq_runq, cpu));
}
/*
@@ -1450,6 +1494,35 @@
}
#endif
+static inline struct thread *
+runq_choose_realtime(struct runq *const rq)
+{
+
+ return (runq_first_thread_range(rq, RQ_RT_POL_MIN, RQ_RT_POL_MAX));
+}
+
+static struct thread *
+runq_choose_timeshare(struct runq *const rq, int off)
+{
+ struct thread *td;
+
+ MPASS(0 <= off && off < RQ_TS_POL_MODULO);
+
+ td = runq_first_thread_range(rq, RQ_TS_POL_MIN + off, RQ_TS_POL_MAX);
+ if (td != NULL || off == 0)
+ return (td);
+
+ td = runq_first_thread_range(rq, RQ_TS_POL_MIN, RQ_TS_POL_MIN + off - 1);
+ return (td);
+}
+
+static inline struct thread *
+runq_choose_idle(struct runq *const rq)
+{
+
+ return (runq_first_thread_range(rq, RQ_ID_POL_MIN, RQ_ID_POL_MAX));
+}
+
/*
* Pick the highest priority task we have and return it.
*/
@@ -1459,17 +1532,17 @@
struct thread *td;
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
- td = runq_choose(&tdq->tdq_realtime);
+ td = runq_choose_realtime(&tdq->tdq_runq);
if (td != NULL)
return (td);
- td = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
+ td = runq_choose_timeshare(&tdq->tdq_runq, tdq->tdq_ts_deq_off);
if (td != NULL) {
KASSERT(td->td_priority >= PRI_MIN_BATCH,
("tdq_choose: Invalid priority on timeshare queue %d",
td->td_priority));
return (td);
}
- td = runq_choose(&tdq->tdq_idle);
+ td = runq_choose_idle(&tdq->tdq_runq);
if (td != NULL) {
KASSERT(td->td_priority >= PRI_MIN_IDLE,
("tdq_choose: Invalid priority on idle queue %d",
@@ -1489,9 +1562,7 @@
if (bootverbose)
printf("ULE: setup cpu %d\n", id);
- runq_init(&tdq->tdq_realtime);
- runq_init(&tdq->tdq_timeshare);
- runq_init(&tdq->tdq_idle);
+ runq_init(&tdq->tdq_runq);
tdq->tdq_id = id;
snprintf(tdq->tdq_name, sizeof(tdq->tdq_name),
"sched lock %d", (int)TDQ_ID(tdq));
@@ -2595,13 +2666,14 @@
tdq->tdq_switchcnt = tdq->tdq_load;
/*
- * Advance the insert index once for each tick to ensure that all
+ * Advance the insert offset once for each tick to ensure that all
* threads get a chance to run.
*/
- if (tdq->tdq_idx == tdq->tdq_ridx) {
- tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS;
- if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx]))
- tdq->tdq_ridx = tdq->tdq_idx;
+ if (tdq->tdq_ts_off == tdq->tdq_ts_deq_off) {
+ tdq->tdq_ts_off = (tdq->tdq_ts_off + 1) % RQ_TS_POL_MODULO;
+ if (runq_is_queue_empty(&tdq->tdq_runq,
+ tdq->tdq_ts_deq_off + RQ_TS_POL_MIN))
+ tdq->tdq_ts_deq_off = tdq->tdq_ts_off;
}
ts = td_get_sched(td);
sched_pctcpu_update(ts, 1);
@@ -2655,24 +2727,20 @@
* Return whether the current CPU has runnable tasks. Used for in-kernel
* cooperative idle threads.
*/
-int
+bool
sched_runnable(void)
{
struct tdq *tdq;
- int load;
-
- load = 1;
tdq = TDQ_SELF();
if ((curthread->td_flags & TDF_IDLETD) != 0) {
if (TDQ_LOAD(tdq) > 0)
- goto out;
+ return (true);
} else
if (TDQ_LOAD(tdq) - 1 > 0)
- goto out;
- load = 0;
-out:
- return (load);
+ return (true);
+
+ return (false);
}
/*
diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c
--- a/sys/kern/subr_log.c
+++ b/sys/kern/subr_log.c
@@ -47,7 +47,7 @@
#include <sys/filedesc.h>
#include <sys/sysctl.h>
-#define LOG_RDPRI (PZERO + 1)
+#define LOG_RDPRI PZERO
#define LOG_ASYNC 0x04
diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c
--- a/sys/kern/sysv_msg.c
+++ b/sys/kern/sysv_msg.c
@@ -894,7 +894,7 @@
we_own_it = 1;
}
DPRINTF(("msgsnd: goodnight\n"));
- error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
+ error = msleep(msqkptr, &msq_mtx, PVFS | PCATCH,
"msgsnd", hz);
DPRINTF(("msgsnd: good morning, error=%d\n", error));
if (we_own_it)
@@ -1303,7 +1303,7 @@
*/
DPRINTF(("msgrcv: goodnight\n"));
- error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
+ error = msleep(msqkptr, &msq_mtx, PVFS | PCATCH,
"msgrcv", 0);
DPRINTF(("msgrcv: good morning (error=%d)\n", error));
diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c
--- a/sys/kern/sysv_sem.c
+++ b/sys/kern/sysv_sem.c
@@ -1309,7 +1309,7 @@
semptr->semncnt++;
DPRINTF(("semop: good night!\n"));
- error = msleep_sbt(semakptr, sema_mtxp, (PZERO - 4) | PCATCH,
+ error = msleep_sbt(semakptr, sema_mtxp, PVFS | PCATCH,
"semwait", sbt, precision, C_ABSOLUTE);
DPRINTF(("semop: good morning (error=%d)!\n", error));
/* return code is checked below, after sem[nz]cnt-- */
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -756,7 +756,7 @@
break;
}
error = msleep(&bd->bd_wanted, BD_LOCKPTR(bd),
- (PRIBIO + 4) | slpflag, "newbuf", slptimeo);
+ PVFS | slpflag, "newbuf", slptimeo);
if (error != 0)
break;
}
@@ -2654,8 +2654,7 @@
mtx_lock(&bdirtylock);
while (buf_dirty_count_severe()) {
bdirtywait = 1;
- msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4),
- "flswai", 0);
+ msleep(&bdirtywait, &bdirtylock, PVFS, "flswai", 0);
}
mtx_unlock(&bdirtylock);
}
@@ -5234,7 +5233,7 @@
while (bo->bo_numoutput) {
bo->bo_flag |= BO_WWAIT;
error = msleep(&bo->bo_numoutput, BO_LOCKPTR(bo),
- slpflag | (PRIBIO + 1), "bo_wwait", timeo);
+ slpflag | PRIBIO, "bo_wwait", timeo);
if (error)
break;
}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -787,7 +787,7 @@
}
DROP_GIANT();
sleepq_add(&fp->f_vnread_flags, NULL, "vofflock", 0, 0);
- sleepq_wait(&fp->f_vnread_flags, PUSER -1);
+ sleepq_wait(&fp->f_vnread_flags, PRI_MAX_KERN);
PICKUP_GIANT();
sleepq_lock(&fp->f_vnread_flags);
state = atomic_load_16(flagsp);
@@ -849,7 +849,7 @@
if ((flags & FOF_NOLOCK) == 0) {
while (fp->f_vnread_flags & FOFFSET_LOCKED) {
fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
- msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
+ msleep(&fp->f_vnread_flags, mtxp, PRI_MAX_KERN,
"vofflock", 0);
}
fp->f_vnread_flags |= FOFFSET_LOCKED;
@@ -1897,7 +1897,7 @@
if (flags & V_PCATCH)
mflags |= PCATCH;
}
- mflags |= (PUSER - 1);
+ mflags |= PRI_MAX_KERN;
while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
if ((flags & V_NOWAIT) != 0) {
error = EWOULDBLOCK;
@@ -2022,7 +2022,7 @@
if ((flags & V_PCATCH) != 0)
mflags |= PCATCH;
}
- mflags |= (PUSER - 1) | PDROP;
+ mflags |= PRI_MAX_KERN | PDROP;
error = msleep(&mp->mnt_flag, MNT_MTX(mp), mflags, "suspfs", 0);
vfs_rel(mp);
if (error == 0)
@@ -2107,7 +2107,7 @@
return (EALREADY);
}
while (mp->mnt_kern_flag & MNTK_SUSPEND)
- msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0);
+ msleep(&mp->mnt_flag, MNT_MTX(mp), PRI_MAX_KERN, "wsuspfs", 0);
/*
* Unmount holds a write reference on the mount point. If we
@@ -2128,7 +2128,7 @@
mp->mnt_susp_owner = curthread;
if (mp->mnt_writeopcount > 0)
(void) msleep(&mp->mnt_writeopcount,
- MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
+ MNT_MTX(mp), PRI_MAX_KERN | PDROP, "suspwt", 0);
else
MNT_IUNLOCK(mp);
if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0) {
diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c
--- a/sys/net/if_tuntap.c
+++ b/sys/net/if_tuntap.c
@@ -834,7 +834,7 @@
tp->tun_flags &= ~TUN_RWAIT;
wakeup(tp);
}
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ selwakeuppri(&tp->tun_rsel, PZERO);
KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
TUN_UNLOCK(tp);
@@ -1172,7 +1172,7 @@
CURVNET_RESTORE();
funsetown(&tp->tun_sigio);
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ selwakeuppri(&tp->tun_rsel, PZERO);
KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
TUNDEBUG (ifp, "closed\n");
tp->tun_flags &= ~TUN_OPEN;
@@ -1706,7 +1706,7 @@
return (EWOULDBLOCK);
}
tp->tun_flags |= TUN_RWAIT;
- error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
+ error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | PZERO,
"tunread", 0);
if (error != 0) {
TUN_UNLOCK(tp);
diff --git a/sys/netgraph/ng_device.c b/sys/netgraph/ng_device.c
--- a/sys/netgraph/ng_device.c
+++ b/sys/netgraph/ng_device.c
@@ -462,7 +462,7 @@
mtx_lock(&priv->ngd_mtx);
priv->flags |= NGDF_RWAIT;
if ((error = msleep(priv, &priv->ngd_mtx,
- PDROP | PCATCH | (PZERO + 1),
+ PDROP | PCATCH | PZERO,
"ngdread", 0)) != 0)
return (error);
}
diff --git a/sys/powerpc/include/runq.h b/sys/powerpc/include/runq.h
deleted file mode 100644
--- a/sys/powerpc/include/runq.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#ifdef __powerpc64__
-#define RQB_LEN (1UL) /* Number of priority status words. */
-#define RQB_L2BPW (6UL) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#else
-#define RQB_LEN (2) /* Number of priority status words. */
-#define RQB_L2BPW (5) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#endif
-#define RQB_BPW (1UL<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1UL << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffsl(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-#ifdef __powerpc64__
-typedef u_int64_t rqb_word_t;
-#else
-typedef u_int32_t rqb_word_t;
-#endif
-
-#endif
diff --git a/sys/riscv/include/runq.h b/sys/riscv/include/runq.h
deleted file mode 100644
--- a/sys/riscv/include/runq.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (1) /* Number of priority status words. */
-#define RQB_L2BPW (6) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1ul << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffsl(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-typedef unsigned long rqb_word_t;
-
-#endif
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -296,7 +296,7 @@
* Initialize a lock.
*/
#define BUF_LOCKINIT(bp, wmesg) \
- lockinit(&(bp)->b_lock, PRIBIO + 4, wmesg, 0, LK_NEW)
+ lockinit(&(bp)->b_lock, PVFS, wmesg, 0, LK_NEW)
/*
*
* Get a lock sleeping non-interruptably until it becomes available.
@@ -311,7 +311,7 @@
*/
#define BUF_TIMELOCK(bp, locktype, interlock, wmesg, catch, timo) \
_lockmgr_args_rw(&(bp)->b_lock, (locktype) | LK_TIMELOCK, \
- (interlock), (wmesg), (PRIBIO + 4) | (catch), (timo), \
+ (interlock), (wmesg), PVFS | (catch), (timo), \
LOCK_FILE, LOCK_LINE)
/*
diff --git a/sys/sys/param.h b/sys/sys/param.h
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -73,7 +73,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1500018
+#define __FreeBSD_version 1500019
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/priority.h b/sys/sys/priority.h
--- a/sys/sys/priority.h
+++ b/sys/sys/priority.h
@@ -64,17 +64,23 @@
*/
/*
- * Priorities range from 0 to 255, but differences of less then 4 (RQ_PPQ)
- * are insignificant. Ranges are as follows:
+ * Priorities range from 0 to 255. Ranges are as follows:
*
- * Interrupt threads: 0 - 15
- * Realtime user threads: 16 - 47
- * Top half kernel threads: 48 - 87
- * Time sharing user threads: 88 - 223
+ * Interrupt threads: 0 - 7
+ * Realtime user threads: 8 - 39
+ * Top half kernel threads: 40 - 55
+ * Time sharing user threads: 56 - 223
* Idle user threads: 224 - 255
*
- * XXX If/When the specific interrupt thread and top half thread ranges
- * disappear, a larger range can be used for user processes.
+ * Priority levels of rtprio(2)'s RTP_PRIO_FIFO and RTP_PRIO_REALTIME and
+ * POSIX's SCHED_FIFO and SCHED_RR are directly mapped to the internal realtime
+ * range mentioned above by a simple translation. This range's length
+ * consequently cannot be changed without impacts on the scheduling priority
+ * code, and in any case must never be smaller than 32 for POSIX compliance and
+ * rtprio(2) backwards compatibility. Similarly, priority levels of rtprio(2)'s
+ * RTP_PRIO_IDLE are directly mapped to the internal idle range above (and,
+ * soon, those of the to-be-introduced SCHED_IDLE policy as well), so changing
+ * that range is subject to the same caveats and restrictions.
*/
#define PRI_MIN (0) /* Highest priority. */
@@ -88,34 +94,34 @@
* decay to lower priorities if they run for full time slices.
*/
#define PI_REALTIME (PRI_MIN_ITHD + 0)
-#define PI_INTR (PRI_MIN_ITHD + 4)
+#define PI_INTR (PRI_MIN_ITHD + 1)
#define PI_AV PI_INTR
#define PI_NET PI_INTR
#define PI_DISK PI_INTR
#define PI_TTY PI_INTR
#define PI_DULL PI_INTR
-#define PI_SOFT (PRI_MIN_ITHD + 8)
+#define PI_SOFT (PRI_MIN_ITHD + 2)
#define PI_SOFTCLOCK PI_SOFT
#define PI_SWI(x) PI_SOFT
-#define PRI_MIN_REALTIME (16)
+#define PRI_MIN_REALTIME (8)
#define PRI_MAX_REALTIME (PRI_MIN_KERN - 1)
-#define PRI_MIN_KERN (48)
+#define PRI_MIN_KERN (40)
#define PRI_MAX_KERN (PRI_MIN_TIMESHARE - 1)
#define PSWP (PRI_MIN_KERN + 0)
-#define PVM (PRI_MIN_KERN + 4)
-#define PINOD (PRI_MIN_KERN + 8)
-#define PRIBIO (PRI_MIN_KERN + 12)
-#define PVFS (PRI_MIN_KERN + 16)
-#define PZERO (PRI_MIN_KERN + 20)
-#define PSOCK (PRI_MIN_KERN + 24)
-#define PWAIT (PRI_MIN_KERN + 28)
-#define PLOCK (PRI_MIN_KERN + 32)
-#define PPAUSE (PRI_MIN_KERN + 36)
+#define PVM (PRI_MIN_KERN + 1)
+#define PINOD (PRI_MIN_KERN + 2)
+#define PRIBIO (PRI_MIN_KERN + 3)
+#define PVFS (PRI_MIN_KERN + 4)
+#define PZERO (PRI_MIN_KERN + 5)
+#define PSOCK (PRI_MIN_KERN + 6)
+#define PWAIT (PRI_MIN_KERN + 7)
+#define PLOCK (PRI_MIN_KERN + 8)
+#define PPAUSE (PRI_MIN_KERN + 9)
-#define PRI_MIN_TIMESHARE (88)
+#define PRI_MIN_TIMESHARE (56)
#define PRI_MAX_TIMESHARE (PRI_MIN_IDLE - 1)
#define PUSER (PRI_MIN_TIMESHARE)
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -53,7 +53,6 @@
#include <sys/osd.h>
#include <sys/priority.h>
#include <sys/rtprio.h> /* XXX. */
-#include <sys/runq.h>
#include <sys/resource.h>
#include <sys/sigio.h>
#include <sys/signal.h>
diff --git a/sys/sys/runq.h b/sys/sys/runq.h
--- a/sys/sys/runq.h
+++ b/sys/sys/runq.h
@@ -29,7 +29,11 @@
#ifndef _RUNQ_H_
#define _RUNQ_H_
-#include <machine/runq.h>
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+#include <sys/types.h> /* For bool. */
struct thread;
@@ -37,20 +41,65 @@
* Run queue parameters.
*/
-#define RQ_NQS (64) /* Number of run queues. */
-#define RQ_PPQ (4) /* Priorities per queue. */
+#define RQ_MAX_PRIO (255) /* Maximum priority (minimum is 0). */
+#define RQ_PPQ (1) /* Priorities per queue. */
/*
- * Head of run queues.
+ * Convenience macros from <sys/param.h>.
*/
-TAILQ_HEAD(rqhead, thread);
+#ifndef NBBY
+#define NBBY 8
+#endif
+#ifndef howmany
+#define howmany(x, y) (((x)+((y)-1))/(y))
+#endif
+
+/*
+ * Deduced from the above parameters and machine ones.
+ */
+#define RQ_NQS (howmany(RQ_MAX_PRIO + 1, RQ_PPQ)) /* Number of run queues. */
+#define RQ_PRI_TO_QUEUE_IDX(pri) ((pri) / RQ_PPQ) /* Priority to queue index. */
+
+typedef unsigned long rqsw_t; /* runq's status words type. */
+#define RQSW_BPW (sizeof(rqsw_t) * NBBY) /* Bits per runq word. */
+#if defined(_LP64)
+#define RQSW_L2BPW (6) /* Log2(sizeof(rqsw_t) * NBBY). */
+#elif defined(_ILP32)
+#define RQSW_L2BPW (5) /* Log2(sizeof(rqsw_t) * NBBY). */
+#else
+#error Not _LP64 nor _ILP32!
+#endif
+/*
+ * That RQSW_BPW and RQSW_L2BPW are consistent is checked by a static assertion.
+ */
+
+/* Number of status words to cover RQ_NQS queues. */
+#define RQSW_NB (howmany(RQ_NQS, RQSW_BPW))
+#define RQSW_IDX(idx) ((idx) >> RQSW_L2BPW)
+#define RQSW_BIT_IDX(idx) ((idx) & (RQSW_BPW - 1))
+#define RQSW_BIT(idx) (1ul << RQSW_BIT_IDX(idx))
+#define RQSW_BSF(word) ({ \
+ int _res = ffsl((long)(word)); /* Assumes two's complement. */ \
+ MPASS(_res > 0); \
+ _res - 1; \
+})
+#define RQSW_TO_QUEUE_IDX(word_idx, bit_idx) \
+ (((word_idx) << RQSW_L2BPW) + (bit_idx))
+#define RQSW_FIRST_QUEUE_IDX(word_idx, word) \
+ RQSW_TO_QUEUE_IDX(word_idx, RQSW_BSF(word))
+
+
+/*
+ * The queue for a given index as a list of threads.
+ */
+TAILQ_HEAD(rq_queue, thread);
/*
* Bit array which maintains the status of a run queue. When a queue is
* non-empty the bit corresponding to the queue number will be set.
*/
-struct rqbits {
- rqb_word_t rqb_bits[RQB_LEN];
+struct rq_status {
+ rqsw_t rq_sw[RQSW_NB];
};
/*
@@ -58,18 +107,29 @@
* are placed, and a structure to maintain the status of each queue.
*/
struct runq {
- struct rqbits rq_status;
- struct rqhead rq_queues[RQ_NQS];
+ struct rq_status rq_status;
+ struct rq_queue rq_queues[RQ_NQS];
};
-void runq_add(struct runq *, struct thread *, int);
-void runq_add_pri(struct runq *, struct thread *, u_char, int);
-int runq_check(struct runq *);
-struct thread *runq_choose(struct runq *);
-struct thread *runq_choose_from(struct runq *, u_char);
-struct thread *runq_choose_fuzz(struct runq *, int);
void runq_init(struct runq *);
-void runq_remove(struct runq *, struct thread *);
-void runq_remove_idx(struct runq *, struct thread *, u_char *);
+bool runq_is_queue_empty(struct runq *, int _idx);
+void runq_add(struct runq *, struct thread *, int _flags);
+void runq_add_idx(struct runq *, struct thread *, int _idx, int _flags);
+bool runq_remove(struct runq *, struct thread *);
+
+/*
+ * Implementation helpers for common and scheduler-specific runq_choose*()
+ * functions.
+ */
+typedef bool runq_pred_t(int _idx, struct rq_queue *, void *_data);
+int runq_findq(struct runq *const rq, const int lvl_min,
+ const int lvl_max,
+ runq_pred_t *const pred, void *const pred_data);
+struct thread *runq_first_thread_range(struct runq *const rq,
+ const int lvl_min, const int lvl_max);
+
+bool runq_not_empty(struct runq *);
+struct thread *runq_choose(struct runq *);
+struct thread *runq_choose_fuzz(struct runq *, int _fuzz);
#endif
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -63,6 +63,9 @@
#define _SCHED_H_
#ifdef _KERNEL
+
+#include <sys/types.h> /* For bool. */
+
/*
* General scheduling info.
*
@@ -74,7 +77,7 @@
*/
int sched_load(void);
int sched_rr_interval(void);
-int sched_runnable(void);
+bool sched_runnable(void);
/*
* Proc related scheduling hooks.
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -497,7 +497,7 @@
while (mp->mnt_secondary_writes != 0) {
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
- (PUSER - 1) | PDROP, "secwr", 0);
+ PRI_MAX_KERN | PDROP, "secwr", 0);
BO_LOCK(bo);
MNT_ILOCK(mp);
}
@@ -14561,7 +14561,7 @@
while (mp->mnt_secondary_writes != 0) {
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
- (PUSER - 1) | PDROP, "secwr", 0);
+ PRI_MAX_KERN | PDROP, "secwr", 0);
BO_LOCK(bo);
MNT_ILOCK(mp);
}
@@ -14601,7 +14601,7 @@
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes,
MNT_MTX(mp),
- (PUSER - 1) | PDROP, "secwr", 0);
+ PRI_MAX_KERN | PDROP, "secwr", 0);
BO_LOCK(bo);
continue;
}
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1399,8 +1399,7 @@
VI_LOCK(vp);
while (ip->i_flag & IN_EA_LOCKED) {
UFS_INODE_SET_FLAG(ip, IN_EA_LOCKWAIT);
- msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
- 0);
+ msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD, "ufs_ea", 0);
}
UFS_INODE_SET_FLAG(ip, IN_EA_LOCKED);
VI_UNLOCK(vp);
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -179,7 +179,7 @@
if ((dq = ip->i_dquot[i]) == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkdq1");
+ DQI_WAIT(dq, PINOD, "chkdq1");
ncurblocks = dq->dq_curblocks + change;
if (ncurblocks >= 0)
dq->dq_curblocks = ncurblocks;
@@ -201,7 +201,7 @@
continue;
warn = 0;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkdq2");
+ DQI_WAIT(dq, PINOD, "chkdq2");
if (do_check) {
error = chkdqchg(ip, change, cred, i, &warn);
if (error) {
@@ -215,7 +215,7 @@
if (dq == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkdq3");
+ DQI_WAIT(dq, PINOD, "chkdq3");
ncurblocks = dq->dq_curblocks - change;
if (ncurblocks >= 0)
dq->dq_curblocks = ncurblocks;
@@ -320,7 +320,7 @@
if ((dq = ip->i_dquot[i]) == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkiq1");
+ DQI_WAIT(dq, PINOD, "chkiq1");
if (dq->dq_curinodes >= -change)
dq->dq_curinodes += change;
else
@@ -341,7 +341,7 @@
continue;
warn = 0;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkiq2");
+ DQI_WAIT(dq, PINOD, "chkiq2");
if (do_check) {
error = chkiqchg(ip, change, cred, i, &warn);
if (error) {
@@ -355,7 +355,7 @@
if (dq == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkiq3");
+ DQI_WAIT(dq, PINOD, "chkiq3");
if (dq->dq_curinodes >= change)
dq->dq_curinodes -= change;
else
@@ -855,7 +855,7 @@
return (error);
dq = ndq;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "setqta");
+ DQI_WAIT(dq, PINOD, "setqta");
/*
* Copy all but the current values.
* Reset time limit if previously had no soft limit or were
@@ -918,7 +918,7 @@
return (error);
dq = ndq;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "setuse");
+ DQI_WAIT(dq, PINOD, "setuse");
/*
* Reset time limit if have a soft limit and were
* previously under it, but are now over it.
@@ -1314,7 +1314,7 @@
if (dq != NULL) {
DQH_UNLOCK();
hfound: DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "dqget");
+ DQI_WAIT(dq, PINOD, "dqget");
DQI_UNLOCK(dq);
if (dq->dq_ump == NULL) {
dqrele(vp, dq);
@@ -1588,7 +1588,7 @@
vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY);
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+2, "dqsync");
+ DQI_WAIT(dq, PINOD, "dqsync");
if ((dq->dq_flags & DQ_MOD) == 0)
goto out;
dq->dq_flags |= DQ_LOCK;
@@ -1742,7 +1742,7 @@
if ((dq = qrp[i]) == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "adjqta");
+ DQI_WAIT(dq, PINOD, "adjqta");
ncurblocks = dq->dq_curblocks + blkcount;
if (ncurblocks >= 0)
dq->dq_curblocks = ncurblocks;
diff --git a/tests/sys/kern/ptrace_test.c b/tests/sys/kern/ptrace_test.c
--- a/tests/sys/kern/ptrace_test.c
+++ b/tests/sys/kern/ptrace_test.c
@@ -34,7 +34,6 @@
#include <sys/ptrace.h>
#include <sys/procfs.h>
#include <sys/queue.h>
-#include <sys/runq.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/user.h>
@@ -2027,7 +2026,7 @@
sched_get_priority_min(SCHED_FIFO)) / 2;
CHILD_REQUIRE(pthread_setschedparam(pthread_self(),
SCHED_FIFO, &sched_param) == 0);
- sched_param.sched_priority -= RQ_PPQ;
+ sched_param.sched_priority -= 1;
CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO,
&sched_param) == 0);
@@ -2130,7 +2129,7 @@
sched_get_priority_min(SCHED_FIFO)) / 2;
CHILD_REQUIRE(pthread_setschedparam(pthread_self(),
SCHED_FIFO, &sched_param) == 0);
- sched_param.sched_priority -= RQ_PPQ;
+ sched_param.sched_priority -= 1;
CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO,
&sched_param) == 0);

File Metadata

Mime Type
text/plain
Expires
Sun, Sep 22, 12:38 PM (22 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12451471
Default Alt Text
D45393.diff (80 KB)

Event Timeline