Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F95555344
D45393.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
80 KB
Referenced Files
None
Subscribers
None
D45393.diff
View Options
diff --git a/sys/amd64/include/runq.h b/sys/amd64/include/runq.h
deleted file mode 100644
--- a/sys/amd64/include/runq.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (1) /* Number of priority status words. */
-#define RQB_L2BPW (6) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1ul << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (bsfq(word))
-
-/*
- * Type of run queue status word.
- */
-typedef u_int64_t rqb_word_t;
-
-#endif
diff --git a/sys/arm/include/runq.h b/sys/arm/include/runq.h
deleted file mode 100644
--- a/sys/arm/include/runq.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (2) /* Number of priority status words. */
-#define RQB_L2BPW (5) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1 << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffs(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-typedef u_int32_t rqb_word_t;
-
-#endif
diff --git a/sys/arm64/include/runq.h b/sys/arm64/include/runq.h
deleted file mode 100644
--- a/sys/arm64/include/runq.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*-
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifdef __arm__
-#include <arm/runq.h>
-#else /* !__arm__ */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (1) /* Number of priority status words. */
-#define RQB_L2BPW (6) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1ul << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffsl(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-typedef unsigned long rqb_word_t;
-
-#endif
-
-#endif /* !__arm__ */
diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c
--- a/sys/cam/ctl/ctl.c
+++ b/sys/cam/ctl/ctl.c
@@ -14307,7 +14307,7 @@
CTL_DEBUG_PRINT(("ctl_work_thread starting\n"));
thread_lock(curthread);
- sched_prio(curthread, PUSER - 1);
+ sched_prio(curthread, PRI_MAX_KERN);
thread_unlock(curthread);
while (!softc->shutdown) {
@@ -14399,7 +14399,7 @@
CTL_DEBUG_PRINT(("ctl_thresh_thread starting\n"));
thread_lock(curthread);
- sched_prio(curthread, PUSER - 1);
+ sched_prio(curthread, PRI_MAX_KERN);
thread_unlock(curthread);
while (!softc->shutdown) {
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/proc.h
@@ -44,7 +44,9 @@
#ifdef _KERNEL
#define CPU curcpu
#define minclsyspri PRIBIO
-#define defclsyspri minclsyspri
+#define defclsyspri minclsyspri
+/* Write issue taskq priority. */
+#define wtqclsyspri ((PVM + PRIBIO) / 2)
#define maxclsyspri PVM
#define max_ncpus (mp_maxid + 1)
#define boot_max_ncpus (mp_maxid + 1)
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h b/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/sysmacros.h
@@ -91,8 +91,10 @@
* Treat shim tasks as SCHED_NORMAL tasks
*/
#define minclsyspri (MAX_PRIO-1)
-#define maxclsyspri (MAX_RT_PRIO)
#define defclsyspri (DEFAULT_PRIO)
+/* Write issue taskq priority. */
+#define wtqclsyspri (MAX_RT_PRIO + 1)
+#define maxclsyspri (MAX_RT_PRIO)
#ifndef NICE_TO_PRIO
#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
diff --git a/sys/contrib/openzfs/include/sys/zfs_context.h b/sys/contrib/openzfs/include/sys/zfs_context.h
--- a/sys/contrib/openzfs/include/sys/zfs_context.h
+++ b/sys/contrib/openzfs/include/sys/zfs_context.h
@@ -622,8 +622,10 @@
* Process priorities as defined by setpriority(2) and getpriority(2).
*/
#define minclsyspri 19
-#define maxclsyspri -20
#define defclsyspri 0
+/* Write issue taskq priority. */
+#define wtqclsyspri -19
+#define maxclsyspri -20
#define CPU_SEQID ((uintptr_t)pthread_self() & (max_ncpus - 1))
#define CPU_SEQID_UNSTABLE CPU_SEQID
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -1158,29 +1158,14 @@
spa->spa_proc, zio_taskq_basedc, flags);
} else {
#endif
- pri_t pri = maxclsyspri;
/*
* The write issue taskq can be extremely CPU
* intensive. Run it at slightly less important
* priority than the other taskqs.
- *
- * Under Linux and FreeBSD this means incrementing
- * the priority value as opposed to platforms like
- * illumos where it should be decremented.
- *
- * On FreeBSD, if priorities divided by four (RQ_PPQ)
- * are equal then a difference between them is
- * insignificant.
*/
- if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) {
-#if defined(__linux__)
- pri++;
-#elif defined(__FreeBSD__)
- pri += 4;
-#else
-#error "unknown OS"
-#endif
- }
+ const pri_t pri = (t == ZIO_TYPE_WRITE &&
+ q == ZIO_TASKQ_ISSUE) ?
+ wtqclsyspri : maxclsyspri;
tq = taskq_create_proc(name, value, pri, 50,
INT_MAX, spa->spa_proc, flags);
#ifdef HAVE_SYSDC
diff --git a/sys/dev/beri/beri_ring.c b/sys/dev/beri/beri_ring.c
--- a/sys/dev/beri/beri_ring.c
+++ b/sys/dev/beri/beri_ring.c
@@ -170,7 +170,7 @@
}
mtx_lock(&sc->beri_mtx);
- selwakeuppri(&sc->beri_rsel, PZERO + 1);
+ selwakeuppri(&sc->beri_rsel, PZERO);
KNOTE_LOCKED(&sc->beri_rsel.si_note, 0);
mtx_unlock(&sc->beri_mtx);
}
@@ -190,7 +190,7 @@
}
mtx_lock(&sc->beri_mtx);
- selwakeuppri(&sc->beri_rsel, PZERO + 1);
+ selwakeuppri(&sc->beri_rsel, PZERO);
KNOTE_LOCKED(&sc->beri_rsel.si_note, 0);
mtx_unlock(&sc->beri_mtx);
}
diff --git a/sys/dev/firewire/firewirereg.h b/sys/dev/firewire/firewirereg.h
--- a/sys/dev/firewire/firewirereg.h
+++ b/sys/dev/firewire/firewirereg.h
@@ -293,7 +293,7 @@
extern devclass_t firewire_devclass;
extern int firewire_phydma_enable;
-#define FWPRI ((PZERO + 8) | PCATCH)
+#define FWPRI (PWAIT | PCATCH)
#define CALLOUT_INIT(x) callout_init(x, 1 /* mpsafe */)
diff --git a/sys/dev/syscons/syscons.c b/sys/dev/syscons/syscons.c
--- a/sys/dev/syscons/syscons.c
+++ b/sys/dev/syscons/syscons.c
@@ -1310,7 +1310,7 @@
if (i == sc->cur_scp->index)
return 0;
error =
- tsleep(VTY_WCHAN(sc, i), (PZERO + 1) | PCATCH, "waitvt", 0);
+ tsleep(VTY_WCHAN(sc, i), PZERO | PCATCH, "waitvt", 0);
return error;
case VT_GETACTIVE: /* get active vty # */
diff --git a/sys/dev/usb/usb_process.h b/sys/dev/usb/usb_process.h
--- a/sys/dev/usb/usb_process.h
+++ b/sys/dev/usb/usb_process.h
@@ -31,7 +31,6 @@
#ifndef USB_GLOBAL_INCLUDE_FILE
#include <sys/interrupt.h>
#include <sys/priority.h>
-#include <sys/runq.h>
#endif
/* defines */
diff --git a/sys/dev/vkbd/vkbd.c b/sys/dev/vkbd/vkbd.c
--- a/sys/dev/vkbd/vkbd.c
+++ b/sys/dev/vkbd/vkbd.c
@@ -82,7 +82,7 @@
#define VKBD_UNLOCK(s) mtx_unlock(&(s)->ks_lock)
#define VKBD_LOCK_ASSERT(s, w) mtx_assert(&(s)->ks_lock, w)
#define VKBD_SLEEP(s, f, d, t) \
- msleep(&(s)->f, &(s)->ks_lock, PCATCH | (PZERO + 1), d, t)
+ msleep(&(s)->f, &(s)->ks_lock, PCATCH | PZERO, d, t)
#else
#define VKBD_LOCK_DECL
#define VKBD_LOCK_INIT(s)
@@ -90,7 +90,7 @@
#define VKBD_LOCK(s)
#define VKBD_UNLOCK(s)
#define VKBD_LOCK_ASSERT(s, w)
-#define VKBD_SLEEP(s, f, d, t) tsleep(&(s)->f, PCATCH | (PZERO + 1), d, t)
+#define VKBD_SLEEP(s, f, d, t) tsleep(&(s)->f, PCATCH | PZERO, d, t)
#endif
#define VKBD_KEYBOARD(d) \
@@ -268,8 +268,8 @@
VKBD_SLEEP(state, ks_task, "vkbdc", 0);
/* wakeup poll()ers */
- selwakeuppri(&state->ks_rsel, PZERO + 1);
- selwakeuppri(&state->ks_wsel, PZERO + 1);
+ selwakeuppri(&state->ks_rsel, PZERO);
+ selwakeuppri(&state->ks_wsel, PZERO);
state->ks_flags &= ~OPEN;
state->ks_dev = NULL;
@@ -498,7 +498,7 @@
if (!(state->ks_flags & STATUS)) {
state->ks_flags |= STATUS;
- selwakeuppri(&state->ks_rsel, PZERO + 1);
+ selwakeuppri(&state->ks_rsel, PZERO);
wakeup(&state->ks_flags);
}
}
@@ -531,7 +531,7 @@
q->head = 0;
/* wakeup ks_inq writers/poll()ers */
- selwakeuppri(&state->ks_wsel, PZERO + 1);
+ selwakeuppri(&state->ks_wsel, PZERO);
wakeup(q);
return (c);
@@ -1246,7 +1246,7 @@
/* flush ks_inq and wakeup writers/poll()ers */
state->ks_inq.head = state->ks_inq.tail = state->ks_inq.cc = 0;
- selwakeuppri(&state->ks_wsel, PZERO + 1);
+ selwakeuppri(&state->ks_wsel, PZERO);
wakeup(&state->ks_inq);
}
diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c
--- a/sys/fs/fuse/fuse_device.c
+++ b/sys/fs/fuse/fuse_device.c
@@ -152,7 +152,7 @@
FUSE_LOCK();
fuse_lck_mtx_lock(fdata->aw_mtx);
/* wakup poll()ers */
- selwakeuppri(&fdata->ks_rsel, PZERO + 1);
+ selwakeuppri(&fdata->ks_rsel, PZERO);
/* Don't let syscall handlers wait in vain */
while ((tick = fuse_aw_pop(fdata))) {
fuse_lck_mtx_lock(tick->tk_aw_mtx);
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -932,7 +932,7 @@
if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)
return EIO;
fvdat->flag |= FN_FLUSHWANT;
- tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz);
+ tsleep(&fvdat->flag, PRIBIO, "fusevinv", 2 * hz);
error = 0;
if (p != NULL) {
PROC_LOCK(p);
diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c
--- a/sys/fs/fuse/fuse_ipc.c
+++ b/sys/fs/fuse/fuse_ipc.c
@@ -593,7 +593,7 @@
fuse_lck_mtx_lock(data->ms_mtx);
data->dataflags |= FSESS_DEAD;
wakeup_one(data);
- selwakeuppri(&data->ks_rsel, PZERO + 1);
+ selwakeuppri(&data->ks_rsel, PZERO);
wakeup(&data->ticketer);
fuse_lck_mtx_unlock(data->ms_mtx);
FUSE_UNLOCK();
@@ -669,7 +669,7 @@
else
fuse_ms_push(ftick);
wakeup_one(ftick->tk_data);
- selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1);
+ selwakeuppri(&ftick->tk_data->ks_rsel, PZERO);
KNOTE_LOCKED(&ftick->tk_data->ks_rsel.si_note, 0);
fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx);
}
diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c
--- a/sys/fs/nfs/nfs_commonsubs.c
+++ b/sys/fs/nfs/nfs_commonsubs.c
@@ -4644,7 +4644,7 @@
ts.tv_sec = 0;
ts.tv_nsec = 0;
(void) nfsmsleep((caddr_t)flagp, NFSSOCKMUTEXPTR,
- PZERO - 1, "nfsndlck", &ts);
+ PVFS, "nfsndlck", &ts);
}
*flagp |= NFSR_SNDLOCK;
NFSUNLOCKSOCK();
diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c b/sys/fs/nfsserver/nfs_nfsdcache.c
--- a/sys/fs/nfsserver/nfs_nfsdcache.c
+++ b/sys/fs/nfsserver/nfs_nfsdcache.c
@@ -392,7 +392,7 @@
nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+ (void)mtx_sleep(rp, mutex, PVFS | PDROP,
"nfsrc", 10 * hz);
goto loop;
}
@@ -678,7 +678,7 @@
rp = hitrp;
if ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+ (void)mtx_sleep(rp, mutex, PVFS | PDROP,
"nfsrc", 10 * hz);
goto tryagain;
}
@@ -750,7 +750,7 @@
mtx_assert(mutex, MA_OWNED);
while ((rp->rc_flag & RC_LOCKED) != 0) {
rp->rc_flag |= RC_WANTED;
- (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
+ (void)mtx_sleep(rp, mutex, PVFS, "nfsrc", 0);
}
rp->rc_flag |= RC_LOCKED;
}
diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c
--- a/sys/fs/nfsserver/nfs_nfsdstate.c
+++ b/sys/fs/nfsserver/nfs_nfsdstate.c
@@ -507,7 +507,7 @@
NFSLOCKSTATE();
while (clp->lc_cbref) {
clp->lc_flags |= LCL_WAKEUPWANTED;
- (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
+ (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
"nfsd clp", 10 * hz);
}
NFSUNLOCKSTATE();
@@ -574,7 +574,7 @@
NFSLOCKSTATE();
while (clp->lc_cbref) {
clp->lc_flags |= LCL_WAKEUPWANTED;
- (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
+ (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PVFS,
"nfsdclp", 10 * hz);
}
NFSUNLOCKSTATE();
diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c
--- a/sys/fs/smbfs/smbfs_io.c
+++ b/sys/fs/smbfs/smbfs_io.c
@@ -629,7 +629,7 @@
while (np->n_flag & NFLUSHINPROG) {
np->n_flag |= NFLUSHWANT;
- error = tsleep(&np->n_flag, PRIBIO + 2, "smfsvinv", 2 * hz);
+ error = tsleep(&np->n_flag, PRIBIO, "smfsvinv", 2 * hz);
error = smb_td_intr(td);
if (error == EINTR)
return EINTR;
diff --git a/sys/i386/include/runq.h b/sys/i386/include/runq.h
deleted file mode 100644
--- a/sys/i386/include/runq.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (2) /* Number of priority status words. */
-#define RQB_L2BPW (5) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1 << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffs(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-typedef u_int32_t rqb_word_t;
-
-#endif
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -1010,7 +1010,8 @@
mtx_lock(&rms->mtx);
while (rms->writers > 0)
- msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0);
+ msleep(&rms->readers, &rms->mtx, PRI_MAX_KERN,
+ mtx_name(&rms->mtx), 0);
critical_enter();
rms_int_readers_inc(rms, rms_int_pcpu(rms));
mtx_unlock(&rms->mtx);
@@ -1197,7 +1198,7 @@
mtx_lock(&rms->mtx);
rms->writers++;
if (rms->writers > 1) {
- msleep(&rms->owner, &rms->mtx, (PUSER - 1),
+ msleep(&rms->owner, &rms->mtx, PRI_MAX_KERN,
mtx_name(&rms->mtx), 0);
MPASS(rms->readers == 0);
KASSERT(rms->owner == RMS_TRANSIENT,
@@ -1213,7 +1214,7 @@
rms_assert_no_pcpu_readers(rms);
if (rms->readers > 0) {
- msleep(&rms->writers, &rms->mtx, (PUSER - 1),
+ msleep(&rms->writers, &rms->mtx, PRI_MAX_KERN,
mtx_name(&rms->mtx), 0);
}
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -38,6 +38,7 @@
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/runq.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -57,8 +58,6 @@
#endif
#endif
-CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
-
/*
* kern.sched.preemption allows user space to determine if preemption support
* is compiled in or not. It is not currently a boot or runtime flag that
@@ -253,6 +252,35 @@
/************************************************************************
* SYSTEM RUN QUEUE manipulations and tests *
************************************************************************/
+_Static_assert(RQSW_BPW == (1 << RQSW_L2BPW),
+ "RQSW_L2BPW and RQSW_BPW / 'rqsw_t' mismatch");
+_Static_assert(RQ_NQS <= 256,
+ "'td_rqindex' must be turned into a bigger unsigned type");
+/* A macro instead of a function to get the proper calling function's name. */
+#define CHECK_IDX(idx) ({ \
+ __typeof(idx) _idx __unused = (idx); \
+ KASSERT(0 <= _idx && _idx < RQ_NQS, \
+ ("%s: %s out of range: %d", __func__, __STRING(idx), _idx)); \
+})
+
+/* Status words' individual bit manipulators' internals. */
+typedef uintptr_t runq_sw_op(int idx, int sw_idx, rqsw_t sw_bit,
+ rqsw_t *swp);
+static inline uintptr_t runq_sw_apply(struct runq *rq, int idx,
+ runq_sw_op *op);
+
+static inline uintptr_t runq_sw_set_not_empty_op(int idx, int sw_idx,
+ rqsw_t sw_bit, rqsw_t *swp);
+static inline uintptr_t runq_sw_set_empty_op(int idx, int sw_idx,
+ rqsw_t sw_bit, rqsw_t *swp);
+static inline uintptr_t runq_sw_is_empty_op(int idx, int sw_idx,
+ rqsw_t sw_bit, rqsw_t *swp);
+
+/* Status words' individual bit manipulators. */
+static inline void runq_sw_set_not_empty(struct runq *rq, int idx);
+static inline void runq_sw_set_empty(struct runq *rq, int idx);
+static inline bool runq_sw_is_empty(struct runq *rq, int idx);
+
/*
* Initialize a run structure.
*/
@@ -261,98 +289,96 @@
{
int i;
- bzero(rq, sizeof *rq);
+ bzero(rq, sizeof(*rq));
for (i = 0; i < RQ_NQS; i++)
TAILQ_INIT(&rq->rq_queues[i]);
}
/*
- * Clear the status bit of the queue corresponding to priority level pri,
- * indicating that it is empty.
+ * Helper to implement functions operating on a particular status word bit.
+ *
+ * The operator is passed the initial 'idx', the corresponding status word index
+ * in 'rq_status' in 'sw_idx', a status word with only that bit set in 'sw_bit'
+ * and a pointer to the corresponding status word in 'swp'.
*/
-static __inline void
-runq_clrbit(struct runq *rq, int pri)
+static inline uintptr_t
+runq_sw_apply(struct runq *rq, int idx, runq_sw_op *op)
{
- struct rqbits *rqb;
+ rqsw_t *swp;
+ rqsw_t sw_bit;
+ int sw_idx;
- rqb = &rq->rq_status;
- CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
- rqb->rqb_bits[RQB_WORD(pri)],
- rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
- RQB_BIT(pri), RQB_WORD(pri));
- rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
+ CHECK_IDX(idx);
+
+ sw_idx = RQSW_IDX(idx);
+ sw_bit = RQSW_BIT(idx);
+ swp = &rq->rq_status.rq_sw[sw_idx];
+
+ return (op(idx, sw_idx, sw_bit, swp));
}
/*
- * Find the index of the first non-empty run queue. This is done by
- * scanning the status bits, a set bit indicates a non-empty queue.
+ * Modify the status words to indicate that some queue is not empty.
+ *
+ * Sets the status bit corresponding to the queue at index 'idx'.
*/
-static __inline int
-runq_findbit(struct runq *rq)
+static inline uintptr_t
+runq_sw_set_not_empty_op(int idx, int sw_idx, rqsw_t sw_bit, rqsw_t *swp)
{
- struct rqbits *rqb;
- int pri;
- int i;
+ rqsw_t old_sw __unused = *swp;
- rqb = &rq->rq_status;
- for (i = 0; i < RQB_LEN; i++)
- if (rqb->rqb_bits[i]) {
- pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
- CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
- rqb->rqb_bits[i], i, pri);
- return (pri);
- }
-
- return (-1);
+ *swp |= sw_bit;
+ CTR4(KTR_RUNQ, "runq_sw_set_not_empty: idx=%d sw_idx=%d bits=%#x->%#x",
+ idx, sw_idx, old_sw, *swp);
+ return (0);
}
-
-static __inline int
-runq_findbit_from(struct runq *rq, u_char pri)
+static inline void
+runq_sw_set_not_empty(struct runq *rq, int idx)
{
- struct rqbits *rqb;
- rqb_word_t mask;
- int i;
-
- /*
- * Set the mask for the first word so we ignore priorities before 'pri'.
- */
- mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
- rqb = &rq->rq_status;
-again:
- for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
- mask = rqb->rqb_bits[i] & mask;
- if (mask == 0)
- continue;
- pri = RQB_FFS(mask) + (i << RQB_L2BPW);
- CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
- mask, i, pri);
- return (pri);
- }
- if (pri == 0)
- return (-1);
- /*
- * Wrap back around to the beginning of the list just once so we
- * scan the whole thing.
- */
- pri = 0;
- goto again;
+ (void)runq_sw_apply(rq, idx, &runq_sw_set_not_empty_op);
}
/*
- * Set the status bit of the queue corresponding to priority level pri,
- * indicating that it is non-empty.
+ * Modify the status words to indicate that some queue is empty.
+ *
+ * Clears the status bit corresponding to the queue at index 'idx'.
*/
-static __inline void
-runq_setbit(struct runq *rq, int pri)
+static inline uintptr_t
+runq_sw_set_empty_op(int idx, int sw_idx, rqsw_t sw_bit, rqsw_t *swp)
{
- struct rqbits *rqb;
+ rqsw_t old_sw __unused = *swp;
- rqb = &rq->rq_status;
- CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
- rqb->rqb_bits[RQB_WORD(pri)],
- rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
- RQB_BIT(pri), RQB_WORD(pri));
- rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
+ *swp &= ~sw_bit;
+ CTR4(KTR_RUNQ, "runq_sw_set_empty: idx=%d sw_idx=%d bits=%#x->%#x",
+ idx, sw_idx, old_sw, *swp);
+ return (0);
+}
+static inline void
+runq_sw_set_empty(struct runq *rq, int idx)
+{
+ (void)runq_sw_apply(rq, idx, &runq_sw_set_empty_op);
+}
+
+/*
+ * Returns whether the status words indicate that queue 'idx' is empty.
+ */
+static inline uintptr_t
+runq_sw_is_empty_op(int idx, int sw_idx, rqsw_t sw_bit, rqsw_t *swp)
+{
+ return ((*swp & sw_bit) == 0);
+}
+static inline bool
+runq_sw_is_empty(struct runq *rq, int idx)
+{
+ return (runq_sw_apply(rq, idx, &runq_sw_is_empty_op));
+}
+
+/*
+ * Returns whether a particular queue is empty.
+ */
+bool runq_is_queue_empty(struct runq *rq, int idx)
+{
+ return (runq_sw_is_empty(rq, idx));
}
/*
@@ -362,102 +388,183 @@
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
- struct rqhead *rqh;
- int pri;
- pri = td->td_priority / RQ_PPQ;
- td->td_rqindex = pri;
- runq_setbit(rq, pri);
- rqh = &rq->rq_queues[pri];
- CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
- td, td->td_priority, pri, rqh);
- if (flags & SRQ_PREEMPTED) {
- TAILQ_INSERT_HEAD(rqh, td, td_runq);
- } else {
- TAILQ_INSERT_TAIL(rqh, td, td_runq);
- }
+ runq_add_idx(rq, td, RQ_PRI_TO_QUEUE_IDX(td->td_priority), flags);
}
void
-runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
+runq_add_idx(struct runq *rq, struct thread *td, int idx, int flags)
{
- struct rqhead *rqh;
+ struct rq_queue *rqq;
- KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
- td->td_rqindex = pri;
- runq_setbit(rq, pri);
- rqh = &rq->rq_queues[pri];
- CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
- td, td->td_priority, pri, rqh);
- if (flags & SRQ_PREEMPTED) {
- TAILQ_INSERT_HEAD(rqh, td, td_runq);
- } else {
- TAILQ_INSERT_TAIL(rqh, td, td_runq);
- }
+ /*
+ * runq_sw_*() functions assert that 'idx' is non-negative and below
+ * 'RQ_NQS', and a static assert earlier in this file ensures that
+ * 'RQ_NQS' is no more than 256.
+ */
+ td->td_rqindex = idx;
+ runq_sw_set_not_empty(rq, idx);
+ rqq = &rq->rq_queues[idx];
+ CTR4(KTR_RUNQ, "runq_add_idx: td=%p pri=%d idx=%d rqq=%p",
+ td, td->td_priority, idx, rqq);
+ if (flags & SRQ_PREEMPTED)
+ TAILQ_INSERT_HEAD(rqq, td, td_runq);
+ else
+ TAILQ_INSERT_TAIL(rqq, td, td_runq);
}
+
/*
- * Return true if there are runnable processes of any priority on the run
- * queue, false otherwise. Has no side effects, does not modify the run
- * queue structure.
+ * Remove the thread from the queue specified by its priority, and clear the
+ * corresponding status bit if the queue becomes empty.
+ *
+ * Returns whether the corresponding queue is empty after removal.
+ */
+bool
+runq_remove(struct runq *rq, struct thread *td)
+{
+ struct rq_queue *rqq;
+ int idx;
+
+ KASSERT(td->td_flags & TDF_INMEM, ("runq_remove: Thread swapped out"));
+ idx = td->td_rqindex;
+ CHECK_IDX(idx);
+ rqq = &rq->rq_queues[idx];
+ CTR4(KTR_RUNQ, "runq_remove: td=%p pri=%d idx=%d rqq=%p",
+ td, td->td_priority, idx, rqq);
+ TAILQ_REMOVE(rqq, td, td_runq);
+ if (TAILQ_EMPTY(rqq)) {
+ runq_sw_set_empty(rq, idx);
+ CTR1(KTR_RUNQ, "runq_remove: queue at idx=%d now empty", idx);
+ return (true);
+ }
+ return (false);
+}
+
+static inline int
+runq_findq_status_word(struct runq *const rq, const int w_idx,
+ const rqsw_t w, runq_pred_t *const pred, void *const pred_data)
+{
+ struct rq_queue *q;
+ rqsw_t tw = w;
+ int idx, b_idx;
+
+ while (tw != 0) {
+ b_idx = RQSW_BSF(tw);
+ idx = RQSW_TO_QUEUE_IDX(w_idx, b_idx);
+ q = &rq->rq_queues[idx];
+ KASSERT(!TAILQ_EMPTY(q),
+ ("runq_findq(): No thread on non-empty queue with idx=%d",
+ idx));
+ if (pred(idx, q, pred_data))
+ return (idx);
+ tw &= ~RQSW_BIT(idx);
+ }
+
+ return (-1);
+}
+
+/*
+ * Find in the passed range (bounds included) the index of the first (i.e.,
+ * having the lowest index) non-empty queue that passes pred().
+ *
+ * Considered queues are those with index 'lvl_min' up to 'lvl_max' (bounds
+ * included). If no queue matches, returns -1.
+ *
+ * This is done by scanning the status words (a set bit indicates a non-empty
+ * queue) and calling pred() with corresponding queue indices. pred() must
+ * return whether the corresponding queue is accepted. It is passed private
+ * data through 'pred_data', which can be used both for extra input and output.
*/
int
-runq_check(struct runq *rq)
+runq_findq(struct runq *const rq, const int lvl_min, const int lvl_max,
+ runq_pred_t *const pred, void *const pred_data)
{
- struct rqbits *rqb;
- int i;
+ rqsw_t const (*const rqsw)[RQSW_NB] = &rq->rq_status.rq_sw;
+ rqsw_t w;
+ int i, last, idx;
- rqb = &rq->rq_status;
- for (i = 0; i < RQB_LEN; i++)
- if (rqb->rqb_bits[i]) {
- CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
- rqb->rqb_bits[i], i);
- return (1);
- }
- CTR0(KTR_RUNQ, "runq_check: empty");
+ CHECK_IDX(lvl_min);
+ CHECK_IDX(lvl_max);
+ KASSERT(lvl_min <= lvl_max,
+ ("lvl_min: %d > lvl_max: %d!", lvl_min, lvl_max));
- return (0);
+ i = RQSW_IDX(lvl_min);
+ last = RQSW_IDX(lvl_max);
+ /* Clear bits for runqueues below 'lvl_min'. */
+ w = (*rqsw)[i] & ~(RQSW_BIT(lvl_min) - 1);
+ if (i == last)
+ goto last_mask;
+ idx = runq_findq_status_word(rq, i, w, pred, pred_data);
+ if (idx != -1)
+ goto return_idx;
+
+ for (++i; i < last; ++i) {
+ w = (*rqsw)[i];
+ idx = runq_findq_status_word(rq, i, w, pred, pred_data);
+ if (idx != -1)
+ goto return_idx;
+ }
+
+ MPASS(i == last);
+ w = (*rqsw)[i];
+last_mask:
+ /* Clear bits for runqueues above 'lvl_max'. */
+ w &= (RQSW_BIT(lvl_max) - 1) | RQSW_BIT(lvl_max);
+ idx = runq_findq_status_word(rq, i, w, pred, pred_data);
+ if (idx != -1)
+ goto return_idx;
+ return (-1);
+return_idx:
+ CTR4(KTR_RUNQ, "runq_findq: bits=%#x->%#x i=%d idx=%d",
+ (*rqsw)[i], w, i, idx);
+ return (idx);
+}
+
+static bool
+runq_first_thread_pred(const int idx, struct rq_queue *const q, void *const data)
+{
+ struct thread **const tdp = data;
+ struct thread *const td = TAILQ_FIRST(q);
+
+ *tdp = td;
+ return (true);
+}
+
+/* Make sure it has an external definition. */
+extern inline struct thread *
+runq_first_thread_range(struct runq *const rq, const int lvl_min,
+ const int lvl_max)
+{
+ struct thread *td = NULL;
+
+ (void)runq_findq(rq, lvl_min, lvl_max, runq_first_thread_pred, &td);
+ return (td);
+}
+
+static inline struct thread *
+runq_first_thread(struct runq *const rq)
+{
+
+ return (runq_first_thread_range(rq, 0, RQ_NQS - 1));
}
/*
- * Find the highest priority process on the run queue.
+ * Return true if there are some threads of any priority on the run queue,
+ * false otherwise. Has no side effects.
*/
-struct thread *
-runq_choose_fuzz(struct runq *rq, int fuzz)
+bool
+runq_not_empty(struct runq *rq)
{
- struct rqhead *rqh;
- struct thread *td;
- int pri;
+ struct thread *const td = runq_first_thread(rq);
- while ((pri = runq_findbit(rq)) != -1) {
- rqh = &rq->rq_queues[pri];
- /* fuzz == 1 is normal.. 0 or less are ignored */
- if (fuzz > 1) {
- /*
- * In the first couple of entries, check if
- * there is one for our CPU as a preference.
- */
- int count = fuzz;
- int cpu = PCPU_GET(cpuid);
- struct thread *td2;
- td2 = td = TAILQ_FIRST(rqh);
-
- while (count-- && td2) {
- if (td2->td_lastcpu == cpu) {
- td = td2;
- break;
- }
- td2 = TAILQ_NEXT(td2, td_runq);
- }
- } else
- td = TAILQ_FIRST(rqh);
- KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
- CTR3(KTR_RUNQ,
- "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
- return (td);
+ if (td != NULL) {
+ CTR2(KTR_RUNQ, "runq_not_empty: idx=%d, td=%p",
+ td->td_rqindex, td);
+ return (true);
}
- CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);
- return (NULL);
+ CTR0(KTR_RUNQ, "runq_not_empty: empty");
+ return (false);
}
/*
@@ -466,73 +573,74 @@
struct thread *
runq_choose(struct runq *rq)
{
- struct rqhead *rqh;
struct thread *td;
- int pri;
- while ((pri = runq_findbit(rq)) != -1) {
- rqh = &rq->rq_queues[pri];
- td = TAILQ_FIRST(rqh);
- KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
- CTR3(KTR_RUNQ,
- "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
+ td = runq_first_thread(rq);
+ if (td != NULL) {
+ CTR2(KTR_RUNQ, "runq_choose: idx=%d td=%p", td->td_rqindex, td);
return (td);
}
- CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);
+ CTR0(KTR_RUNQ, "runq_choose: idlethread");
return (NULL);
}
-struct thread *
-runq_choose_from(struct runq *rq, u_char idx)
+struct runq_fuzz_pred_data {
+ int fuzz;
+ struct thread *td;
+};
+
+static bool
+runq_fuzz_pred(const int idx, struct rq_queue *const q, void *const data)
{
- struct rqhead *rqh;
+ struct runq_fuzz_pred_data *const d = data;
+ const int fuzz = d->fuzz;
struct thread *td;
- int pri;
- if ((pri = runq_findbit_from(rq, idx)) != -1) {
- rqh = &rq->rq_queues[pri];
- td = TAILQ_FIRST(rqh);
- KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
- CTR4(KTR_RUNQ,
- "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
- pri, td, td->td_rqindex, rqh);
- return (td);
+ td = TAILQ_FIRST(q);
+
+ if (fuzz > 1) {
+ /*
+ * In the first couple of entries, check if
+ * there is one for our CPU as a preference.
+ */
+ struct thread *td2 = td;
+ int count = fuzz;
+ int cpu = PCPU_GET(cpuid);
+
+ while (count-- != 0 && td2 != NULL) {
+ if (td2->td_lastcpu == cpu) {
+ td = td2;
+ break;
+ }
+ td2 = TAILQ_NEXT(td2, td_runq);
+ }
}
- CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);
- return (NULL);
+ d->td = td;
+ return (true);
}
+
/*
- * Remove the thread from the queue specified by its priority, and clear the
- * corresponding status bit if the queue becomes empty.
- * Caller must set state afterwards.
+ * Find the highest priority thread on the run queue.
*/
-void
-runq_remove(struct runq *rq, struct thread *td)
+struct thread *
+runq_choose_fuzz(struct runq *rq, int fuzz)
{
+ struct runq_fuzz_pred_data data = {
+ .fuzz = fuzz,
+ .td = NULL
+ };
+ int idx;
- runq_remove_idx(rq, td, NULL);
-}
-
-void
-runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
-{
- struct rqhead *rqh;
- u_char pri;
-
- KASSERT(td->td_flags & TDF_INMEM,
- ("runq_remove_idx: thread swapped out"));
- pri = td->td_rqindex;
- KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
- rqh = &rq->rq_queues[pri];
- CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
- td, td->td_priority, pri, rqh);
- TAILQ_REMOVE(rqh, td, td_runq);
- if (TAILQ_EMPTY(rqh)) {
- CTR0(KTR_RUNQ, "runq_remove_idx: empty");
- runq_clrbit(rq, pri);
- if (idx != NULL && *idx == pri)
- *idx = (pri + 1) % RQ_NQS;
+ idx = runq_findq(rq, 0, RQ_NQS - 1, runq_fuzz_pred, &data);
+ if (idx != -1) {
+ MPASS(data.td != NULL);
+ CTR2(KTR_RUNQ, "runq_choose_fuzz: idx=%d td=%p", idx, data.td);
+ return (data.td);
}
+
+ MPASS(data.td == NULL);
+ CTR0(KTR_RUNQ, "runq_choose_fuzz: idlethread");
+ return (NULL);
}
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -48,6 +48,7 @@
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/runq.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/smp.h>
@@ -72,15 +73,17 @@
* INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
* the range 100-256 Hz (approximately).
*/
-#define ESTCPULIM(e) \
- min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \
- RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1)
#ifdef SMP
#define INVERSE_ESTCPU_WEIGHT (8 * smp_cpus)
#else
#define INVERSE_ESTCPU_WEIGHT 8 /* 1 / (priorities per estcpu level). */
#endif
#define NICE_WEIGHT 1 /* Priorities per nice level. */
+#define ESTCPULIM(e) \
+ min((e), INVERSE_ESTCPU_WEIGHT * \
+ (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) + \
+ PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) \
+ + INVERSE_ESTCPU_WEIGHT - 1)
#define TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__XSTRING(UINT_MAX)))
@@ -683,13 +686,14 @@
/* Nothing needed. */
}
-int
+bool
sched_runnable(void)
{
#ifdef SMP
- return runq_check(&runq) + runq_check(&runq_pcpu[PCPU_GET(cpuid)]);
+ return (runq_not_empty(&runq) ||
+ runq_not_empty(&runq_pcpu[PCPU_GET(cpuid)]));
#else
- return runq_check(&runq);
+ return (runq_not_empty(&runq));
#endif
}
@@ -871,7 +875,7 @@
if (td->td_priority == prio)
return;
td->td_priority = prio;
- if (TD_ON_RUNQ(td) && td->td_rqindex != (prio / RQ_PPQ)) {
+ if (TD_ON_RUNQ(td) && td->td_rqindex != RQ_PRI_TO_QUEUE_IDX(prio)) {
sched_rem(td);
sched_add(td, SRQ_BORING | SRQ_HOLDTD);
}
@@ -1682,7 +1686,7 @@
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
- while (sched_runnable() == 0) {
+ while (!sched_runnable()) {
cpu_idle(stat->idlecalls + stat->oldidlecalls > 64);
stat->idlecalls++;
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -52,6 +52,7 @@
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
+#include <sys/runq.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/smp.h>
@@ -87,10 +88,9 @@
* Thread scheduler specific section. All fields are protected
* by the thread lock.
*/
-struct td_sched {
- struct runq *ts_runq; /* Run-queue we're queued on. */
+struct td_sched {
short ts_flags; /* TSF_* flags. */
- int ts_cpu; /* CPU that we have affinity for. */
+ int ts_cpu; /* CPU we are on, or were last on. */
int ts_rltick; /* Real last tick, for affinity. */
int ts_slice; /* Ticks of slice remaining. */
u_int ts_slptime; /* Number of ticks we vol. slept */
@@ -130,23 +130,6 @@
#define PRI_MIN_BATCH (PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE)
#define PRI_MAX_BATCH PRI_MAX_TIMESHARE
-/*
- * Cpu percentage computation macros and defines.
- *
- * SCHED_TICK_SECS: Number of seconds to average the cpu usage across.
- * SCHED_TICK_TARG: Number of hz ticks to average the cpu usage across.
- * SCHED_TICK_MAX: Maximum number of ticks before scaling back.
- * SCHED_TICK_SHIFT: Shift factor to avoid rounding away results.
- * SCHED_TICK_HZ: Compute the number of hz ticks for a given ticks count.
- * SCHED_TICK_TOTAL: Gives the amount of time we've been recording ticks.
- */
-#define SCHED_TICK_SECS 10
-#define SCHED_TICK_TARG (hz * SCHED_TICK_SECS)
-#define SCHED_TICK_MAX (SCHED_TICK_TARG + hz)
-#define SCHED_TICK_SHIFT 10
-#define SCHED_TICK_HZ(ts) ((ts)->ts_ticks >> SCHED_TICK_SHIFT)
-#define SCHED_TICK_TOTAL(ts) (max((ts)->ts_ltick - (ts)->ts_ftick, hz))
-
/*
* These macros determine priorities for non-interactive threads. They are
* assigned a priority based on their recent cpu utilization as expressed
@@ -169,6 +152,48 @@
(roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
#define SCHED_PRI_NICE(nice) (nice)
+/*
+ * Runqueue indices for the implemented scheduling policies' priority bounds.
+ *
+ * In ULE's implementation, realtime policy covers the ITHD, REALTIME and
+ * INTERACT (see above) ranges, timesharing the BATCH range (see above), and
+ * idle policy the IDLE range.
+ *
+ * Priorities from these ranges must not be assigned to the same runqueue's
+ * queue.
+ */
+#define RQ_RT_POL_MIN (RQ_PRI_TO_QUEUE_IDX(PRI_MIN_ITHD))
+#define RQ_RT_POL_MAX (RQ_PRI_TO_QUEUE_IDX(PRI_MAX_INTERACT))
+#define RQ_TS_POL_MIN (RQ_PRI_TO_QUEUE_IDX(PRI_MIN_BATCH))
+#define RQ_TS_POL_MAX (RQ_PRI_TO_QUEUE_IDX(PRI_MAX_BATCH))
+#define RQ_ID_POL_MIN (RQ_PRI_TO_QUEUE_IDX(PRI_MIN_IDLE))
+#define RQ_ID_POL_MAX (RQ_PRI_TO_QUEUE_IDX(PRI_MAX_IDLE))
+
+_Static_assert(RQ_RT_POL_MAX != RQ_TS_POL_MIN,
+ "ULE's realtime and timeshare policies' runqueue ranges overlap");
+_Static_assert(RQ_TS_POL_MAX != RQ_ID_POL_MIN,
+ "ULE's timeshare and idle policies' runqueue ranges overlap");
+
+/* Helper to treat the timeshare range as a circular group of queues. */
+#define RQ_TS_POL_MODULO (RQ_TS_POL_MAX - RQ_TS_POL_MIN + 1)
+
+/*
+ * Cpu percentage computation macros and defines.
+ *
+ * SCHED_TICK_SECS: Number of seconds to average the cpu usage across.
+ * SCHED_TICK_TARG: Number of hz ticks to average the cpu usage across.
+ * SCHED_TICK_MAX: Maximum number of ticks before scaling back.
+ * SCHED_TICK_SHIFT: Shift factor to avoid rounding away results.
+ * SCHED_TICK_HZ: Compute the number of hz ticks for a given ticks count.
+ * SCHED_TICK_TOTAL: Gives the amount of time we've been recording ticks.
+ */
+#define SCHED_TICK_SECS 10
+#define SCHED_TICK_TARG (hz * SCHED_TICK_SECS)
+#define SCHED_TICK_MAX (SCHED_TICK_TARG + hz)
+#define SCHED_TICK_SHIFT 10
+#define SCHED_TICK_HZ(ts) ((ts)->ts_ticks >> SCHED_TICK_SHIFT)
+#define SCHED_TICK_TOTAL(ts) (max((ts)->ts_ltick - (ts)->ts_ftick, hz))
+
/*
* These determine the interactivity of a process. Interactivity differs from
* cpu utilization in that it expresses the voluntary time slept vs time ran
@@ -252,12 +277,10 @@
short tdq_oldswitchcnt; /* (l) Switches last tick. */
u_char tdq_lowpri; /* (ts) Lowest priority thread. */
u_char tdq_owepreempt; /* (f) Remote preemption pending. */
- u_char tdq_idx; /* (t) Current insert index. */
- u_char tdq_ridx; /* (t) Current removal index. */
+ u_char tdq_ts_off; /* (t) TS insertion offset. */
+ u_char tdq_ts_deq_off; /* (t) TS dequeue offset. */
int tdq_id; /* (c) cpuid. */
- struct runq tdq_realtime; /* (t) real-time run queue. */
- struct runq tdq_timeshare; /* (t) timeshare run queue. */
- struct runq tdq_idle; /* (t) Queue of IDLE threads. */
+ struct runq tdq_runq; /* (t) Run queue. */
char tdq_name[TDQ_NAME_LEN];
#ifdef KTR
char tdq_loadname[TDQ_LOADNAME_LEN];
@@ -329,12 +352,17 @@
static void sched_pctcpu_update(struct td_sched *, int);
/* Operations on per processor queues */
+static inline struct thread *runq_choose_realtime(struct runq *const rq);
+static inline struct thread *runq_choose_timeshare(struct runq *const rq,
+ int off);
+static inline struct thread *runq_choose_idle(struct runq *const rq);
static struct thread *tdq_choose(struct tdq *);
+
static void tdq_setup(struct tdq *, int i);
static void tdq_load_add(struct tdq *, struct thread *);
static void tdq_load_rem(struct tdq *, struct thread *);
-static __inline void tdq_runq_add(struct tdq *, struct thread *, int);
-static __inline void tdq_runq_rem(struct tdq *, struct thread *);
+static inline void tdq_runq_add(struct tdq *, struct thread *, int);
+static inline void tdq_runq_rem(struct tdq *, struct thread *);
static inline int sched_shouldpreempt(int, int, int);
static void tdq_print(int cpu);
static void runq_print(struct runq *rq);
@@ -343,8 +371,19 @@
static int tdq_move(struct tdq *, struct tdq *);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct tdq *, int lowpri);
+
+static bool runq_steal_pred(const int idx, struct rq_queue *const q,
+ void *const data);
+static inline struct thread *runq_steal_range(struct runq *const rq,
+ const int lvl_min, const int lvl_max, int cpu);
+static inline struct thread *runq_steal_realtime(struct runq *const rq,
+ int cpu);
+static inline struct thread *runq_steal_timeshare(struct runq *const rq,
+ int cpu, int off);
+static inline struct thread *runq_steal_idle(struct runq *const rq,
+ int cpu);
static struct thread *tdq_steal(struct tdq *, int);
-static struct thread *runq_steal(struct runq *, int);
+
static int sched_pickcpu(struct thread *, int);
static void sched_balance(void);
static bool sched_balance_pair(struct tdq *, struct tdq *);
@@ -386,20 +425,20 @@
static void
runq_print(struct runq *rq)
{
- struct rqhead *rqh;
+ struct rq_queue *rqq;
struct thread *td;
int pri;
int j;
int i;
- for (i = 0; i < RQB_LEN; i++) {
+ for (i = 0; i < RQSW_NB; i++) {
printf("\t\trunq bits %d 0x%zx\n",
- i, rq->rq_status.rqb_bits[i]);
- for (j = 0; j < RQB_BPW; j++)
- if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
- pri = j + (i << RQB_L2BPW);
- rqh = &rq->rq_queues[pri];
- TAILQ_FOREACH(td, rqh, td_runq) {
+ i, rq->rq_status.rq_sw[i]);
+ for (j = 0; j < RQSW_BPW; j++)
+ if (rq->rq_status.rq_sw[i] & (1ul << j)) {
+ pri = RQSW_TO_QUEUE_IDX(i, j);
+ rqq = &rq->rq_queues[pri];
+ TAILQ_FOREACH(td, rqq, td_runq) {
printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
td, td->td_name, td->td_priority,
td->td_rqindex, pri);
@@ -419,21 +458,17 @@
tdq = TDQ_CPU(cpu);
printf("tdq %d:\n", TDQ_ID(tdq));
- printf("\tlock %p\n", TDQ_LOCKPTR(tdq));
- printf("\tLock name: %s\n", tdq->tdq_name);
- printf("\tload: %d\n", tdq->tdq_load);
- printf("\tswitch cnt: %d\n", tdq->tdq_switchcnt);
- printf("\told switch cnt: %d\n", tdq->tdq_oldswitchcnt);
- printf("\ttimeshare idx: %d\n", tdq->tdq_idx);
- printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
+ printf("\tlock %p\n", TDQ_LOCKPTR(tdq));
+ printf("\tLock name: %s\n", tdq->tdq_name);
+ printf("\tload: %d\n", tdq->tdq_load);
+ printf("\tswitch cnt: %d\n", tdq->tdq_switchcnt);
+ printf("\told switch cnt: %d\n", tdq->tdq_oldswitchcnt);
+ printf("\tTS insert offset: %d\n", tdq->tdq_ts_off);
+ printf("\tTS dequeue offset: %d\n", tdq->tdq_ts_deq_off);
printf("\tload transferable: %d\n", tdq->tdq_transferable);
printf("\tlowest priority: %d\n", tdq->tdq_lowpri);
- printf("\trealtime runq:\n");
- runq_print(&tdq->tdq_realtime);
- printf("\ttimeshare runq:\n");
- runq_print(&tdq->tdq_timeshare);
- printf("\tidle runq:\n");
- runq_print(&tdq->tdq_idle);
+ printf("\trunq:\n");
+ runq_print(&tdq->tdq_runq);
}
static inline int
@@ -474,11 +509,11 @@
* date with what is actually on the run-queue. Selects the correct
* queue position for timeshare threads.
*/
-static __inline void
+static inline void
tdq_runq_add(struct tdq *tdq, struct thread *td, int flags)
{
struct td_sched *ts;
- u_char pri;
+ u_char pri, idx;
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
@@ -490,62 +525,68 @@
tdq->tdq_transferable++;
ts->ts_flags |= TSF_XFERABLE;
}
- if (pri < PRI_MIN_BATCH) {
- ts->ts_runq = &tdq->tdq_realtime;
- } else if (pri <= PRI_MAX_BATCH) {
- ts->ts_runq = &tdq->tdq_timeshare;
- KASSERT(pri <= PRI_MAX_BATCH && pri >= PRI_MIN_BATCH,
- ("Invalid priority %d on timeshare runq", pri));
+ if (PRI_MIN_BATCH <= pri && pri <= PRI_MAX_BATCH) {
/*
- * This queue contains only priorities between MIN and MAX
- * batch. Use the whole queue to represent these values.
+ * The queues allocated to the batch range are not used as
+ * a simple array but as a "circular" one where the insertion
+ * index (derived from 'pri') is offset by 'tdq_ts_off'. 'idx'
+ * is first computed relative to the first queue of the
+ * timeshare (TS) range, and made absolute afterwards.
*/
- if ((flags & (SRQ_BORROWING|SRQ_PREEMPTED)) == 0) {
- pri = RQ_NQS * (pri - PRI_MIN_BATCH) / PRI_BATCH_RANGE;
- pri = (pri + tdq->tdq_idx) % RQ_NQS;
+ if ((flags & (SRQ_BORROWING|SRQ_PREEMPTED)) != 0)
+ /* Current queue from which processes are being run. */
+ idx = tdq->tdq_ts_deq_off;
+ else {
+ idx = (RQ_PRI_TO_QUEUE_IDX(pri) - RQ_TS_POL_MIN +
+ tdq->tdq_ts_off) % RQ_TS_POL_MODULO;
/*
- * This effectively shortens the queue by one so we
- * can have a one slot difference between idx and
- * ridx while we wait for threads to drain.
+ * We avoid enqueuing low priority threads in the queue
+ * that we are still draining, effectively shortening
+ * the runqueue by one queue.
*/
- if (tdq->tdq_ridx != tdq->tdq_idx &&
- pri == tdq->tdq_ridx)
- pri = (unsigned char)(pri - 1) % RQ_NQS;
- } else
- pri = tdq->tdq_ridx;
- runq_add_pri(ts->ts_runq, td, pri, flags);
- return;
+ if (tdq->tdq_ts_deq_off != tdq->tdq_ts_off &&
+ idx == tdq->tdq_ts_deq_off)
+ /* Ensure the dividend is positive. */
+ idx = (idx - 1 + RQ_TS_POL_MODULO) %
+ RQ_TS_POL_MODULO;
+ }
+ /* Absolute queue index. */
+ idx += RQ_TS_POL_MIN;
+ runq_add_idx(&tdq->tdq_runq, td, idx, flags);
} else
- ts->ts_runq = &tdq->tdq_idle;
- runq_add(ts->ts_runq, td, flags);
+ runq_add(&tdq->tdq_runq, td, flags);
}
-/*
+/*
* Remove a thread from a run-queue. This typically happens when a thread
* is selected to run. Running threads are not on the queue and the
* transferable count does not reflect them.
*/
-static __inline void
+static inline void
tdq_runq_rem(struct tdq *tdq, struct thread *td)
{
struct td_sched *ts;
+ bool queue_empty;
ts = td_get_sched(td);
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
- KASSERT(ts->ts_runq != NULL,
- ("tdq_runq_remove: thread %p null ts_runq", td));
if (ts->ts_flags & TSF_XFERABLE) {
tdq->tdq_transferable--;
ts->ts_flags &= ~TSF_XFERABLE;
}
- if (ts->ts_runq == &tdq->tdq_timeshare) {
- if (tdq->tdq_idx != tdq->tdq_ridx)
- runq_remove_idx(ts->ts_runq, td, &tdq->tdq_ridx);
- else
- runq_remove_idx(ts->ts_runq, td, NULL);
- } else
- runq_remove(ts->ts_runq, td);
+ queue_empty = runq_remove(&tdq->tdq_runq, td);
+ /*
+ * If a batch thread was removed and left the queue being drained empty,
+ * advance the lagging dequeue offset. 'tdq_ts_deq_off' is relative to
+ * RQ_TS_POL_MIN whereas 'td_rqindex' is an absolute queue index.
+ */
+ if (queue_empty && PRI_MIN_BATCH <= td->td_priority &&
+ td->td_priority <= PRI_MAX_BATCH &&
+ tdq->tdq_ts_off != tdq->tdq_ts_deq_off &&
+ tdq->tdq_ts_deq_off + RQ_TS_POL_MIN == td->td_rqindex)
+ tdq->tdq_ts_deq_off = (tdq->tdq_ts_deq_off + 1) %
+ RQ_TS_POL_MODULO;
}
/*
@@ -1178,82 +1219,84 @@
ipi_cpu(cpu, IPI_PREEMPT);
}
+struct runq_steal_pred_data {
+ struct thread *td;
+ int cpu;
+};
+
+static bool
+runq_steal_pred(const int idx, struct rq_queue *const q, void *const data)
+{
+ struct runq_steal_pred_data *const d = data;
+ struct thread *td;
+
+ TAILQ_FOREACH(td, q, td_runq) {
+ if (THREAD_CAN_MIGRATE(td) && THREAD_CAN_SCHED(td, d->cpu)) {
+ d->td = td;
+ return (true);
+ }
+ }
+
+ return (false);
+}
+
+/*
+ * Steals load contained in queues with indices in the specified range.
+ */
+static inline struct thread *
+runq_steal_range(struct runq *const rq, const int lvl_min, const int lvl_max,
+ int cpu)
+{
+ struct runq_steal_pred_data data = {
+ .td = NULL,
+ .cpu = cpu,
+ };
+ int idx;
+
+ idx = runq_findq(rq, lvl_min, lvl_max, &runq_steal_pred, &data);
+ if (idx != -1) {
+ MPASS(data.td != NULL);
+ return (data.td);
+ }
+
+ MPASS(data.td == NULL);
+ return (NULL);
+}
+
+static inline struct thread *
+runq_steal_realtime(struct runq *const rq, int cpu)
+{
+
+ return (runq_steal_range(rq, RQ_RT_POL_MIN, RQ_RT_POL_MAX, cpu));
+}
+
/*
* Steals load from a timeshare queue. Honors the rotating queue head
* index.
*/
-static struct thread *
-runq_steal_from(struct runq *rq, int cpu, u_char start)
+static inline struct thread *
+runq_steal_timeshare(struct runq *const rq, int cpu, int off)
{
- struct rqbits *rqb;
- struct rqhead *rqh;
- struct thread *td, *first;
- int bit;
- int i;
-
- rqb = &rq->rq_status;
- bit = start & (RQB_BPW -1);
- first = NULL;
-again:
- for (i = RQB_WORD(start); i < RQB_LEN; bit = 0, i++) {
- if (rqb->rqb_bits[i] == 0)
- continue;
- if (bit == 0)
- bit = RQB_FFS(rqb->rqb_bits[i]);
- for (; bit < RQB_BPW; bit++) {
- if ((rqb->rqb_bits[i] & (1ul << bit)) == 0)
- continue;
- rqh = &rq->rq_queues[bit + (i << RQB_L2BPW)];
- TAILQ_FOREACH(td, rqh, td_runq) {
- if (first) {
- if (THREAD_CAN_MIGRATE(td) &&
- THREAD_CAN_SCHED(td, cpu))
- return (td);
- } else
- first = td;
- }
- }
- }
- if (start != 0) {
- start = 0;
- goto again;
- }
-
- if (first && THREAD_CAN_MIGRATE(first) &&
- THREAD_CAN_SCHED(first, cpu))
- return (first);
- return (NULL);
-}
-
-/*
- * Steals load from a standard linear queue.
- */
-static struct thread *
-runq_steal(struct runq *rq, int cpu)
-{
- struct rqhead *rqh;
- struct rqbits *rqb;
struct thread *td;
- int word;
- int bit;
- rqb = &rq->rq_status;
- for (word = 0; word < RQB_LEN; word++) {
- if (rqb->rqb_bits[word] == 0)
- continue;
- for (bit = 0; bit < RQB_BPW; bit++) {
- if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
- continue;
- rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
- TAILQ_FOREACH(td, rqh, td_runq)
- if (THREAD_CAN_MIGRATE(td) &&
- THREAD_CAN_SCHED(td, cpu))
- return (td);
- }
- }
- return (NULL);
+ MPASS(0 <= off && off < RQ_TS_POL_MODULO);
+
+ td = runq_steal_range(rq, RQ_TS_POL_MIN + off, RQ_TS_POL_MAX, cpu);
+ if (td != NULL || off == 0)
+ return (td);
+
+ td = runq_steal_range(rq, RQ_TS_POL_MIN, RQ_TS_POL_MIN + off - 1, cpu);
+ return (td);
}
+static inline struct thread *
+runq_steal_idle(struct runq *const rq, int cpu)
+{
+
+ return (runq_steal_range(rq, RQ_ID_POL_MIN, RQ_ID_POL_MAX, cpu));
+}
+
+
/*
* Attempt to steal a thread in priority order from a thread queue.
*/
@@ -1263,12 +1306,13 @@
struct thread *td;
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
- if ((td = runq_steal(&tdq->tdq_realtime, cpu)) != NULL)
+ td = runq_steal_realtime(&tdq->tdq_runq, cpu);
+ if (td != NULL)
return (td);
- if ((td = runq_steal_from(&tdq->tdq_timeshare,
- cpu, tdq->tdq_ridx)) != NULL)
+ td = runq_steal_timeshare(&tdq->tdq_runq, cpu, tdq->tdq_ts_deq_off);
+ if (td != NULL)
return (td);
- return (runq_steal(&tdq->tdq_idle, cpu));
+ return (runq_steal_idle(&tdq->tdq_runq, cpu));
}
/*
@@ -1450,6 +1494,35 @@
}
#endif
+static inline struct thread *
+runq_choose_realtime(struct runq *const rq)
+{
+
+ return (runq_first_thread_range(rq, RQ_RT_POL_MIN, RQ_RT_POL_MAX));
+}
+
+static struct thread *
+runq_choose_timeshare(struct runq *const rq, int off)
+{
+ struct thread *td;
+
+ MPASS(0 <= off && off < RQ_TS_POL_MODULO);
+
+ td = runq_first_thread_range(rq, RQ_TS_POL_MIN + off, RQ_TS_POL_MAX);
+ if (td != NULL || off == 0)
+ return (td);
+
+ td = runq_first_thread_range(rq, RQ_TS_POL_MIN, RQ_TS_POL_MIN + off - 1);
+ return (td);
+}
+
+static inline struct thread *
+runq_choose_idle(struct runq *const rq)
+{
+
+ return (runq_first_thread_range(rq, RQ_ID_POL_MIN, RQ_ID_POL_MAX));
+}
+
/*
* Pick the highest priority task we have and return it.
*/
@@ -1459,17 +1532,17 @@
struct thread *td;
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
- td = runq_choose(&tdq->tdq_realtime);
+ td = runq_choose_realtime(&tdq->tdq_runq);
if (td != NULL)
return (td);
- td = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
+ td = runq_choose_timeshare(&tdq->tdq_runq, tdq->tdq_ts_deq_off);
if (td != NULL) {
KASSERT(td->td_priority >= PRI_MIN_BATCH,
("tdq_choose: Invalid priority on timeshare queue %d",
td->td_priority));
return (td);
}
- td = runq_choose(&tdq->tdq_idle);
+ td = runq_choose_idle(&tdq->tdq_runq);
if (td != NULL) {
KASSERT(td->td_priority >= PRI_MIN_IDLE,
("tdq_choose: Invalid priority on idle queue %d",
@@ -1489,9 +1562,7 @@
if (bootverbose)
printf("ULE: setup cpu %d\n", id);
- runq_init(&tdq->tdq_realtime);
- runq_init(&tdq->tdq_timeshare);
- runq_init(&tdq->tdq_idle);
+ runq_init(&tdq->tdq_runq);
tdq->tdq_id = id;
snprintf(tdq->tdq_name, sizeof(tdq->tdq_name),
"sched lock %d", (int)TDQ_ID(tdq));
@@ -2595,13 +2666,14 @@
tdq->tdq_switchcnt = tdq->tdq_load;
/*
- * Advance the insert index once for each tick to ensure that all
+ * Advance the insert offset once for each tick to ensure that all
* threads get a chance to run.
*/
- if (tdq->tdq_idx == tdq->tdq_ridx) {
- tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS;
- if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx]))
- tdq->tdq_ridx = tdq->tdq_idx;
+ if (tdq->tdq_ts_off == tdq->tdq_ts_deq_off) {
+ tdq->tdq_ts_off = (tdq->tdq_ts_off + 1) % RQ_TS_POL_MODULO;
+ if (runq_is_queue_empty(&tdq->tdq_runq,
+ tdq->tdq_ts_deq_off + RQ_TS_POL_MIN))
+ tdq->tdq_ts_deq_off = tdq->tdq_ts_off;
}
ts = td_get_sched(td);
sched_pctcpu_update(ts, 1);
@@ -2655,24 +2727,20 @@
* Return whether the current CPU has runnable tasks. Used for in-kernel
* cooperative idle threads.
*/
-int
+bool
sched_runnable(void)
{
struct tdq *tdq;
- int load;
-
- load = 1;
tdq = TDQ_SELF();
if ((curthread->td_flags & TDF_IDLETD) != 0) {
if (TDQ_LOAD(tdq) > 0)
- goto out;
+ return (true);
} else
if (TDQ_LOAD(tdq) - 1 > 0)
- goto out;
- load = 0;
-out:
- return (load);
+ return (true);
+
+ return (false);
}
/*
diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c
--- a/sys/kern/subr_log.c
+++ b/sys/kern/subr_log.c
@@ -47,7 +47,7 @@
#include <sys/filedesc.h>
#include <sys/sysctl.h>
-#define LOG_RDPRI (PZERO + 1)
+#define LOG_RDPRI PZERO
#define LOG_ASYNC 0x04
diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c
--- a/sys/kern/sysv_msg.c
+++ b/sys/kern/sysv_msg.c
@@ -894,7 +894,7 @@
we_own_it = 1;
}
DPRINTF(("msgsnd: goodnight\n"));
- error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
+ error = msleep(msqkptr, &msq_mtx, PVFS | PCATCH,
"msgsnd", hz);
DPRINTF(("msgsnd: good morning, error=%d\n", error));
if (we_own_it)
@@ -1303,7 +1303,7 @@
*/
DPRINTF(("msgrcv: goodnight\n"));
- error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
+ error = msleep(msqkptr, &msq_mtx, PVFS | PCATCH,
"msgrcv", 0);
DPRINTF(("msgrcv: good morning (error=%d)\n", error));
diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c
--- a/sys/kern/sysv_sem.c
+++ b/sys/kern/sysv_sem.c
@@ -1309,7 +1309,7 @@
semptr->semncnt++;
DPRINTF(("semop: good night!\n"));
- error = msleep_sbt(semakptr, sema_mtxp, (PZERO - 4) | PCATCH,
+ error = msleep_sbt(semakptr, sema_mtxp, PVFS | PCATCH,
"semwait", sbt, precision, C_ABSOLUTE);
DPRINTF(("semop: good morning (error=%d)!\n", error));
/* return code is checked below, after sem[nz]cnt-- */
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -756,7 +756,7 @@
break;
}
error = msleep(&bd->bd_wanted, BD_LOCKPTR(bd),
- (PRIBIO + 4) | slpflag, "newbuf", slptimeo);
+ PVFS | slpflag, "newbuf", slptimeo);
if (error != 0)
break;
}
@@ -2654,8 +2654,7 @@
mtx_lock(&bdirtylock);
while (buf_dirty_count_severe()) {
bdirtywait = 1;
- msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4),
- "flswai", 0);
+ msleep(&bdirtywait, &bdirtylock, PVFS, "flswai", 0);
}
mtx_unlock(&bdirtylock);
}
@@ -5234,7 +5233,7 @@
while (bo->bo_numoutput) {
bo->bo_flag |= BO_WWAIT;
error = msleep(&bo->bo_numoutput, BO_LOCKPTR(bo),
- slpflag | (PRIBIO + 1), "bo_wwait", timeo);
+ slpflag | PRIBIO, "bo_wwait", timeo);
if (error)
break;
}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -787,7 +787,7 @@
}
DROP_GIANT();
sleepq_add(&fp->f_vnread_flags, NULL, "vofflock", 0, 0);
- sleepq_wait(&fp->f_vnread_flags, PUSER -1);
+ sleepq_wait(&fp->f_vnread_flags, PRI_MAX_KERN);
PICKUP_GIANT();
sleepq_lock(&fp->f_vnread_flags);
state = atomic_load_16(flagsp);
@@ -849,7 +849,7 @@
if ((flags & FOF_NOLOCK) == 0) {
while (fp->f_vnread_flags & FOFFSET_LOCKED) {
fp->f_vnread_flags |= FOFFSET_LOCK_WAITING;
- msleep(&fp->f_vnread_flags, mtxp, PUSER -1,
+ msleep(&fp->f_vnread_flags, mtxp, PRI_MAX_KERN,
"vofflock", 0);
}
fp->f_vnread_flags |= FOFFSET_LOCKED;
@@ -1897,7 +1897,7 @@
if (flags & V_PCATCH)
mflags |= PCATCH;
}
- mflags |= (PUSER - 1);
+ mflags |= PRI_MAX_KERN;
while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
if ((flags & V_NOWAIT) != 0) {
error = EWOULDBLOCK;
@@ -2022,7 +2022,7 @@
if ((flags & V_PCATCH) != 0)
mflags |= PCATCH;
}
- mflags |= (PUSER - 1) | PDROP;
+ mflags |= PRI_MAX_KERN | PDROP;
error = msleep(&mp->mnt_flag, MNT_MTX(mp), mflags, "suspfs", 0);
vfs_rel(mp);
if (error == 0)
@@ -2107,7 +2107,7 @@
return (EALREADY);
}
while (mp->mnt_kern_flag & MNTK_SUSPEND)
- msleep(&mp->mnt_flag, MNT_MTX(mp), PUSER - 1, "wsuspfs", 0);
+ msleep(&mp->mnt_flag, MNT_MTX(mp), PRI_MAX_KERN, "wsuspfs", 0);
/*
* Unmount holds a write reference on the mount point. If we
@@ -2128,7 +2128,7 @@
mp->mnt_susp_owner = curthread;
if (mp->mnt_writeopcount > 0)
(void) msleep(&mp->mnt_writeopcount,
- MNT_MTX(mp), (PUSER - 1)|PDROP, "suspwt", 0);
+ MNT_MTX(mp), PRI_MAX_KERN | PDROP, "suspwt", 0);
else
MNT_IUNLOCK(mp);
if ((error = VFS_SYNC(mp, MNT_SUSPEND)) != 0) {
diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c
--- a/sys/net/if_tuntap.c
+++ b/sys/net/if_tuntap.c
@@ -834,7 +834,7 @@
tp->tun_flags &= ~TUN_RWAIT;
wakeup(tp);
}
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ selwakeuppri(&tp->tun_rsel, PZERO);
KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
TUN_UNLOCK(tp);
@@ -1172,7 +1172,7 @@
CURVNET_RESTORE();
funsetown(&tp->tun_sigio);
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ selwakeuppri(&tp->tun_rsel, PZERO);
KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
TUNDEBUG (ifp, "closed\n");
tp->tun_flags &= ~TUN_OPEN;
@@ -1706,7 +1706,7 @@
return (EWOULDBLOCK);
}
tp->tun_flags |= TUN_RWAIT;
- error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
+ error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | PZERO,
"tunread", 0);
if (error != 0) {
TUN_UNLOCK(tp);
diff --git a/sys/netgraph/ng_device.c b/sys/netgraph/ng_device.c
--- a/sys/netgraph/ng_device.c
+++ b/sys/netgraph/ng_device.c
@@ -462,7 +462,7 @@
mtx_lock(&priv->ngd_mtx);
priv->flags |= NGDF_RWAIT;
if ((error = msleep(priv, &priv->ngd_mtx,
- PDROP | PCATCH | (PZERO + 1),
+ PDROP | PCATCH | PZERO,
"ngdread", 0)) != 0)
return (error);
}
diff --git a/sys/powerpc/include/runq.h b/sys/powerpc/include/runq.h
deleted file mode 100644
--- a/sys/powerpc/include/runq.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#ifdef __powerpc64__
-#define RQB_LEN (1UL) /* Number of priority status words. */
-#define RQB_L2BPW (6UL) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#else
-#define RQB_LEN (2) /* Number of priority status words. */
-#define RQB_L2BPW (5) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#endif
-#define RQB_BPW (1UL<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1UL << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffsl(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-#ifdef __powerpc64__
-typedef u_int64_t rqb_word_t;
-#else
-typedef u_int32_t rqb_word_t;
-#endif
-
-#endif
diff --git a/sys/riscv/include/runq.h b/sys/riscv/include/runq.h
deleted file mode 100644
--- a/sys/riscv/include/runq.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-
- * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _MACHINE_RUNQ_H_
-#define _MACHINE_RUNQ_H_
-
-#define RQB_LEN (1) /* Number of priority status words. */
-#define RQB_L2BPW (6) /* Log2(sizeof(rqb_word_t) * NBBY)). */
-#define RQB_BPW (1<<RQB_L2BPW) /* Bits in an rqb_word_t. */
-
-#define RQB_BIT(pri) (1ul << ((pri) & (RQB_BPW - 1)))
-#define RQB_WORD(pri) ((pri) >> RQB_L2BPW)
-
-#define RQB_FFS(word) (ffsl(word) - 1)
-
-/*
- * Type of run queue status word.
- */
-typedef unsigned long rqb_word_t;
-
-#endif
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -296,7 +296,7 @@
* Initialize a lock.
*/
#define BUF_LOCKINIT(bp, wmesg) \
- lockinit(&(bp)->b_lock, PRIBIO + 4, wmesg, 0, LK_NEW)
+ lockinit(&(bp)->b_lock, PVFS, wmesg, 0, LK_NEW)
/*
*
* Get a lock sleeping non-interruptably until it becomes available.
@@ -311,7 +311,7 @@
*/
#define BUF_TIMELOCK(bp, locktype, interlock, wmesg, catch, timo) \
_lockmgr_args_rw(&(bp)->b_lock, (locktype) | LK_TIMELOCK, \
- (interlock), (wmesg), (PRIBIO + 4) | (catch), (timo), \
+ (interlock), (wmesg), PVFS | (catch), (timo), \
LOCK_FILE, LOCK_LINE)
/*
diff --git a/sys/sys/param.h b/sys/sys/param.h
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -73,7 +73,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1500018
+#define __FreeBSD_version 1500019
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/priority.h b/sys/sys/priority.h
--- a/sys/sys/priority.h
+++ b/sys/sys/priority.h
@@ -64,17 +64,23 @@
*/
/*
- * Priorities range from 0 to 255, but differences of less then 4 (RQ_PPQ)
- * are insignificant. Ranges are as follows:
+ * Priorities range from 0 to 255. Ranges are as follows:
*
- * Interrupt threads: 0 - 15
- * Realtime user threads: 16 - 47
- * Top half kernel threads: 48 - 87
- * Time sharing user threads: 88 - 223
+ * Interrupt threads: 0 - 7
+ * Realtime user threads: 8 - 39
+ * Top half kernel threads: 40 - 55
+ * Time sharing user threads: 56 - 223
* Idle user threads: 224 - 255
*
- * XXX If/When the specific interrupt thread and top half thread ranges
- * disappear, a larger range can be used for user processes.
+ * Priority levels of rtprio(2)'s RTP_PRIO_FIFO and RTP_PRIO_REALTIME and
+ * POSIX's SCHED_FIFO and SCHED_RR are directly mapped to the internal realtime
+ * range mentioned above by a simple translation. This range's length
+ * consequently cannot be changed without impacts on the scheduling priority
+ * code, and in any case must never be smaller than 32 for POSIX compliance and
+ * rtprio(2) backwards compatibility. Similarly, priority levels of rtprio(2)'s
+ * RTP_PRIO_IDLE are directly mapped to the internal idle range above (and,
+ * soon, those of the to-be-introduced SCHED_IDLE policy as well), so changing
+ * that range is subject to the same caveats and restrictions.
*/
#define PRI_MIN (0) /* Highest priority. */
@@ -88,34 +94,34 @@
* decay to lower priorities if they run for full time slices.
*/
#define PI_REALTIME (PRI_MIN_ITHD + 0)
-#define PI_INTR (PRI_MIN_ITHD + 4)
+#define PI_INTR (PRI_MIN_ITHD + 1)
#define PI_AV PI_INTR
#define PI_NET PI_INTR
#define PI_DISK PI_INTR
#define PI_TTY PI_INTR
#define PI_DULL PI_INTR
-#define PI_SOFT (PRI_MIN_ITHD + 8)
+#define PI_SOFT (PRI_MIN_ITHD + 2)
#define PI_SOFTCLOCK PI_SOFT
#define PI_SWI(x) PI_SOFT
-#define PRI_MIN_REALTIME (16)
+#define PRI_MIN_REALTIME (8)
#define PRI_MAX_REALTIME (PRI_MIN_KERN - 1)
-#define PRI_MIN_KERN (48)
+#define PRI_MIN_KERN (40)
#define PRI_MAX_KERN (PRI_MIN_TIMESHARE - 1)
#define PSWP (PRI_MIN_KERN + 0)
-#define PVM (PRI_MIN_KERN + 4)
-#define PINOD (PRI_MIN_KERN + 8)
-#define PRIBIO (PRI_MIN_KERN + 12)
-#define PVFS (PRI_MIN_KERN + 16)
-#define PZERO (PRI_MIN_KERN + 20)
-#define PSOCK (PRI_MIN_KERN + 24)
-#define PWAIT (PRI_MIN_KERN + 28)
-#define PLOCK (PRI_MIN_KERN + 32)
-#define PPAUSE (PRI_MIN_KERN + 36)
+#define PVM (PRI_MIN_KERN + 1)
+#define PINOD (PRI_MIN_KERN + 2)
+#define PRIBIO (PRI_MIN_KERN + 3)
+#define PVFS (PRI_MIN_KERN + 4)
+#define PZERO (PRI_MIN_KERN + 5)
+#define PSOCK (PRI_MIN_KERN + 6)
+#define PWAIT (PRI_MIN_KERN + 7)
+#define PLOCK (PRI_MIN_KERN + 8)
+#define PPAUSE (PRI_MIN_KERN + 9)
-#define PRI_MIN_TIMESHARE (88)
+#define PRI_MIN_TIMESHARE (56)
#define PRI_MAX_TIMESHARE (PRI_MIN_IDLE - 1)
#define PUSER (PRI_MIN_TIMESHARE)
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -53,7 +53,6 @@
#include <sys/osd.h>
#include <sys/priority.h>
#include <sys/rtprio.h> /* XXX. */
-#include <sys/runq.h>
#include <sys/resource.h>
#include <sys/sigio.h>
#include <sys/signal.h>
diff --git a/sys/sys/runq.h b/sys/sys/runq.h
--- a/sys/sys/runq.h
+++ b/sys/sys/runq.h
@@ -29,7 +29,11 @@
#ifndef _RUNQ_H_
#define _RUNQ_H_
-#include <machine/runq.h>
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+#include <sys/types.h> /* For bool. */
struct thread;
@@ -37,20 +41,65 @@
* Run queue parameters.
*/
-#define RQ_NQS (64) /* Number of run queues. */
-#define RQ_PPQ (4) /* Priorities per queue. */
+#define RQ_MAX_PRIO (255) /* Maximum priority (minimum is 0). */
+#define RQ_PPQ (1) /* Priorities per queue. */
/*
- * Head of run queues.
+ * Convenience macros from <sys/param.h>.
*/
-TAILQ_HEAD(rqhead, thread);
+#ifndef NBBY
+#define NBBY 8
+#endif
+#ifndef howmany
+#define howmany(x, y) (((x)+((y)-1))/(y))
+#endif
+
+/*
+ * Deduced from the above parameters and machine ones.
+ */
+#define RQ_NQS (howmany(RQ_MAX_PRIO + 1, RQ_PPQ)) /* Number of run queues. */
+#define RQ_PRI_TO_QUEUE_IDX(pri) ((pri) / RQ_PPQ) /* Priority to queue index. */
+
+typedef unsigned long rqsw_t; /* runq's status words type. */
+#define RQSW_BPW (sizeof(rqsw_t) * NBBY) /* Bits per runq word. */
+#if defined(_LP64)
+#define RQSW_L2BPW (6) /* Log2(sizeof(rqsw_t) * NBBY). */
+#elif defined(_ILP32)
+#define RQSW_L2BPW (5) /* Log2(sizeof(rqsw_t) * NBBY). */
+#else
+#error Not _LP64 nor _ILP32!
+#endif
+/*
+ * That RQSW_BPW and RQSW_L2BPW are consistent is checked by a static assertion.
+ */
+
+/* Number of status words to cover RQ_NQS queues. */
+#define RQSW_NB (howmany(RQ_NQS, RQSW_BPW))
+#define RQSW_IDX(idx) ((idx) >> RQSW_L2BPW)
+#define RQSW_BIT_IDX(idx) ((idx) & (RQSW_BPW - 1))
+#define RQSW_BIT(idx) (1ul << RQSW_BIT_IDX(idx))
+#define RQSW_BSF(word) ({ \
+ int _res = ffsl((long)(word)); /* Assumes two's complement. */ \
+ MPASS(_res > 0); \
+ _res - 1; \
+})
+#define RQSW_TO_QUEUE_IDX(word_idx, bit_idx) \
+ (((word_idx) << RQSW_L2BPW) + (bit_idx))
+#define RQSW_FIRST_QUEUE_IDX(word_idx, word) \
+ RQSW_TO_QUEUE_IDX(word_idx, RQSW_BSF(word))
+
+
+/*
+ * The queue for a given index as a list of threads.
+ */
+TAILQ_HEAD(rq_queue, thread);
/*
* Bit array which maintains the status of a run queue. When a queue is
* non-empty the bit corresponding to the queue number will be set.
*/
-struct rqbits {
- rqb_word_t rqb_bits[RQB_LEN];
+struct rq_status {
+ rqsw_t rq_sw[RQSW_NB];
};
/*
@@ -58,18 +107,29 @@
* are placed, and a structure to maintain the status of each queue.
*/
struct runq {
- struct rqbits rq_status;
- struct rqhead rq_queues[RQ_NQS];
+ struct rq_status rq_status;
+ struct rq_queue rq_queues[RQ_NQS];
};
-void runq_add(struct runq *, struct thread *, int);
-void runq_add_pri(struct runq *, struct thread *, u_char, int);
-int runq_check(struct runq *);
-struct thread *runq_choose(struct runq *);
-struct thread *runq_choose_from(struct runq *, u_char);
-struct thread *runq_choose_fuzz(struct runq *, int);
void runq_init(struct runq *);
-void runq_remove(struct runq *, struct thread *);
-void runq_remove_idx(struct runq *, struct thread *, u_char *);
+bool runq_is_queue_empty(struct runq *, int _idx);
+void runq_add(struct runq *, struct thread *, int _flags);
+void runq_add_idx(struct runq *, struct thread *, int _idx, int _flags);
+bool runq_remove(struct runq *, struct thread *);
+
+/*
+ * Implementation helpers for common and scheduler-specific runq_choose*()
+ * functions.
+ */
+typedef bool runq_pred_t(int _idx, struct rq_queue *, void *_data);
+int runq_findq(struct runq *const rq, const int lvl_min,
+ const int lvl_max,
+ runq_pred_t *const pred, void *const pred_data);
+struct thread *runq_first_thread_range(struct runq *const rq,
+ const int lvl_min, const int lvl_max);
+
+bool runq_not_empty(struct runq *);
+struct thread *runq_choose(struct runq *);
+struct thread *runq_choose_fuzz(struct runq *, int _fuzz);
#endif
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -63,6 +63,9 @@
#define _SCHED_H_
#ifdef _KERNEL
+
+#include <sys/types.h> /* For bool. */
+
/*
* General scheduling info.
*
@@ -74,7 +77,7 @@
*/
int sched_load(void);
int sched_rr_interval(void);
-int sched_runnable(void);
+bool sched_runnable(void);
/*
* Proc related scheduling hooks.
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -497,7 +497,7 @@
while (mp->mnt_secondary_writes != 0) {
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
- (PUSER - 1) | PDROP, "secwr", 0);
+ PRI_MAX_KERN | PDROP, "secwr", 0);
BO_LOCK(bo);
MNT_ILOCK(mp);
}
@@ -14561,7 +14561,7 @@
while (mp->mnt_secondary_writes != 0) {
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
- (PUSER - 1) | PDROP, "secwr", 0);
+ PRI_MAX_KERN | PDROP, "secwr", 0);
BO_LOCK(bo);
MNT_ILOCK(mp);
}
@@ -14601,7 +14601,7 @@
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes,
MNT_MTX(mp),
- (PUSER - 1) | PDROP, "secwr", 0);
+ PRI_MAX_KERN | PDROP, "secwr", 0);
BO_LOCK(bo);
continue;
}
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1399,8 +1399,7 @@
VI_LOCK(vp);
while (ip->i_flag & IN_EA_LOCKED) {
UFS_INODE_SET_FLAG(ip, IN_EA_LOCKWAIT);
- msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
- 0);
+ msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD, "ufs_ea", 0);
}
UFS_INODE_SET_FLAG(ip, IN_EA_LOCKED);
VI_UNLOCK(vp);
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -179,7 +179,7 @@
if ((dq = ip->i_dquot[i]) == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkdq1");
+ DQI_WAIT(dq, PINOD, "chkdq1");
ncurblocks = dq->dq_curblocks + change;
if (ncurblocks >= 0)
dq->dq_curblocks = ncurblocks;
@@ -201,7 +201,7 @@
continue;
warn = 0;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkdq2");
+ DQI_WAIT(dq, PINOD, "chkdq2");
if (do_check) {
error = chkdqchg(ip, change, cred, i, &warn);
if (error) {
@@ -215,7 +215,7 @@
if (dq == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkdq3");
+ DQI_WAIT(dq, PINOD, "chkdq3");
ncurblocks = dq->dq_curblocks - change;
if (ncurblocks >= 0)
dq->dq_curblocks = ncurblocks;
@@ -320,7 +320,7 @@
if ((dq = ip->i_dquot[i]) == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkiq1");
+ DQI_WAIT(dq, PINOD, "chkiq1");
if (dq->dq_curinodes >= -change)
dq->dq_curinodes += change;
else
@@ -341,7 +341,7 @@
continue;
warn = 0;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkiq2");
+ DQI_WAIT(dq, PINOD, "chkiq2");
if (do_check) {
error = chkiqchg(ip, change, cred, i, &warn);
if (error) {
@@ -355,7 +355,7 @@
if (dq == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "chkiq3");
+ DQI_WAIT(dq, PINOD, "chkiq3");
if (dq->dq_curinodes >= change)
dq->dq_curinodes -= change;
else
@@ -855,7 +855,7 @@
return (error);
dq = ndq;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "setqta");
+ DQI_WAIT(dq, PINOD, "setqta");
/*
* Copy all but the current values.
* Reset time limit if previously had no soft limit or were
@@ -918,7 +918,7 @@
return (error);
dq = ndq;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "setuse");
+ DQI_WAIT(dq, PINOD, "setuse");
/*
* Reset time limit if have a soft limit and were
* previously under it, but are now over it.
@@ -1314,7 +1314,7 @@
if (dq != NULL) {
DQH_UNLOCK();
hfound: DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "dqget");
+ DQI_WAIT(dq, PINOD, "dqget");
DQI_UNLOCK(dq);
if (dq->dq_ump == NULL) {
dqrele(vp, dq);
@@ -1588,7 +1588,7 @@
vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY);
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+2, "dqsync");
+ DQI_WAIT(dq, PINOD, "dqsync");
if ((dq->dq_flags & DQ_MOD) == 0)
goto out;
dq->dq_flags |= DQ_LOCK;
@@ -1742,7 +1742,7 @@
if ((dq = qrp[i]) == NODQUOT)
continue;
DQI_LOCK(dq);
- DQI_WAIT(dq, PINOD+1, "adjqta");
+ DQI_WAIT(dq, PINOD, "adjqta");
ncurblocks = dq->dq_curblocks + blkcount;
if (ncurblocks >= 0)
dq->dq_curblocks = ncurblocks;
diff --git a/tests/sys/kern/ptrace_test.c b/tests/sys/kern/ptrace_test.c
--- a/tests/sys/kern/ptrace_test.c
+++ b/tests/sys/kern/ptrace_test.c
@@ -34,7 +34,6 @@
#include <sys/ptrace.h>
#include <sys/procfs.h>
#include <sys/queue.h>
-#include <sys/runq.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/user.h>
@@ -2027,7 +2026,7 @@
sched_get_priority_min(SCHED_FIFO)) / 2;
CHILD_REQUIRE(pthread_setschedparam(pthread_self(),
SCHED_FIFO, &sched_param) == 0);
- sched_param.sched_priority -= RQ_PPQ;
+ sched_param.sched_priority -= 1;
CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO,
&sched_param) == 0);
@@ -2130,7 +2129,7 @@
sched_get_priority_min(SCHED_FIFO)) / 2;
CHILD_REQUIRE(pthread_setschedparam(pthread_self(),
SCHED_FIFO, &sched_param) == 0);
- sched_param.sched_priority -= RQ_PPQ;
+ sched_param.sched_priority -= 1;
CHILD_REQUIRE(pthread_setschedparam(t, SCHED_FIFO,
&sched_param) == 0);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Sep 22, 12:38 PM (22 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
12451471
Default Alt Text
D45393.diff (80 KB)
Attached To
Mode
D45393: runq/sched: Switch to 256 distinct queues and extend the timeshare range
Attached
Detach File
Event Timeline
Log In to Comment