D41483.id.diff
diff --git a/cddl/lib/libzpool/Makefile b/cddl/lib/libzpool/Makefile
--- a/cddl/lib/libzpool/Makefile
+++ b/cddl/lib/libzpool/Makefile
@@ -135,6 +135,7 @@
uberblock.c \
unique.c \
vdev.c \
+ vdev_cache.c \
vdev_draid.c \
vdev_draid_rand.c \
vdev_file.c \
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -326,6 +326,7 @@
contrib/openzfs/module/zfs/uberblock.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/unique.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/vdev.c optional zfs compile-with "${ZFS_C}"
+contrib/openzfs/module/zfs/vdev_cache.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/vdev_draid.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/vdev_draid_rand.c optional zfs compile-with "${ZFS_C}"
contrib/openzfs/module/zfs/vdev_indirect.c optional zfs compile-with "${ZFS_C}"
diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk
--- a/sys/conf/kern.pre.mk
+++ b/sys/conf/kern.pre.mk
@@ -252,7 +252,8 @@
# Special flags for managing the compat compiles for ZFS
ZFS_CFLAGS+= -I$S/contrib/openzfs/module/icp/include \
${CDDL_CFLAGS} -DBUILDING_ZFS -DHAVE_UIO_ZEROCOPY \
- -DWITH_NETDUMP -D__KERNEL__ -D_SYS_CONDVAR_H_ -DSMP
+ -DWITH_NETDUMP -D__KERNEL__ -D_SYS_CONDVAR_H_ -DSMP \
+ -DIN_FREEBSD_BASE
.if ${MACHINE_ARCH} == "amd64"
ZFS_CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \
diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META
--- a/sys/contrib/openzfs/META
+++ b/sys/contrib/openzfs/META
@@ -1,10 +1,10 @@
Meta: 1
Name: zfs
Branch: 1.0
-Version: 2.2.0
-Release: rc1
+Version: 2.1.99
+Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
-Linux-Maximum: 6.3
+Linux-Maximum: 6.2
Linux-Minimum: 3.10
diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/arc_summary
--- a/sys/contrib/openzfs/cmd/arc_summary
+++ b/sys/contrib/openzfs/cmd/arc_summary
@@ -64,6 +64,7 @@
SECTION_PATHS = {'arc': 'arcstats',
'dmu': 'dmu_tx',
'l2arc': 'arcstats', # L2ARC stuff lives in arcstats
+ 'vdev': 'vdev_cache_stats',
'zfetch': 'zfetchstats',
'zil': 'zil'}
@@ -89,6 +90,8 @@
# Requires py36-sysctl on FreeBSD
import sysctl
+ VDEV_CACHE_SIZE = 'vdev.cache_size'
+
def is_value(ctl):
return ctl.type != sysctl.CTLTYPE_NODE
@@ -132,6 +135,8 @@
SPL_PATH = '/sys/module/spl/parameters'
TUNABLES_PATH = '/sys/module/zfs/parameters'
+ VDEV_CACHE_SIZE = 'zfs_vdev_cache_size'
+
def load_kstats(section):
path = os.path.join(KSTAT_PATH, section)
with open(path) as f:
@@ -837,8 +842,7 @@
('Free on write:', 'l2_free_on_write'),
('R/W clashes:', 'l2_rw_clash'),
('Bad checksums:', 'l2_cksum_bad'),
- ('Read errors:', 'l2_io_error'),
- ('Write errors:', 'l2_writes_error'))
+ ('I/O errors:', 'l2_io_error'))
for title, value in l2_todo:
prt_i1(title, f_hits(arc_stats[value]))
@@ -874,20 +878,28 @@
prt_i2('Miss ratio:',
f_perc(arc_stats['l2_misses'], l2_access_total),
f_hits(arc_stats['l2_misses']))
+ prt_i1('Feeds:', f_hits(arc_stats['l2_feeds']))
print()
- print('L2ARC I/O:')
- prt_i2('Reads:',
- f_bytes(arc_stats['l2_read_bytes']),
- f_hits(arc_stats['l2_hits']))
- prt_i2('Writes:',
- f_bytes(arc_stats['l2_write_bytes']),
- f_hits(arc_stats['l2_writes_sent']))
+ print('L2ARC writes:')
+
+ if arc_stats['l2_writes_done'] != arc_stats['l2_writes_sent']:
+ prt_i2('Writes sent:', 'FAULTED', f_hits(arc_stats['l2_writes_sent']))
+ prt_i2('Done ratio:',
+ f_perc(arc_stats['l2_writes_done'],
+ arc_stats['l2_writes_sent']),
+ f_hits(arc_stats['l2_writes_done']))
+ prt_i2('Error ratio:',
+ f_perc(arc_stats['l2_writes_error'],
+ arc_stats['l2_writes_sent']),
+ f_hits(arc_stats['l2_writes_error']))
+ else:
+ prt_i2('Writes sent:', '100 %', f_hits(arc_stats['l2_writes_sent']))
print()
print('L2ARC evicts:')
- prt_i1('L1 cached:', f_hits(arc_stats['l2_evict_l1cached']))
- prt_i1('While reading:', f_hits(arc_stats['l2_evict_reading']))
+ prt_i1('Lock retries:', f_hits(arc_stats['l2_evict_lock_retry']))
+ prt_i1('Upon reading:', f_hits(arc_stats['l2_evict_reading']))
print()
@@ -947,6 +959,35 @@
print()
+def section_vdev(kstats_dict):
+ """Collect information on VDEV caches"""
+
+ # Currently [Nov 2017] the VDEV cache is disabled, because it is actually
+ # harmful. When this is the case, we just skip the whole entry. See
+ # https://github.com/openzfs/zfs/blob/master/module/zfs/vdev_cache.c
+ # for details
+ tunables = get_vdev_params()
+
+ if tunables[VDEV_CACHE_SIZE] == '0':
+ print('VDEV cache disabled, skipping section\n')
+ return
+
+ vdev_stats = isolate_section('vdev_cache_stats', kstats_dict)
+
+ vdev_cache_total = int(vdev_stats['hits']) +\
+ int(vdev_stats['misses']) +\
+ int(vdev_stats['delegations'])
+
+ prt_1('VDEV cache summary:', f_hits(vdev_cache_total))
+ prt_i2('Hit ratio:', f_perc(vdev_stats['hits'], vdev_cache_total),
+ f_hits(vdev_stats['hits']))
+ prt_i2('Miss ratio:', f_perc(vdev_stats['misses'], vdev_cache_total),
+ f_hits(vdev_stats['misses']))
+ prt_i2('Delegations:', f_perc(vdev_stats['delegations'], vdev_cache_total),
+ f_hits(vdev_stats['delegations']))
+ print()
+
+
def section_zil(kstats_dict):
"""Collect information on the ZFS Intent Log. Some of the information
taken from https://github.com/openzfs/zfs/blob/master/include/sys/zil.h
@@ -974,6 +1015,7 @@
'l2arc': section_l2arc,
'spl': section_spl,
'tunables': section_tunables,
+ 'vdev': section_vdev,
'zil': section_zil}
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -33,7 +33,6 @@
* under sponsorship from the FreeBSD Foundation.
* Copyright (c) 2021 Allan Jude
* Copyright (c) 2021 Toomas Soome <tsoome@me.com>
- * Copyright (c) 2023, Klara Inc.
*/
#include <stdio.h>
@@ -327,7 +326,7 @@
int err;
struct sublivelist_verify *sv = args;
- zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare, NULL,
+ zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare,
sizeof (sublivelist_verify_block_refcnt_t));
err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
@@ -391,7 +390,7 @@
{
(void) args;
sublivelist_verify_t sv;
- zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
+ zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
sizeof (sublivelist_verify_block_t));
int err = sublivelist_verify_func(&sv, dle);
zfs_btree_clear(&sv.sv_leftover);
@@ -683,7 +682,7 @@
(void) printf("Verifying deleted livelist entries\n");
sublivelist_verify_t sv;
- zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
+ zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
sizeof (sublivelist_verify_block_t));
iterate_deleted_livelists(spa, livelist_verify, &sv);
@@ -717,7 +716,7 @@
mv.mv_start = m->ms_start;
mv.mv_end = m->ms_start + m->ms_size;
zfs_btree_create(&mv.mv_livelist_allocs,
- livelist_block_compare, NULL,
+ livelist_block_compare,
sizeof (sublivelist_verify_block_t));
mv_populate_livelist_allocs(&mv, &sv);
@@ -790,11 +789,8 @@
"\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
"\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] [-K <key>]\n"
"\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]\n"
- "\t%s -B [-e [-V] [-p <path> ...]] [-I <inflight I/Os>]\n"
- "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
- "\t\t[-K <key>] <poolname>/<objset id> [<backupflags>]\n"
"\t%s [-v] <bookmark>\n"
- "\t%s -C [-A] [-U <cache>] [<poolname>]\n"
+ "\t%s -C [-A] [-U <cache>]\n"
"\t%s -l [-Aqu] <device>\n"
"\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
"[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
@@ -806,7 +802,7 @@
"\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
"<poolname>\n\n",
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
- cmdname, cmdname, cmdname, cmdname, cmdname);
+ cmdname, cmdname, cmdname, cmdname);
(void) fprintf(stderr, " Dataset name must include at least one "
"separator character '/' or '@'\n");
@@ -829,8 +825,6 @@
(void) fprintf(stderr, " Options to control amount of output:\n");
(void) fprintf(stderr, " -b --block-stats "
"block statistics\n");
- (void) fprintf(stderr, " -B --backup "
- "backup stream\n");
(void) fprintf(stderr, " -c --checksum "
"checksum all metadata (twice for all data) blocks\n");
(void) fprintf(stderr, " -C --config "
@@ -4881,81 +4875,6 @@
return (err);
}
-static int
-dump_backup_bytes(objset_t *os, void *buf, int len, void *arg)
-{
- const char *p = (const char *)buf;
- ssize_t nwritten;
-
- (void) os;
- (void) arg;
-
- /* Write the data out, handling short writes and signals. */
- while ((nwritten = write(STDOUT_FILENO, p, len)) < len) {
- if (nwritten < 0) {
- if (errno == EINTR)
- continue;
- return (errno);
- }
- p += nwritten;
- len -= nwritten;
- }
-
- return (0);
-}
-
-static void
-dump_backup(const char *pool, uint64_t objset_id, const char *flagstr)
-{
- boolean_t embed = B_FALSE;
- boolean_t large_block = B_FALSE;
- boolean_t compress = B_FALSE;
- boolean_t raw = B_FALSE;
-
- const char *c;
- for (c = flagstr; c != NULL && *c != '\0'; c++) {
- switch (*c) {
- case 'e':
- embed = B_TRUE;
- break;
- case 'L':
- large_block = B_TRUE;
- break;
- case 'c':
- compress = B_TRUE;
- break;
- case 'w':
- raw = B_TRUE;
- break;
- default:
- fprintf(stderr, "dump_backup: invalid flag "
- "'%c'\n", *c);
- return;
- }
- }
-
- if (isatty(STDOUT_FILENO)) {
- fprintf(stderr, "dump_backup: stream cannot be written "
- "to a terminal\n");
- return;
- }
-
- offset_t off = 0;
- dmu_send_outparams_t out = {
- .dso_outfunc = dump_backup_bytes,
- .dso_dryrun = B_FALSE,
- };
-
- int err = dmu_send_obj(pool, objset_id, /* fromsnap */0, embed,
- large_block, compress, raw, /* saved */ B_FALSE, STDOUT_FILENO,
- &off, &out);
- if (err != 0) {
- fprintf(stderr, "dump_backup: dmu_send_obj: %s\n",
- strerror(err));
- return;
- }
-}
-
static int
zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
{
@@ -8546,9 +8465,9 @@
*/
zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
- ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
- ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
- NULL, NULL));
+ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
+ ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
+ ZIO_FLAG_OPTIONAL, NULL, NULL));
}
error = zio_wait(zio);
@@ -8642,6 +8561,7 @@
zio_nowait(zio_vdev_child_io(czio, bp, vd,
offset, pabd, psize, ZIO_TYPE_READ,
ZIO_PRIORITY_SYNC_READ,
+ ZIO_FLAG_DONT_CACHE |
ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
@@ -8775,7 +8695,6 @@
struct option long_options[] = {
{"ignore-assertions", no_argument, NULL, 'A'},
{"block-stats", no_argument, NULL, 'b'},
- {"backup", no_argument, NULL, 'B'},
{"checksum", no_argument, NULL, 'c'},
{"config", no_argument, NULL, 'C'},
{"datasets", no_argument, NULL, 'd'},
@@ -8817,11 +8736,10 @@
};
while ((c = getopt_long(argc, argv,
- "AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
+ "AbcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
long_options, NULL)) != -1) {
switch (c) {
case 'b':
- case 'B':
case 'c':
case 'C':
case 'd':
@@ -8969,7 +8887,7 @@
verbose = MAX(verbose, 1);
for (c = 0; c < 256; c++) {
- if (dump_all && strchr("ABeEFkKlLNOPrRSXy", c) == NULL)
+ if (dump_all && strchr("AeEFkKlLNOPrRSXy", c) == NULL)
dump_opt[c] = 1;
if (dump_opt[c])
dump_opt[c] += verbose;
@@ -9155,8 +9073,7 @@
checkpoint_pool, error);
}
- } else if (target_is_spa || dump_opt['R'] || dump_opt['B'] ||
- objset_id == 0) {
+ } else if (target_is_spa || dump_opt['R'] || objset_id == 0) {
zdb_set_skip_mmp(target);
error = spa_open_rewind(target, &spa, FTAG, policy,
NULL);
@@ -9292,10 +9209,7 @@
strerror(errno));
}
}
- if (dump_opt['B']) {
- dump_backup(target, objset_id,
- argc > 0 ? argv[0] : NULL);
- } else if (os != NULL) {
+ if (os != NULL) {
dump_objset(os);
} else if (zopt_object_args > 0 && !dump_opt['m']) {
dump_objset(spa->spa_meta_objset);
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
@@ -369,7 +369,9 @@
return (NULL);
}
- if ((event = list_remove_head(&agent_events)) != NULL) {
+ if ((event = (list_head(&agent_events))) != NULL) {
+ list_remove(&agent_events, event);
+
(void) pthread_mutex_unlock(&agent_lock);
/* dispatch to all event subscribers */
@@ -432,7 +434,8 @@
(void) pthread_join(g_agents_tid, NULL);
/* drain any pending events */
- while ((event = list_remove_head(&agent_events)) != NULL) {
+ while ((event = (list_head(&agent_events))) != NULL) {
+ list_remove(&agent_events, event);
nvlist_free(event->ae_nvl);
free(event);
}
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
@@ -1288,14 +1288,17 @@
tpool_destroy(g_tpool);
}
- while ((pool = list_remove_head(&g_pool_list)) != NULL) {
+ while ((pool = (list_head(&g_pool_list))) != NULL) {
+ list_remove(&g_pool_list, pool);
zpool_close(pool->uap_zhp);
free(pool);
}
list_destroy(&g_pool_list);
- while ((device = list_remove_head(&g_device_list)) != NULL)
+ while ((device = (list_head(&g_device_list))) != NULL) {
+ list_remove(&g_device_list, device);
free(device);
+ }
list_destroy(&g_device_list);
libzfs_fini(g_zfshdl);
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -6057,8 +6057,8 @@
if (p != NULL)
rid = p->pw_uid;
else if (*endch != '\0') {
- (void) snprintf(errbuf, sizeof (errbuf),
- gettext("invalid user %s\n"), curr);
+ (void) snprintf(errbuf, 256, gettext(
+ "invalid user %s\n"), curr);
allow_usage(un, B_TRUE, errbuf);
}
} else if (opts->group) {
@@ -6071,9 +6071,8 @@
if (g != NULL)
rid = g->gr_gid;
else if (*endch != '\0') {
- (void) snprintf(errbuf, sizeof (errbuf),
- gettext("invalid group %s\n"),
- curr);
+ (void) snprintf(errbuf, 256, gettext(
+ "invalid group %s\n"), curr);
allow_usage(un, B_TRUE, errbuf);
}
} else {
@@ -6098,9 +6097,8 @@
who_type = ZFS_DELEG_GROUP;
rid = g->gr_gid;
} else {
- (void) snprintf(errbuf, sizeof (errbuf),
- gettext("invalid user/group %s\n"),
- curr);
+ (void) snprintf(errbuf, 256, gettext(
+ "invalid user/group %s\n"), curr);
allow_usage(un, B_TRUE, errbuf);
}
}
diff --git a/sys/contrib/openzfs/cmd/zilstat.in b/sys/contrib/openzfs/cmd/zilstat.in
--- a/sys/contrib/openzfs/cmd/zilstat.in
+++ b/sys/contrib/openzfs/cmd/zilstat.in
@@ -36,49 +36,31 @@
from argparse import RawTextHelpFormatter
cols = {
- # hdr: [size, scale, kstat name]
+ # hdr: [size, scale, kstat name]
"time": [8, -1, "time"],
"pool": [12, -1, "pool"],
"ds": [12, -1, "dataset_name"],
"obj": [12, -1, "objset"],
- "cc": [5, 1000, "zil_commit_count"],
- "cwc": [5, 1000, "zil_commit_writer_count"],
- "ic": [5, 1000, "zil_itx_count"],
- "iic": [5, 1000, "zil_itx_indirect_count"],
- "iib": [5, 1024, "zil_itx_indirect_bytes"],
- "icc": [5, 1000, "zil_itx_copied_count"],
- "icb": [5, 1024, "zil_itx_copied_bytes"],
- "inc": [5, 1000, "zil_itx_needcopy_count"],
- "inb": [5, 1024, "zil_itx_needcopy_bytes"],
- "idc": [5, 1000, "icc+inc"],
- "idb": [5, 1024, "icb+inb"],
- "iwc": [5, 1000, "iic+idc"],
- "iwb": [5, 1024, "iib+idb"],
- "imnc": [6, 1000, "zil_itx_metaslab_normal_count"],
- "imnb": [6, 1024, "zil_itx_metaslab_normal_bytes"],
- "imnw": [6, 1024, "zil_itx_metaslab_normal_write"],
- "imna": [6, 1024, "zil_itx_metaslab_normal_alloc"],
- "imsc": [6, 1000, "zil_itx_metaslab_slog_count"],
- "imsb": [6, 1024, "zil_itx_metaslab_slog_bytes"],
- "imsw": [6, 1024, "zil_itx_metaslab_slog_write"],
- "imsa": [6, 1024, "zil_itx_metaslab_slog_alloc"],
- "imc": [5, 1000, "imnc+imsc"],
- "imb": [5, 1024, "imnb+imsb"],
- "imw": [5, 1024, "imnw+imsw"],
- "ima": [5, 1024, "imna+imsa"],
- "se%": [3, 100, "imb/ima"],
- "sen%": [4, 100, "imnb/imna"],
- "ses%": [4, 100, "imsb/imsa"],
- "te%": [3, 100, "imb/imw"],
- "ten%": [4, 100, "imnb/imnw"],
- "tes%": [4, 100, "imsb/imsw"],
+ "zcc": [10, 1000, "zil_commit_count"],
+ "zcwc": [10, 1000, "zil_commit_writer_count"],
+ "ziic": [10, 1000, "zil_itx_indirect_count"],
+ "zic": [10, 1000, "zil_itx_count"],
+ "ziib": [10, 1024, "zil_itx_indirect_bytes"],
+ "zicc": [10, 1000, "zil_itx_copied_count"],
+ "zicb": [10, 1024, "zil_itx_copied_bytes"],
+ "zinc": [10, 1000, "zil_itx_needcopy_count"],
+ "zinb": [10, 1024, "zil_itx_needcopy_bytes"],
+ "zimnc": [10, 1000, "zil_itx_metaslab_normal_count"],
+ "zimnb": [10, 1024, "zil_itx_metaslab_normal_bytes"],
+ "zimsc": [10, 1000, "zil_itx_metaslab_slog_count"],
+ "zimsb": [10, 1024, "zil_itx_metaslab_slog_bytes"],
}
-hdr = ["time", "ds", "cc", "ic", "idc", "idb", "iic", "iib",
- "imnc", "imnw", "imsc", "imsw"]
+hdr = ["time", "pool", "ds", "obj", "zcc", "zcwc", "ziic", "zic", "ziib", \
+ "zicc", "zicb", "zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
-ghdr = ["time", "cc", "ic", "idc", "idb", "iic", "iib",
- "imnc", "imnw", "imsc", "imsw"]
+ghdr = ["time", "zcc", "zcwc", "ziic", "zic", "ziib", "zicc", "zicb",
+ "zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
cmd = ("Usage: zilstat [-hgdv] [-i interval] [-p pool_name]")
@@ -123,7 +105,7 @@
global sep
for col in hdr:
new_col = col
- if interval > 0 and cols[col][1] > 100:
+ if interval > 0 and col not in ['time', 'pool', 'ds', 'obj']:
new_col += "/s"
sys.stdout.write("%*s%s" % (cols[col][0], new_col, sep))
sys.stdout.write("\n")
@@ -133,7 +115,7 @@
global sep
for col in hdr:
val = v[cols[col][2]]
- if interval > 0 and cols[col][1] > 100:
+ if col not in ['time', 'pool', 'ds', 'obj'] and interval > 0:
val = v[cols[col][2]] // interval
sys.stdout.write("%s%s" % (
prettynum(cols[col][0], cols[col][1], val), sep))
@@ -255,7 +237,9 @@
invalid = []
for ele in hdr:
- if ele not in cols:
+ if gFlag and ele not in ghdr:
+ invalid.append(ele)
+ elif ele not in cols:
invalid.append(ele)
if len(invalid) > 0:
@@ -419,17 +403,17 @@
diff = copy.deepcopy(curr)
for pool in curr:
for objset in curr[pool]:
- for key in curr[pool][objset]:
- if not isinstance(diff[pool][objset][key], int):
- continue
- # If prev is NULL, this is the
- # first time we are here
- if not prev:
- diff[pool][objset][key] = 0
- else:
- diff[pool][objset][key] \
- = curr[pool][objset][key] \
- - prev[pool][objset][key]
+ for col in hdr:
+ if col not in ['time', 'pool', 'ds', 'obj']:
+ key = cols[col][2]
+ # If prev is NULL, this is the
+ # first time we are here
+ if not prev:
+ diff[pool][objset][key] = 0
+ else:
+ diff[pool][objset][key] \
+ = curr[pool][objset][key] \
+ - prev[pool][objset][key]
def zil_build_dict(pool = "GLOBAL"):
global kstat
@@ -441,77 +425,10 @@
if objset not in curr[pool]:
curr[pool][objset] = dict()
curr[pool][objset][key] = val
-
-def zil_extend_dict():
- global diff
- for pool in diff:
- for objset in diff[pool]:
- diff[pool][objset]["pool"] = pool
- diff[pool][objset]["objset"] = objset
- diff[pool][objset]["time"] = time.strftime("%H:%M:%S", \
- time.localtime())
- diff[pool][objset]["icc+inc"] = \
- diff[pool][objset]["zil_itx_copied_count"] + \
- diff[pool][objset]["zil_itx_needcopy_count"]
- diff[pool][objset]["icb+inb"] = \
- diff[pool][objset]["zil_itx_copied_bytes"] + \
- diff[pool][objset]["zil_itx_needcopy_bytes"]
- diff[pool][objset]["iic+idc"] = \
- diff[pool][objset]["zil_itx_indirect_count"] + \
- diff[pool][objset]["zil_itx_copied_count"] + \
- diff[pool][objset]["zil_itx_needcopy_count"]
- diff[pool][objset]["iib+idb"] = \
- diff[pool][objset]["zil_itx_indirect_bytes"] + \
- diff[pool][objset]["zil_itx_copied_bytes"] + \
- diff[pool][objset]["zil_itx_needcopy_bytes"]
- diff[pool][objset]["imnc+imsc"] = \
- diff[pool][objset]["zil_itx_metaslab_normal_count"] + \
- diff[pool][objset]["zil_itx_metaslab_slog_count"]
- diff[pool][objset]["imnb+imsb"] = \
- diff[pool][objset]["zil_itx_metaslab_normal_bytes"] + \
- diff[pool][objset]["zil_itx_metaslab_slog_bytes"]
- diff[pool][objset]["imnw+imsw"] = \
- diff[pool][objset]["zil_itx_metaslab_normal_write"] + \
- diff[pool][objset]["zil_itx_metaslab_slog_write"]
- diff[pool][objset]["imna+imsa"] = \
- diff[pool][objset]["zil_itx_metaslab_normal_alloc"] + \
- diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
- if diff[pool][objset]["imna+imsa"] > 0:
- diff[pool][objset]["imb/ima"] = 100 * \
- diff[pool][objset]["imnb+imsb"] // \
- diff[pool][objset]["imna+imsa"]
- else:
- diff[pool][objset]["imb/ima"] = 100
- if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
- diff[pool][objset]["imnb/imna"] = 100 * \
- diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
- diff[pool][objset]["zil_itx_metaslab_normal_alloc"]
- else:
- diff[pool][objset]["imnb/imna"] = 100
- if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
- diff[pool][objset]["imsb/imsa"] = 100 * \
- diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
- diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
- else:
- diff[pool][objset]["imsb/imsa"] = 100
- if diff[pool][objset]["imnw+imsw"] > 0:
- diff[pool][objset]["imb/imw"] = 100 * \
- diff[pool][objset]["imnb+imsb"] // \
- diff[pool][objset]["imnw+imsw"]
- else:
- diff[pool][objset]["imb/imw"] = 100
- if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
- diff[pool][objset]["imnb/imnw"] = 100 * \
- diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
- diff[pool][objset]["zil_itx_metaslab_normal_write"]
- else:
- diff[pool][objset]["imnb/imnw"] = 100
- if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
- diff[pool][objset]["imsb/imsw"] = 100 * \
- diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
- diff[pool][objset]["zil_itx_metaslab_slog_write"]
- else:
- diff[pool][objset]["imsb/imsw"] = 100
+ curr[pool][objset]["pool"] = pool
+ curr[pool][objset]["objset"] = objset
+ curr[pool][objset]["time"] = time.strftime("%H:%M:%S", \
+ time.localtime())
def sign_handler_epipe(sig, frame):
print("Caught EPIPE signal: " + str(frame))
@@ -520,31 +437,30 @@
def main():
global interval
- global curr, diff
+ global curr
hprint = False
init()
signal.signal(signal.SIGINT, signal.SIG_DFL)
signal.signal(signal.SIGPIPE, sign_handler_epipe)
- zil_process_kstat()
- if not curr:
- print ("Error: No stats to show")
- sys.exit(0)
- print_header()
if interval > 0:
- time.sleep(interval)
while True:
calculate_diff()
if not diff:
print ("Error: No stats to show")
sys.exit(0)
- zil_extend_dict()
+ if hprint == False:
+ print_header()
+ hprint = True
print_dict(diff)
time.sleep(interval)
else:
- diff = curr
- zil_extend_dict()
- print_dict(diff)
+ zil_process_kstat()
+ if not curr:
+ print ("Error: No stats to show")
+ sys.exit(0)
+ print_header()
+ print_dict(curr)
if __name__ == '__main__':
main()
diff --git a/sys/contrib/openzfs/cmd/zpool/Makefile.am b/sys/contrib/openzfs/cmd/zpool/Makefile.am
--- a/sys/contrib/openzfs/cmd/zpool/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zpool/Makefile.am
@@ -145,7 +145,6 @@
%D%/compatibility.d/openzfs-2.0-linux \
%D%/compatibility.d/openzfs-2.1-freebsd \
%D%/compatibility.d/openzfs-2.1-linux \
- %D%/compatibility.d/openzfs-2.2 \
%D%/compatibility.d/openzfsonosx-1.7.0 \
%D%/compatibility.d/openzfsonosx-1.8.1 \
%D%/compatibility.d/openzfsonosx-1.9.3 \
@@ -169,20 +168,12 @@
"freebsd-11.3 freebsd-12.0" \
"freebsd-11.3 freebsd-12.1" \
"freebsd-11.3 freebsd-12.2" \
- "freebsd-11.3 freebsd-12.3" \
- "freebsd-11.3 freebsd-12.4" \
- "openzfs-2.1-freebsd freebsd-13.0" \
- "openzfs-2.1-freebsd freebsd-13.1" \
- "openzfs-2.1-freebsd freebsd-13.2" \
"freebsd-11.3 freenas-11.3" \
"freenas-11.0 freenas-11.1" \
"openzfsonosx-1.9.3 openzfsonosx-1.9.4" \
"openzfs-2.0-freebsd truenas-12.0" \
"zol-0.7 ubuntu-18.04" \
- "zol-0.8 ubuntu-20.04" \
- "openzfs-2.1-linux ubuntu-22.04" \
- "openzfs-2.2 openzfs-2.2-linux" \
- "openzfs-2.2 openzfs-2.2-freebsd"
+ "zol-0.8 ubuntu-20.04"
zpoolconfdir = $(sysconfdir)/zfs/zpool.d
INSTALL_DATA_HOOKS += zpool-install-data-hook
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
--- a/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
+++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
@@ -8,7 +8,5 @@
filesystem_limits
hole_birth
large_blocks
-livelist
lz4_compress
spacemap_histogram
-zpool_checkpoint
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.2 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.2
deleted file mode 100644
--- a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.2
+++ /dev/null
@@ -1,40 +0,0 @@
-# Features supported by OpenZFS 2.2 on Linux and FreeBSD
-allocation_classes
-async_destroy
-blake3
-block_cloning
-bookmark_v2
-bookmark_written
-bookmarks
-device_rebuild
-device_removal
-draid
-edonr
-embedded_data
-empty_bpobj
-enabled_txg
-encryption
-extensible_dataset
-filesystem_limits
-head_errlog
-hole_birth
-large_blocks
-large_dnode
-livelist
-log_spacemap
-lz4_compress
-multi_vdev_crash_dump
-obsolete_counts
-project_quota
-redacted_datasets
-redaction_bookmarks
-resilver_defer
-sha512
-skein
-spacemap_histogram
-spacemap_v2
-userobj_accounting
-vdev_zaps_v2
-zilsaxattr
-zpool_checkpoint
-zstd_compress
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -7662,11 +7662,11 @@
print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
{
time_t start, end, pause;
- uint64_t pass_scanned, scanned, pass_issued, issued, total_s, total_i;
+ uint64_t pass_scanned, scanned, pass_issued, issued, total;
uint64_t elapsed, scan_rate, issue_rate;
double fraction_done;
- char processed_buf[7], scanned_buf[7], issued_buf[7], total_s_buf[7];
- char total_i_buf[7], srate_buf[7], irate_buf[7], time_buf[32];
+ char processed_buf[7], scanned_buf[7], issued_buf[7], total_buf[7];
+ char srate_buf[7], irate_buf[7], time_buf[32];
printf(" ");
printf_color(ANSI_BOLD, gettext("scan:"));
@@ -7738,11 +7738,10 @@
pass_scanned = ps->pss_pass_exam;
issued = ps->pss_issued;
pass_issued = ps->pss_pass_issued;
- total_s = ps->pss_to_examine;
- total_i = ps->pss_to_examine - ps->pss_skipped;
+ total = ps->pss_to_examine;
/* we are only done with a block once we have issued the IO for it */
- fraction_done = (double)issued / total_i;
+ fraction_done = (double)issued / total;
/* elapsed time for this pass, rounding up to 1 if it's 0 */
elapsed = time(NULL) - ps->pss_pass_start;
@@ -7751,25 +7750,26 @@
scan_rate = pass_scanned / elapsed;
issue_rate = pass_issued / elapsed;
+ uint64_t total_secs_left = (issue_rate != 0 && total >= issued) ?
+ ((total - issued) / issue_rate) : UINT64_MAX;
+ secs_to_dhms(total_secs_left, time_buf);
/* format all of the numbers we will be reporting */
zfs_nicebytes(scanned, scanned_buf, sizeof (scanned_buf));
zfs_nicebytes(issued, issued_buf, sizeof (issued_buf));
- zfs_nicebytes(total_s, total_s_buf, sizeof (total_s_buf));
- zfs_nicebytes(total_i, total_i_buf, sizeof (total_i_buf));
+ zfs_nicebytes(total, total_buf, sizeof (total_buf));
+ zfs_nicebytes(scan_rate, srate_buf, sizeof (srate_buf));
+ zfs_nicebytes(issue_rate, irate_buf, sizeof (irate_buf));
/* do not print estimated time if we have a paused scrub */
- (void) printf(gettext("\t%s / %s scanned"), scanned_buf, total_s_buf);
- if (pause == 0 && scan_rate > 0) {
- zfs_nicebytes(scan_rate, srate_buf, sizeof (srate_buf));
- (void) printf(gettext(" at %s/s"), srate_buf);
- }
- (void) printf(gettext(", %s / %s issued"), issued_buf, total_i_buf);
- if (pause == 0 && issue_rate > 0) {
- zfs_nicebytes(issue_rate, irate_buf, sizeof (irate_buf));
- (void) printf(gettext(" at %s/s"), irate_buf);
+ if (pause == 0) {
+ (void) printf(gettext("\t%s scanned at %s/s, "
+ "%s issued at %s/s, %s total\n"),
+ scanned_buf, srate_buf, issued_buf, irate_buf, total_buf);
+ } else {
+ (void) printf(gettext("\t%s scanned, %s issued, %s total\n"),
+ scanned_buf, issued_buf, total_buf);
}
- (void) printf(gettext("\n"));
if (is_resilver) {
(void) printf(gettext("\t%s resilvered, %.2f%% done"),
@@ -7782,16 +7782,16 @@
if (pause == 0) {
/*
* Only provide an estimate iff:
- * 1) we haven't yet issued all we expected, and
+ * 1) the time remaining is valid, and
* 2) the issue rate exceeds 10 MB/s, and
* 3) it's either:
* a) a resilver which has started repairs, or
* b) a scrub which has entered the issue phase.
*/
- if (total_i >= issued && issue_rate >= 10 * 1024 * 1024 &&
+ if (total_secs_left != UINT64_MAX &&
+ issue_rate >= 10 * 1024 * 1024 &&
((is_resilver && ps->pss_processed > 0) ||
(is_scrub && issued > 0))) {
- secs_to_dhms((total_i - issued) / issue_rate, time_buf);
(void) printf(gettext(", %s to go\n"), time_buf);
} else {
(void) printf(gettext(", no estimated "
@@ -7803,7 +7803,7 @@
}
static void
-print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, uint_t c, char *vdev_name)
+print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
{
if (vrs == NULL || vrs->vrs_state == VDEV_REBUILD_NONE)
return;
@@ -7815,20 +7815,17 @@
uint64_t bytes_scanned = vrs->vrs_bytes_scanned;
uint64_t bytes_issued = vrs->vrs_bytes_issued;
uint64_t bytes_rebuilt = vrs->vrs_bytes_rebuilt;
- uint64_t bytes_est_s = vrs->vrs_bytes_est;
- uint64_t bytes_est_i = vrs->vrs_bytes_est;
- if (c > offsetof(vdev_rebuild_stat_t, vrs_pass_bytes_skipped) / 8)
- bytes_est_i -= vrs->vrs_pass_bytes_skipped;
+ uint64_t bytes_est = vrs->vrs_bytes_est;
uint64_t scan_rate = (vrs->vrs_pass_bytes_scanned /
(vrs->vrs_pass_time_ms + 1)) * 1000;
uint64_t issue_rate = (vrs->vrs_pass_bytes_issued /
(vrs->vrs_pass_time_ms + 1)) * 1000;
double scan_pct = MIN((double)bytes_scanned * 100 /
- (bytes_est_s + 1), 100);
+ (bytes_est + 1), 100);
/* Format all of the numbers we will be reporting */
char bytes_scanned_buf[7], bytes_issued_buf[7];
- char bytes_rebuilt_buf[7], bytes_est_s_buf[7], bytes_est_i_buf[7];
+ char bytes_rebuilt_buf[7], bytes_est_buf[7];
char scan_rate_buf[7], issue_rate_buf[7], time_buf[32];
zfs_nicebytes(bytes_scanned, bytes_scanned_buf,
sizeof (bytes_scanned_buf));
@@ -7836,8 +7833,9 @@
sizeof (bytes_issued_buf));
zfs_nicebytes(bytes_rebuilt, bytes_rebuilt_buf,
sizeof (bytes_rebuilt_buf));
- zfs_nicebytes(bytes_est_s, bytes_est_s_buf, sizeof (bytes_est_s_buf));
- zfs_nicebytes(bytes_est_i, bytes_est_i_buf, sizeof (bytes_est_i_buf));
+ zfs_nicebytes(bytes_est, bytes_est_buf, sizeof (bytes_est_buf));
+ zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
+ zfs_nicebytes(issue_rate, issue_rate_buf, sizeof (issue_rate_buf));
time_t start = vrs->vrs_start_time;
time_t end = vrs->vrs_end_time;
@@ -7860,29 +7858,17 @@
assert(vrs->vrs_state == VDEV_REBUILD_ACTIVE);
- (void) printf(gettext("\t%s / %s scanned"), bytes_scanned_buf,
- bytes_est_s_buf);
- if (scan_rate > 0) {
- zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
- (void) printf(gettext(" at %s/s"), scan_rate_buf);
- }
- (void) printf(gettext(", %s / %s issued"), bytes_issued_buf,
- bytes_est_i_buf);
- if (issue_rate > 0) {
- zfs_nicebytes(issue_rate, issue_rate_buf,
- sizeof (issue_rate_buf));
- (void) printf(gettext(" at %s/s"), issue_rate_buf);
- }
- (void) printf(gettext("\n"));
+ secs_to_dhms(MAX((int64_t)bytes_est - (int64_t)bytes_scanned, 0) /
+ MAX(scan_rate, 1), time_buf);
+ (void) printf(gettext("\t%s scanned at %s/s, %s issued %s/s, "
+ "%s total\n"), bytes_scanned_buf, scan_rate_buf,
+ bytes_issued_buf, issue_rate_buf, bytes_est_buf);
(void) printf(gettext("\t%s resilvered, %.2f%% done"),
bytes_rebuilt_buf, scan_pct);
if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
- if (bytes_est_s >= bytes_scanned &&
- scan_rate >= 10 * 1024 * 1024) {
- secs_to_dhms((bytes_est_s - bytes_scanned) / scan_rate,
- time_buf);
+ if (scan_rate >= 10 * 1024 * 1024) {
(void) printf(gettext(", %s to go\n"), time_buf);
} else {
(void) printf(gettext(", no estimated "
@@ -7914,7 +7900,7 @@
ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i) == 0) {
char *name = zpool_vdev_name(g_zfs, zhp,
child[c], VDEV_NAME_TYPE_ID);
- print_rebuild_status_impl(vrs, i, name);
+ print_rebuild_status_impl(vrs, name);
free(name);
}
}
@@ -8019,15 +8005,13 @@
active_resilver = (ps->pss_state == DSS_SCANNING);
}
+
have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
scrub_start = ps->pss_start_time;
- if (c > offsetof(pool_scan_stat_t,
- pss_pass_error_scrub_pause) / 8) {
- have_errorscrub = (ps->pss_error_scrub_func ==
- POOL_SCAN_ERRORSCRUB);
- errorscrub_start = ps->pss_error_scrub_start;
- }
+ have_errorscrub = (ps->pss_error_scrub_func ==
+ POOL_SCAN_ERRORSCRUB);
+ errorscrub_start = ps->pss_error_scrub_start;
}
boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
diff --git a/sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c b/sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c
--- a/sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c
+++ b/sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c
@@ -238,7 +238,6 @@
print_kv("end_ts", ps->pss_end_time);
print_kv(",errors", ps->pss_errors);
print_kv(",examined", examined);
- print_kv(",skipped", ps->pss_skipped);
print_kv(",issued", ps->pss_issued);
print_kv(",pass_examined", pass_exam);
print_kv(",pass_issued", ps->pss_pass_issued);
@@ -250,6 +249,7 @@
print_kv(",remaining_t", remaining_time);
print_kv(",start_ts", ps->pss_start_time);
print_kv(",to_examine", ps->pss_to_examine);
+ print_kv(",to_process", ps->pss_to_process);
printf(" %llu\n", (u_longlong_t)timestamp);
return (0);
}
diff --git a/sys/contrib/openzfs/config/kernel-reclaim_state.m4 b/sys/contrib/openzfs/config/kernel-reclaim_state.m4
deleted file mode 100644
--- a/sys/contrib/openzfs/config/kernel-reclaim_state.m4
+++ /dev/null
@@ -1,26 +0,0 @@
-AC_DEFUN([ZFS_AC_KERNEL_SRC_RECLAIMED], [
- dnl #
- dnl # 6.4 API change
- dnl # The reclaimed_slab of struct reclaim_state
- dnl # is renamed to reclaimed
- dnl #
- ZFS_LINUX_TEST_SRC([reclaim_state_reclaimed], [
- #include <linux/swap.h>
- static const struct reclaim_state
- rs __attribute__ ((unused)) = {
- .reclaimed = 100,
- };
- ],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_RECLAIMED], [
- AC_MSG_CHECKING([whether struct reclaim_state has reclaimed field])
- ZFS_LINUX_TEST_RESULT([reclaim_state_reclaimed], [
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_RECLAIM_STATE_RECLAIMED, 1,
- [struct reclaim_state has reclaimed])
- ],[
- AC_MSG_RESULT(no)
- ])
-])
-
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -153,7 +153,6 @@
ZFS_AC_KERNEL_SRC_IATTR_VFSID
ZFS_AC_KERNEL_SRC_FILEMAP
ZFS_AC_KERNEL_SRC_WRITEPAGE_T
- ZFS_AC_KERNEL_SRC_RECLAIMED
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -286,7 +285,6 @@
ZFS_AC_KERNEL_IATTR_VFSID
ZFS_AC_KERNEL_FILEMAP
ZFS_AC_KERNEL_WRITEPAGE_T
- ZFS_AC_KERNEL_RECLAIMED
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_CPU_HAS_FEATURE
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.zfs-zed.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.zfs-zed.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.zfs-zed.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-zed
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-import.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-import.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-import.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-import
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-load-key.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-load-key.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-load-key.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-load-key
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-mount.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-mount.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-mount.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-mount
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-share.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-share.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-share.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-share
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/rules.in b/sys/contrib/openzfs/contrib/debian/rules.in
--- a/sys/contrib/openzfs/contrib/debian/rules.in
+++ b/sys/contrib/openzfs/contrib/debian/rules.in
@@ -7,8 +7,8 @@
LINUX_MIN := $(shell awk '/Linux-Minimum:/{print $$2}' META)
LINUX_NEXT := $(shell awk -F'[ .]' '/Linux-Maximum:/{print $$2 "." $$3+1}' META)
-DKMSFILES := module include config zfs.release.in autogen.sh copy-builtin META AUTHORS \
- COPYRIGHT LICENSE README.md CODE_OF_CONDUCT.md NEWS NOTICE RELEASES.md
+DKMSFILES := module include config zfs.release.in autogen.sh META AUTHORS \
+ COPYRIGHT LICENSE README.md
ifndef KVERS
KVERS=$(shell uname -r)
diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in
--- a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in
+++ b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in
@@ -36,7 +36,7 @@
{ dfatal "Failed to install essential binaries"; exit 1; }
# Adapted from https://github.com/zbm-dev/zfsbootmenu
- if ! ldd "$(command -v zpool)" | grep -qF 'libgcc_s.so' && ldconfig -p 2> /dev/null | grep -qF 'libc.so.6' ; then
+ if ! ldd "$(command -v zpool)" | grep -qF 'libgcc_s.so'; then
# On systems with gcc-config (Gentoo, Funtoo, etc.), use it to find libgcc_s
if command -v gcc-config >/dev/null; then
inst_simple "/usr/lib/gcc/$(s=$(gcc-config -c); echo "${s%-*}/${s##*-}")/libgcc_s.so.1" ||
diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
--- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
+++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
@@ -344,7 +344,7 @@
# Need the _original_ datasets mountpoint!
mountpoint=$(get_fs_value "$fs" mountpoint)
- ZFS_CMD="mount -o zfsutil -t zfs"
+ ZFS_CMD="mount.zfs -o zfsutil"
if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
# Can't use the mountpoint property. Might be one of our
# clones. Check the 'org.zol:mountpoint' property set in
@@ -361,7 +361,7 @@
fi
# Don't use mount.zfs -o zfsutils for legacy mountpoint
if [ "$mountpoint" = "legacy" ]; then
- ZFS_CMD="mount -t zfs"
+ ZFS_CMD="mount.zfs"
fi
# Last hail-mary: Hope 'rootmnt' is set!
mountpoint=""
@@ -944,7 +944,7 @@
echo " not specified on the kernel command line."
echo ""
echo "Manually mount the root filesystem on $rootmnt and then exit."
- echo "Hint: Try: mount -o zfsutil -t zfs ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
+ echo "Hint: Try: mount.zfs -o zfsutil ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
shell
fi
diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
--- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
+++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
@@ -67,7 +67,6 @@
#include <sys/mman.h>
static const char PASSWORD_VAR_NAME[] = "pam_zfs_key_authtok";
-static const char OLD_PASSWORD_VAR_NAME[] = "pam_zfs_key_oldauthtok";
static libzfs_handle_t *g_zfs;
@@ -161,10 +160,10 @@
}
static pw_password_t *
-pw_fetch(pam_handle_t *pamh, int tok)
+pw_fetch(pam_handle_t *pamh)
{
const char *token;
- if (pam_get_authtok(pamh, tok, &token, NULL) != PAM_SUCCESS) {
+ if (pam_get_authtok(pamh, PAM_AUTHTOK, &token, NULL) != PAM_SUCCESS) {
pam_syslog(pamh, LOG_ERR,
"couldn't get password from PAM stack");
return (NULL);
@@ -178,13 +177,13 @@
}
static const pw_password_t *
-pw_fetch_lazy(pam_handle_t *pamh, int tok, const char *var_name)
+pw_fetch_lazy(pam_handle_t *pamh)
{
- pw_password_t *pw = pw_fetch(pamh, tok);
+ pw_password_t *pw = pw_fetch(pamh);
if (pw == NULL) {
return (NULL);
}
- int ret = pam_set_data(pamh, var_name, pw, destroy_pw);
+ int ret = pam_set_data(pamh, PASSWORD_VAR_NAME, pw, destroy_pw);
if (ret != PAM_SUCCESS) {
pw_free(pw);
pam_syslog(pamh, LOG_ERR, "pam_set_data failed");
@@ -194,23 +193,23 @@
}
static const pw_password_t *
-pw_get(pam_handle_t *pamh, int tok, const char *var_name)
+pw_get(pam_handle_t *pamh)
{
const pw_password_t *authtok = NULL;
- int ret = pam_get_data(pamh, var_name,
+ int ret = pam_get_data(pamh, PASSWORD_VAR_NAME,
(const void**)(&authtok));
if (ret == PAM_SUCCESS)
return (authtok);
if (ret == PAM_NO_MODULE_DATA)
- return (pw_fetch_lazy(pamh, tok, var_name));
+ return (pw_fetch_lazy(pamh));
pam_syslog(pamh, LOG_ERR, "password not available");
return (NULL);
}
static int
-pw_clear(pam_handle_t *pamh, const char *var_name)
+pw_clear(pam_handle_t *pamh)
{
- int ret = pam_set_data(pamh, var_name, NULL, NULL);
+ int ret = pam_set_data(pamh, PASSWORD_VAR_NAME, NULL, NULL);
if (ret != PAM_SUCCESS) {
pam_syslog(pamh, LOG_ERR, "clearing password failed");
return (-1);
@@ -387,7 +386,7 @@
int ret = lzc_load_key(ds_name, noop, (uint8_t *)key->value,
WRAPPING_KEY_LEN);
pw_free(key);
- if (ret && ret != EEXIST) {
+ if (ret) {
pam_syslog(pamh, LOG_ERR, "load_key failed: %d", ret);
zfs_close(ds);
return (-1);
@@ -407,14 +406,14 @@
}
static int
-unmount_unload(pam_handle_t *pamh, const char *ds_name, boolean_t force)
+unmount_unload(pam_handle_t *pamh, const char *ds_name)
{
zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM);
if (ds == NULL) {
pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name);
return (-1);
}
- int ret = zfs_unmount(ds, NULL, force ? MS_FORCE : 0);
+ int ret = zfs_unmount(ds, NULL, 0);
if (ret) {
pam_syslog(pamh, LOG_ERR, "zfs_unmount failed with: %d", ret);
zfs_close(ds);
@@ -436,13 +435,9 @@
char *runstatedir;
char *homedir;
char *dsname;
- uid_t uid_min;
- uid_t uid_max;
uid_t uid;
const char *username;
- boolean_t unmount_and_unload;
- boolean_t force_unmount;
- boolean_t recursive_homes;
+ int unmount_and_unload;
} zfs_key_config_t;
static int
@@ -474,13 +469,9 @@
free(config->homes_prefix);
return (PAM_USER_UNKNOWN);
}
- config->uid_min = 1000;
- config->uid_max = MAXUID;
config->uid = entry->pw_uid;
config->username = name;
- config->unmount_and_unload = B_TRUE;
- config->force_unmount = B_FALSE;
- config->recursive_homes = B_FALSE;
+ config->unmount_and_unload = 1;
config->dsname = NULL;
config->homedir = NULL;
for (int c = 0; c < argc; c++) {
@@ -490,16 +481,8 @@
} else if (strncmp(argv[c], "runstatedir=", 12) == 0) {
free(config->runstatedir);
config->runstatedir = strdup(argv[c] + 12);
- } else if (strncmp(argv[c], "uid_min=", 8) == 0) {
- sscanf(argv[c] + 8, "%u", &config->uid_min);
- } else if (strncmp(argv[c], "uid_max=", 8) == 0) {
- sscanf(argv[c] + 8, "%u", &config->uid_max);
} else if (strcmp(argv[c], "nounmount") == 0) {
- config->unmount_and_unload = B_FALSE;
- } else if (strcmp(argv[c], "forceunmount") == 0) {
- config->force_unmount = B_TRUE;
- } else if (strcmp(argv[c], "recursive_homes") == 0) {
- config->recursive_homes = B_TRUE;
+ config->unmount_and_unload = 0;
} else if (strcmp(argv[c], "prop_mountpoint") == 0) {
if (config->homedir == NULL)
config->homedir = strdup(entry->pw_dir);
@@ -534,12 +517,8 @@
(void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
sizeof (mountpoint), NULL, NULL, 0, B_FALSE);
if (strcmp(target->homedir, mountpoint) != 0) {
- if (target->recursive_homes) {
- (void) zfs_iter_filesystems_v2(zhp, 0,
- find_dsname_by_prop_value, target);
- }
zfs_close(zhp);
- return (target->dsname != NULL);
+ return (0);
}
target->dsname = strdup(zfs_get_name(zhp));
@@ -552,23 +531,17 @@
{
if (config->homedir != NULL &&
config->homes_prefix != NULL) {
- if (strcmp(config->homes_prefix, "*") == 0) {
- (void) zfs_iter_root(g_zfs,
- find_dsname_by_prop_value, config);
- } else {
- zfs_handle_t *zhp = zfs_open(g_zfs,
- config->homes_prefix, ZFS_TYPE_FILESYSTEM);
- if (zhp == NULL) {
- pam_syslog(NULL, LOG_ERR,
- "dataset %s not found",
- config->homes_prefix);
- return (NULL);
- }
-
- (void) zfs_iter_filesystems_v2(zhp, 0,
- find_dsname_by_prop_value, config);
- zfs_close(zhp);
+ zfs_handle_t *zhp = zfs_open(g_zfs, config->homes_prefix,
+ ZFS_TYPE_FILESYSTEM);
+ if (zhp == NULL) {
+ pam_syslog(NULL, LOG_ERR, "dataset %s not found",
+ config->homes_prefix);
+ return (NULL);
}
+
+ (void) zfs_iter_filesystems_v2(zhp, 0,
+ find_dsname_by_prop_value, config);
+ zfs_close(zhp);
char *dsname = config->dsname;
config->dsname = NULL;
return (dsname);
@@ -682,13 +655,8 @@
if (config_err != PAM_SUCCESS) {
return (config_err);
}
- if (config.uid < config.uid_min || config.uid > config.uid_max) {
- zfs_key_config_free(&config);
- return (PAM_SERVICE_ERR);
- }
- const pw_password_t *token = pw_fetch_lazy(pamh,
- PAM_AUTHTOK, PASSWORD_VAR_NAME);
+ const pw_password_t *token = pw_fetch_lazy(pamh);
if (token == NULL) {
zfs_key_config_free(&config);
return (PAM_AUTH_ERR);
@@ -738,12 +706,10 @@
if (zfs_key_config_load(pamh, &config, argc, argv) != PAM_SUCCESS) {
return (PAM_SERVICE_ERR);
}
- if (config.uid < config.uid_min || config.uid > config.uid_max) {
+ if (config.uid < 1000) {
zfs_key_config_free(&config);
- return (PAM_SERVICE_ERR);
+ return (PAM_SUCCESS);
}
- const pw_password_t *old_token = pw_get(pamh,
- PAM_OLDAUTHTOK, OLD_PASSWORD_VAR_NAME);
{
if (pam_zfs_init(pamh) != 0) {
zfs_key_config_free(&config);
@@ -755,62 +721,49 @@
zfs_key_config_free(&config);
return (PAM_SERVICE_ERR);
}
- if (!old_token) {
- pam_syslog(pamh, LOG_ERR,
- "old password from PAM stack is null");
+ int key_loaded = is_key_loaded(pamh, dataset);
+ if (key_loaded == -1) {
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
return (PAM_SERVICE_ERR);
}
- if (decrypt_mount(pamh, dataset,
- old_token->value, B_TRUE) == -1) {
+ free(dataset);
+ pam_zfs_free();
+ if (! key_loaded) {
pam_syslog(pamh, LOG_ERR,
- "old token mismatch");
- free(dataset);
- pam_zfs_free();
+ "key not loaded, returning try_again");
zfs_key_config_free(&config);
return (PAM_PERM_DENIED);
}
}
if ((flags & PAM_UPDATE_AUTHTOK) != 0) {
- const pw_password_t *token = pw_get(pamh, PAM_AUTHTOK,
- PASSWORD_VAR_NAME);
+ const pw_password_t *token = pw_get(pamh);
if (token == NULL) {
- pam_syslog(pamh, LOG_ERR, "new password unavailable");
- pam_zfs_free();
zfs_key_config_free(&config);
- pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
+ return (PAM_SERVICE_ERR);
+ }
+ if (pam_zfs_init(pamh) != 0) {
+ zfs_key_config_free(&config);
return (PAM_SERVICE_ERR);
}
char *dataset = zfs_key_config_get_dataset(&config);
if (!dataset) {
pam_zfs_free();
zfs_key_config_free(&config);
- pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
- pw_clear(pamh, PASSWORD_VAR_NAME);
return (PAM_SERVICE_ERR);
}
- int was_loaded = is_key_loaded(pamh, dataset);
- if (!was_loaded && decrypt_mount(pamh, dataset,
- old_token->value, B_FALSE) == -1) {
+ if (change_key(pamh, dataset, token->value) == -1) {
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
- pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
- pw_clear(pamh, PASSWORD_VAR_NAME);
return (PAM_SERVICE_ERR);
}
- int changed = change_key(pamh, dataset, token->value);
- if (!was_loaded) {
- unmount_unload(pamh, dataset, config.force_unmount);
- }
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
- if (pw_clear(pamh, OLD_PASSWORD_VAR_NAME) == -1 ||
- pw_clear(pamh, PASSWORD_VAR_NAME) == -1 || changed == -1) {
+ if (pw_clear(pamh) == -1) {
return (PAM_SERVICE_ERR);
}
} else {
@@ -835,7 +788,7 @@
return (PAM_SESSION_ERR);
}
- if (config.uid < config.uid_min || config.uid > config.uid_max) {
+ if (config.uid < 1000) {
zfs_key_config_free(&config);
return (PAM_SUCCESS);
}
@@ -846,8 +799,7 @@
return (PAM_SUCCESS);
}
- const pw_password_t *token = pw_get(pamh,
- PAM_AUTHTOK, PASSWORD_VAR_NAME);
+ const pw_password_t *token = pw_get(pamh);
if (token == NULL) {
zfs_key_config_free(&config);
return (PAM_SESSION_ERR);
@@ -871,7 +823,7 @@
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
- if (pw_clear(pamh, PASSWORD_VAR_NAME) == -1) {
+ if (pw_clear(pamh) == -1) {
return (PAM_SERVICE_ERR);
}
return (PAM_SUCCESS);
@@ -894,7 +846,7 @@
if (zfs_key_config_load(pamh, &config, argc, argv) != PAM_SUCCESS) {
return (PAM_SESSION_ERR);
}
- if (config.uid < config.uid_min || config.uid > config.uid_max) {
+ if (config.uid < 1000) {
zfs_key_config_free(&config);
return (PAM_SUCCESS);
}
@@ -916,7 +868,7 @@
zfs_key_config_free(&config);
return (PAM_SESSION_ERR);
}
- if (unmount_unload(pamh, dataset, config.force_unmount) == -1) {
+ if (unmount_unload(pamh, dataset) == -1) {
free(dataset);
pam_zfs_free();
zfs_key_config_free(&config);
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/kmem.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/kmem.h
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/kmem.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/kmem.h
@@ -75,7 +75,7 @@
extern uint64_t spl_kmem_cache_inuse(kmem_cache_t *cache);
extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache);
-__attribute__((malloc, alloc_size(1)))
+__attribute__((alloc_size(1)))
void *zfs_kmem_alloc(size_t size, int kmflags);
void zfs_kmem_free(void *buf, size_t size);
uint64_t kmem_size(void);
@@ -83,7 +83,6 @@
int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
void kmem_cache_destroy(kmem_cache_t *cache);
-__attribute__((malloc))
void *kmem_cache_alloc(kmem_cache_t *cache, int flags);
void kmem_cache_free(kmem_cache_t *cache, void *buf);
boolean_t kmem_cache_reap_active(void);
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h
@@ -68,6 +68,7 @@
zfs_trim,
zfs_txg,
zfs_vdev,
+ zfs_vdev_cache,
zfs_vdev_file,
zfs_vdev_mirror,
zfs_vnops,
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h
@@ -31,10 +31,10 @@
#include <linux/vmalloc.h>
extern int kmem_debugging(void);
-__attribute__((format(printf, 1, 0)))
-extern char *kmem_vasprintf(const char *fmt, va_list ap);
-__attribute__((format(printf, 1, 2)))
-extern char *kmem_asprintf(const char *fmt, ...);
+extern char *kmem_vasprintf(const char *fmt, va_list ap)
+ __attribute__((format(printf, 1, 0)));
+extern char *kmem_asprintf(const char *fmt, ...)
+ __attribute__((format(printf, 1, 2)));
extern char *kmem_strdup(const char *str);
extern void kmem_strfree(char *str);
@@ -186,10 +186,10 @@
#define kmem_free(ptr, sz) spl_kmem_free((ptr), (sz))
#define kmem_cache_reap_active spl_kmem_cache_reap_active
-__attribute__((malloc, alloc_size(1)))
-extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line);
-__attribute__((malloc, alloc_size(1)))
-extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line);
+extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line)
+ __attribute__((alloc_size(1)));
+extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line)
+ __attribute__((alloc_size(1)));
extern void spl_kmem_free(const void *ptr, size_t sz);
/*
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
@@ -104,7 +104,6 @@
/* list node for the cpu hotplug callback */
struct hlist_node tq_hp_cb_node;
boolean_t tq_hp_support;
- unsigned long lastshouldstop; /* when to purge dynamic */
} taskq_t;
typedef struct taskq_ent {
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/vmem.h b/sys/contrib/openzfs/include/os/linux/spl/sys/vmem.h
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/vmem.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/vmem.h
@@ -91,10 +91,8 @@
#define vmem_zalloc(sz, fl) spl_vmem_zalloc((sz), (fl), __func__, __LINE__)
#define vmem_free(ptr, sz) spl_vmem_free((ptr), (sz))
-extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line)
- __attribute__((malloc, alloc_size(1)));
-extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line)
- __attribute__((malloc, alloc_size(1)));
+extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line);
+extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line);
extern void spl_vmem_free(const void *ptr, size_t sz);
int spl_vmem_init(void);
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h
@@ -215,39 +215,6 @@
TP_ARGS(zilog, zcw))
DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
-/*
- * Generic support for three argument tracepoints of the form:
- *
- * DTRACE_PROBE3(...,
- * zilog_t *, ...,
- * uint64_t, ...,
- * uint64_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_zil_block_size_class,
- TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1),
- TP_ARGS(zilog, res, s1),
- TP_STRUCT__entry(
- ZILOG_TP_STRUCT_ENTRY
- __field(uint64_t, res)
- __field(uint64_t, s1)
- ),
- TP_fast_assign(
- ZILOG_TP_FAST_ASSIGN
- __entry->res = res;
- __entry->s1 = s1;
- ),
- TP_printk(
- ZILOG_TP_PRINTK_FMT " res %llu s1 %llu",
- ZILOG_TP_PRINTK_ARGS, __entry->res, __entry->s1)
-);
-
-#define DEFINE_ZIL_BLOCK_SIZE_EVENT(name) \
-DEFINE_EVENT(zfs_zil_block_size_class, name, \
- TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1), \
- TP_ARGS(zilog, res, s1))
-DEFINE_ZIL_BLOCK_SIZE_EVENT(zfs_zil__block__size);
-
#endif /* _TRACE_ZIL_H */
#undef TRACE_INCLUDE_PATH
@@ -261,7 +228,6 @@
DEFINE_DTRACE_PROBE2(zil__process__commit__itx);
DEFINE_DTRACE_PROBE2(zil__process__normal__itx);
DEFINE_DTRACE_PROBE2(zil__commit__io__error);
-DEFINE_DTRACE_PROBE3(zil__block__size);
#endif /* HAVE_DECLARE_EVENT_CLASS */
#endif /* _KERNEL */
diff --git a/sys/contrib/openzfs/include/sys/abd.h b/sys/contrib/openzfs/include/sys/abd.h
--- a/sys/contrib/openzfs/include/sys/abd.h
+++ b/sys/contrib/openzfs/include/sys/abd.h
@@ -86,15 +86,10 @@
* Allocations and deallocations
*/
-__attribute__((malloc))
abd_t *abd_alloc(size_t, boolean_t);
-__attribute__((malloc))
abd_t *abd_alloc_linear(size_t, boolean_t);
-__attribute__((malloc))
abd_t *abd_alloc_gang(void);
-__attribute__((malloc))
abd_t *abd_alloc_for_io(size_t, boolean_t);
-__attribute__((malloc))
abd_t *abd_alloc_sametype(abd_t *, size_t);
boolean_t abd_size_alloc_linear(size_t);
void abd_gang_add(abd_t *, abd_t *, boolean_t);
diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h
--- a/sys/contrib/openzfs/include/sys/arc.h
+++ b/sys/contrib/openzfs/include/sys/arc.h
@@ -304,8 +304,9 @@
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp,
arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
- arc_write_done_func_t *done, void *priv, zio_priority_t priority,
- int zio_flags, const zbookmark_phys_t *zb);
+ arc_write_done_func_t *physdone, arc_write_done_func_t *done,
+ void *priv, zio_priority_t priority, int zio_flags,
+ const zbookmark_phys_t *zb);
arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv);
void arc_remove_prune_callback(arc_prune_t *p);
diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h
--- a/sys/contrib/openzfs/include/sys/arc_impl.h
+++ b/sys/contrib/openzfs/include/sys/arc_impl.h
@@ -123,6 +123,7 @@
void *awcb_private;
arc_write_done_func_t *awcb_ready;
arc_write_done_func_t *awcb_children_ready;
+ arc_write_done_func_t *awcb_physdone;
arc_write_done_func_t *awcb_done;
arc_buf_t *awcb_buf;
};
diff --git a/sys/contrib/openzfs/include/sys/btree.h b/sys/contrib/openzfs/include/sys/btree.h
--- a/sys/contrib/openzfs/include/sys/btree.h
+++ b/sys/contrib/openzfs/include/sys/btree.h
@@ -105,13 +105,8 @@
boolean_t bti_before;
} zfs_btree_index_t;
-typedef struct btree zfs_btree_t;
-typedef void * (*bt_find_in_buf_f) (zfs_btree_t *, uint8_t *, uint32_t,
- const void *, zfs_btree_index_t *);
-
-struct btree {
+typedef struct btree {
int (*bt_compar) (const void *, const void *);
- bt_find_in_buf_f bt_find_in_buf;
size_t bt_elem_size;
size_t bt_leaf_size;
uint32_t bt_leaf_cap;
@@ -120,54 +115,7 @@
uint64_t bt_num_nodes;
zfs_btree_hdr_t *bt_root;
zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
-};
-
-/*
- * Implementation of Shar's algorithm designed to accelerate binary search by
- * eliminating impossible to predict branches.
- *
- * For optimality, this should be used to generate the search function in the
- * same file as the comparator and the comparator should be marked
- * `__attribute__((always_inline)) inline` so that the compiler will inline it.
- *
- * Arguments are:
- *
- * NAME - The function name for this instance of the search function. Use it
- * in a subsequent call to zfs_btree_create().
- * T - The element type stored inside the B-Tree.
- * COMP - A comparator to compare two nodes; it must return exactly -1, 0,
- *  or +1: -1 for <, 0 for ==, and +1 for >. For trivial comparisons,
- * TREE_CMP() from avl.h can be used in a boilerplate function.
- */
-/* BEGIN CSTYLED */
-#define ZFS_BTREE_FIND_IN_BUF_FUNC(NAME, T, COMP) \
-_Pragma("GCC diagnostic push") \
-_Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") \
-static void * \
-NAME(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems, \
- const void *value, zfs_btree_index_t *where) \
-{ \
- T *i = (T *)buf; \
- (void) tree; \
- _Pragma("GCC unroll 9") \
- while (nelems > 1) { \
- uint32_t half = nelems / 2; \
- nelems -= half; \
- i += (COMP(&i[half - 1], value) < 0) * half; \
- } \
- \
- int comp = COMP(i, value); \
- where->bti_offset = (i - (T *)buf) + (comp < 0); \
- where->bti_before = (comp != 0); \
- \
- if (comp == 0) { \
- return (i); \
- } \
- \
- return (NULL); \
-} \
-_Pragma("GCC diagnostic pop")
-/* END CSTYLED */
+} zfs_btree_t;
/*
* Allocate and deallocate caches for btree nodes.
@@ -181,19 +129,13 @@
* tree - the tree to be initialized
 * compar - function to compare two nodes; it must return exactly -1, 0, or +1:
 * -1 for <, 0 for ==, and +1 for >
- * find - optional function to accelerate searches inside B-Tree nodes
- * through Shar's algorithm and comparator inlining. Setting this to
- * NULL will use a generic function. The function should be created
- * using ZFS_BTREE_FIND_IN_BUF_FUNC() in the same file as compar.
- * compar should be marked `__attribute__((always_inline)) inline` or
- * performance is unlikely to improve very much.
* size - the value of sizeof(struct my_type)
* lsize - custom leaf size
*/
void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
- bt_find_in_buf_f, size_t);
+ size_t);
void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *),
- bt_find_in_buf_f, size_t, size_t);
+ size_t, size_t);
/*
* Find a node with a matching value in the tree. Returns the matching node
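The removed ZFS_BTREE_FIND_IN_BUF_FUNC macro above is easier to follow
de-macroized. A stand-alone sketch for a sorted int array (find_int and its
inline comparison stand in for the macro's NAME/T/COMP parameters):

#include <stdint.h>
#include <stddef.h>

/*
 * Branchless (Shar's) binary search, mirroring the removed macro's loop:
 * each step halves the candidate range and advances the base pointer with
 * a multiply instead of an unpredictable branch.
 */
static int *
find_int(int *buf, uint32_t nelems, int value, uint32_t *where)
{
	int *i = buf;

	while (nelems > 1) {
		uint32_t half = nelems / 2;
		nelems -= half;
		i += (i[half - 1] < value) * half;
	}

	int comp = (*i > value) - (*i < value);
	*where = (uint32_t)(i - buf) + (comp < 0);
	return (comp == 0 ? i : NULL);
}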
diff --git a/sys/contrib/openzfs/include/sys/dsl_scan.h b/sys/contrib/openzfs/include/sys/dsl_scan.h
--- a/sys/contrib/openzfs/include/sys/dsl_scan.h
+++ b/sys/contrib/openzfs/include/sys/dsl_scan.h
@@ -61,7 +61,7 @@
uint64_t scn_end_time;
uint64_t scn_to_examine; /* total bytes to be scanned */
uint64_t scn_examined; /* bytes scanned so far */
- uint64_t scn_skipped; /* bytes skipped by scanner */
+ uint64_t scn_to_process; /* total bytes to process */
uint64_t scn_processed;
uint64_t scn_errors; /* scan I/O error count */
uint64_t scn_ddt_class_max;
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -1088,7 +1088,7 @@
uint64_t pss_end_time; /* scan end time */
uint64_t pss_to_examine; /* total bytes to scan */
uint64_t pss_examined; /* total bytes located by scanner */
- uint64_t pss_skipped; /* total bytes skipped by scanner */
+ uint64_t pss_to_process; /* total bytes to process */
uint64_t pss_processed; /* total processed bytes */
uint64_t pss_errors; /* scan errors */
@@ -1152,7 +1152,6 @@
uint64_t vrs_pass_time_ms; /* pass run time (millisecs) */
uint64_t vrs_pass_bytes_scanned; /* bytes scanned since start/resume */
uint64_t vrs_pass_bytes_issued; /* bytes rebuilt since start/resume */
- uint64_t vrs_pass_bytes_skipped; /* bytes skipped since start/resume */
} vdev_rebuild_stat_t;
/*
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -723,10 +723,16 @@
* Send TRIM commands in-line during normal pool operation while deleting.
* OFF: no
* ON: yes
+ * NB: IN_FREEBSD_BASE is defined within the FreeBSD sources.
*/
typedef enum {
SPA_AUTOTRIM_OFF = 0, /* default */
SPA_AUTOTRIM_ON,
+#ifdef IN_FREEBSD_BASE
+ SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
+#else
+ SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
+#endif
} spa_autotrim_t;
/*
@@ -1168,6 +1174,10 @@
zbookmark_phys_t *zb);
extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);
+/* vdev cache */
+extern void vdev_cache_stat_init(void);
+extern void vdev_cache_stat_fini(void);
+
/* vdev mirror */
extern void vdev_mirror_stat_init(void);
extern void vdev_mirror_stat_fini(void);
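The aliasing enum member above selects the platform default at compile time;
a zpool_prop.c hunk later in this diff registers the autotrim property with
SPA_AUTOTRIM_DEFAULT. A stand-alone demo of the pattern:

#include <stdio.h>

typedef enum {
	SPA_AUTOTRIM_OFF = 0,
	SPA_AUTOTRIM_ON,
#ifdef IN_FREEBSD_BASE
	SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
#else
	SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
#endif
} spa_autotrim_t;

int
main(void)
{
	/* Prints 1 (on) when compiled with -DIN_FREEBSD_BASE, else 0 (off). */
	printf("autotrim default = %d\n", (int)SPA_AUTOTRIM_DEFAULT);
	return (0);
}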
diff --git a/sys/contrib/openzfs/include/sys/vdev.h b/sys/contrib/openzfs/include/sys/vdev.h
--- a/sys/contrib/openzfs/include/sys/vdev.h
+++ b/sys/contrib/openzfs/include/sys/vdev.h
@@ -158,15 +158,20 @@
extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
extern boolean_t vdev_is_spacemap_addressable(vdev_t *vd);
+extern void vdev_cache_init(vdev_t *vd);
+extern void vdev_cache_fini(vdev_t *vd);
+extern boolean_t vdev_cache_read(zio_t *zio);
+extern void vdev_cache_write(zio_t *zio);
+extern void vdev_cache_purge(vdev_t *vd);
+
extern void vdev_queue_init(vdev_t *vd);
extern void vdev_queue_fini(vdev_t *vd);
extern zio_t *vdev_queue_io(zio_t *zio);
extern void vdev_queue_io_done(zio_t *zio);
extern void vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority);
-extern uint32_t vdev_queue_length(vdev_t *vd);
+extern int vdev_queue_length(vdev_t *vd);
extern uint64_t vdev_queue_last_offset(vdev_t *vd);
-extern uint64_t vdev_queue_class_length(vdev_t *vq, zio_priority_t p);
extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
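The restored vdev_cache_read()/vdev_cache_write() hooks are consulted from
the leaf-vdev I/O path. A simplified sketch of the read side (kernel context
assumed; the helper name is illustrative and the real wiring lives in zio.c):

/*
 * Sketch: a leaf-vdev read may be satisfied from the small per-disk cache.
 * ZIO_FLAG_DONT_CACHE (also restored by this diff) lets callers such as
 * the L2ARC opt out of polluting the cache.
 */
static boolean_t
try_vdev_cache_read(zio_t *zio)
{
	if (zio->io_type != ZIO_TYPE_READ ||
	    (zio->io_flags & ZIO_FLAG_DONT_CACHE))
		return (B_FALSE);
	/* On a hit the zio completes without touching the disk. */
	return (vdev_cache_read(zio));
}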
diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h
--- a/sys/contrib/openzfs/include/sys/vdev_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_impl.h
@@ -57,6 +57,8 @@
* Forward declarations that lots of things need.
*/
typedef struct vdev_queue vdev_queue_t;
+typedef struct vdev_cache vdev_cache_t;
+typedef struct vdev_cache_entry vdev_cache_entry_t;
struct abd;
extern uint_t zfs_vdev_queue_depth_pct;
@@ -130,24 +132,44 @@
/*
* Virtual device properties
*/
-typedef union vdev_queue_class {
- list_t vqc_list;
- avl_tree_t vqc_tree;
+struct vdev_cache_entry {
+ struct abd *ve_abd;
+ uint64_t ve_offset;
+ clock_t ve_lastused;
+ avl_node_t ve_offset_node;
+ avl_node_t ve_lastused_node;
+ uint32_t ve_hits;
+ uint16_t ve_missed_update;
+ zio_t *ve_fill_io;
+};
+
+struct vdev_cache {
+ avl_tree_t vc_offset_tree;
+ avl_tree_t vc_lastused_tree;
+ kmutex_t vc_lock;
+};
+
+typedef struct vdev_queue_class {
+ uint32_t vqc_active;
+
+ /*
+ * Sorted by offset or timestamp, depending on whether the queue is
+ * LBA-ordered or FIFO.
+ */
+ avl_tree_t vqc_queued_tree;
} vdev_queue_class_t;
struct vdev_queue {
vdev_t *vq_vdev;
vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
+ avl_tree_t vq_active_tree;
avl_tree_t vq_read_offset_tree;
avl_tree_t vq_write_offset_tree;
+ avl_tree_t vq_trim_offset_tree;
uint64_t vq_last_offset;
zio_priority_t vq_last_prio; /* Last sent I/O priority. */
- uint32_t vq_cqueued; /* Classes with queued I/Os. */
- uint32_t vq_cactive[ZIO_PRIORITY_NUM_QUEUEABLE];
- uint32_t vq_active; /* Number of active I/Os. */
uint32_t vq_ia_active; /* Active interactive I/Os. */
uint32_t vq_nia_credit; /* Non-interactive I/Os credit. */
- list_t vq_active_list; /* List of active I/Os. */
hrtime_t vq_io_complete_ts; /* time last i/o completed */
hrtime_t vq_io_delta_ts;
zio_t vq_io_search; /* used as local for stack reduction */
@@ -421,6 +443,7 @@
boolean_t vdev_resilver_deferred; /* resilver deferred */
boolean_t vdev_kobj_flag; /* kobj event record */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
+ vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
zio_t *vdev_probe_zio; /* root of current probe */
vdev_aux_t vdev_label_aux; /* on-disk aux state */
diff --git a/sys/contrib/openzfs/include/sys/vdev_rebuild.h b/sys/contrib/openzfs/include/sys/vdev_rebuild.h
--- a/sys/contrib/openzfs/include/sys/vdev_rebuild.h
+++ b/sys/contrib/openzfs/include/sys/vdev_rebuild.h
@@ -79,7 +79,6 @@
uint64_t vr_pass_start_time;
uint64_t vr_pass_bytes_scanned;
uint64_t vr_pass_bytes_issued;
- uint64_t vr_pass_bytes_skipped;
/* On-disk state updated by vdev_rebuild_zap_update_sync() */
vdev_rebuild_phys_t vr_rebuild_phys;
diff --git a/sys/contrib/openzfs/include/sys/zfs_refcount.h b/sys/contrib/openzfs/include/sys/zfs_refcount.h
--- a/sys/contrib/openzfs/include/sys/zfs_refcount.h
+++ b/sys/contrib/openzfs/include/sys/zfs_refcount.h
@@ -27,7 +27,6 @@
#define _SYS_ZFS_REFCOUNT_H
#include <sys/inttypes.h>
-#include <sys/avl.h>
#include <sys/list.h>
#include <sys/zfs_context.h>
@@ -44,22 +43,19 @@
#ifdef ZFS_DEBUG
typedef struct reference {
- union {
- avl_node_t a;
- list_node_t l;
- } ref_link;
+ list_node_t ref_link;
const void *ref_holder;
uint64_t ref_number;
- boolean_t ref_search;
+ uint8_t *ref_removed;
} reference_t;
typedef struct refcount {
- uint64_t rc_count;
kmutex_t rc_mtx;
- avl_tree_t rc_tree;
- list_t rc_removed;
- uint_t rc_removed_count;
boolean_t rc_tracked;
+ list_t rc_list;
+ list_t rc_removed;
+ uint64_t rc_count;
+ uint64_t rc_removed_count;
} zfs_refcount_t;
/*
@@ -77,15 +73,13 @@
int64_t zfs_refcount_add(zfs_refcount_t *, const void *);
int64_t zfs_refcount_remove(zfs_refcount_t *, const void *);
/*
- * Note that (add|remove)_many adds/removes one reference with "number" N,
- * _not_ N references with "number" 1, which is what (add|remove)_few does,
- * or what vanilla zfs_refcount_(add|remove) called N times would do.
+ * Note that (add|remove)_many add/remove one reference with "number" N,
+ * _not_ make N references with "number" 1, which is what vanilla
+ * zfs_refcount_(add|remove) would do if called N times.
*
* Attempting to remove a reference with number N when none exists is a
* panic on debug kernels with reference_tracking enabled.
*/
-void zfs_refcount_add_few(zfs_refcount_t *, uint64_t, const void *);
-void zfs_refcount_remove_few(zfs_refcount_t *, uint64_t, const void *);
int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *);
int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *);
void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
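A concrete reading of the (add|remove)_many note above, as a sketch
(refcount_example and tag are illustrative; the holder may be any pointer):

#include <sys/zfs_refcount.h>

/* One reference carrying number 3 vs. three references of number 1. */
static void
refcount_example(void)
{
	zfs_refcount_t rc;
	void *tag = &rc;			/* any holder pointer */

	zfs_refcount_create(&rc);

	zfs_refcount_add_many(&rc, 3, tag);	/* ONE reference, number 3 */
	zfs_refcount_remove_many(&rc, 3, tag);	/* remove it as 3, not 1+1+1 */

	zfs_refcount_add(&rc, tag);		/* three references of 1 ... */
	zfs_refcount_add(&rc, tag);
	zfs_refcount_add(&rc, tag);
	zfs_refcount_remove(&rc, tag);		/* ... removed one at a time */
	zfs_refcount_remove(&rc, tag);
	zfs_refcount_remove(&rc, tag);

	zfs_refcount_destroy(&rc);
}

zfs_refcount_count() reports 3 at the midpoint in both cases; the difference
is only visible to the debug tracking, which records holders per reference.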
@@ -114,10 +108,6 @@
#define zfs_refcount_count(rc) atomic_load_64(&(rc)->rc_count)
#define zfs_refcount_add(rc, holder) atomic_inc_64_nv(&(rc)->rc_count)
#define zfs_refcount_remove(rc, holder) atomic_dec_64_nv(&(rc)->rc_count)
-#define zfs_refcount_add_few(rc, number, holder) \
- atomic_add_64(&(rc)->rc_count, number)
-#define zfs_refcount_remove_few(rc, number, holder) \
- atomic_add_64(&(rc)->rc_count, -number)
#define zfs_refcount_add_many(rc, number, holder) \
atomic_add_64_nv(&(rc)->rc_count, number)
#define zfs_refcount_remove_many(rc, number, holder) \
diff --git a/sys/contrib/openzfs/include/sys/zfs_znode.h b/sys/contrib/openzfs/include/sys/zfs_znode.h
--- a/sys/contrib/openzfs/include/sys/zfs_znode.h
+++ b/sys/contrib/openzfs/include/sys/zfs_znode.h
@@ -158,7 +158,6 @@
#define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
-extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
#ifdef _KERNEL
#include <sys/zfs_znode_impl.h>
@@ -281,6 +280,7 @@
extern void zfs_remove_op_tables(void);
extern int zfs_create_op_tables(void);
extern dev_t zfs_cmpldev(uint64_t);
+extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
extern boolean_t zfs_get_vfs_flag_unmounted(objset_t *os);
extern void zfs_znode_dmu_fini(znode_t *);
diff --git a/sys/contrib/openzfs/include/sys/zil.h b/sys/contrib/openzfs/include/sys/zil.h
--- a/sys/contrib/openzfs/include/sys/zil.h
+++ b/sys/contrib/openzfs/include/sys/zil.h
@@ -489,22 +489,18 @@
* Transactions which have been allocated to the "normal"
* (i.e. not slog) storage pool. Note that "bytes" accumulate
* the actual log record sizes - which do not include the actual
- * data in case of indirect writes. bytes <= write <= alloc.
+ * data in case of indirect writes.
*/
kstat_named_t zil_itx_metaslab_normal_count;
kstat_named_t zil_itx_metaslab_normal_bytes;
- kstat_named_t zil_itx_metaslab_normal_write;
- kstat_named_t zil_itx_metaslab_normal_alloc;
/*
* Transactions which have been allocated to the "slog" storage pool.
* If there are no separate log devices, this is the same as the
- * "normal" pool. bytes <= write <= alloc.
+ * "normal" pool.
*/
kstat_named_t zil_itx_metaslab_slog_count;
kstat_named_t zil_itx_metaslab_slog_bytes;
- kstat_named_t zil_itx_metaslab_slog_write;
- kstat_named_t zil_itx_metaslab_slog_alloc;
} zil_kstat_values_t;
typedef struct zil_sums {
@@ -519,12 +515,8 @@
wmsum_t zil_itx_needcopy_bytes;
wmsum_t zil_itx_metaslab_normal_count;
wmsum_t zil_itx_metaslab_normal_bytes;
- wmsum_t zil_itx_metaslab_normal_write;
- wmsum_t zil_itx_metaslab_normal_alloc;
wmsum_t zil_itx_metaslab_slog_count;
wmsum_t zil_itx_metaslab_slog_bytes;
- wmsum_t zil_itx_metaslab_slog_write;
- wmsum_t zil_itx_metaslab_slog_alloc;
} zil_sums_t;
#define ZIL_STAT_INCR(zil, stat, val) \
diff --git a/sys/contrib/openzfs/include/sys/zil_impl.h b/sys/contrib/openzfs/include/sys/zil_impl.h
--- a/sys/contrib/openzfs/include/sys/zil_impl.h
+++ b/sys/contrib/openzfs/include/sys/zil_impl.h
@@ -44,7 +44,7 @@
* must be held.
*
* After the lwb is "opened", it can transition into the "issued" state
- * via zil_lwb_write_close(). Again, the zilog's "zl_issuer_lock" must
+ * via zil_lwb_write_issue(). Again, the zilog's "zl_issuer_lock" must
* be held when making this transition.
*
* After the lwb's write zio completes, it transitions into the "write
@@ -93,23 +93,20 @@
blkptr_t lwb_blk; /* on disk address of this log blk */
boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
- boolean_t lwb_indirect; /* do not postpone zil_lwb_commit() */
int lwb_nused; /* # used bytes in buffer */
- int lwb_nfilled; /* # filled bytes in buffer */
int lwb_sz; /* size of block and buffer */
lwb_state_t lwb_state; /* the state of this lwb */
char *lwb_buf; /* log write buffer */
zio_t *lwb_write_zio; /* zio for the lwb buffer */
zio_t *lwb_root_zio; /* root zio for lwb write and flushes */
- hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
uint64_t lwb_issued_txg; /* the txg when the write is issued */
uint64_t lwb_max_txg; /* highest txg in this lwb */
list_node_t lwb_node; /* zilog->zl_lwb_list linkage */
- list_node_t lwb_issue_node; /* linkage of lwbs ready for issue */
list_t lwb_itxs; /* list of itx's */
list_t lwb_waiters; /* list of zil_commit_waiter's */
avl_tree_t lwb_vdev_tree; /* vdevs to flush after lwb write */
kmutex_t lwb_vdev_lock; /* protects lwb_vdev_tree */
+ hrtime_t lwb_issued_timestamp; /* when was the lwb issued? */
} lwb_t;
/*
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -190,6 +190,7 @@
#define ZIO_FLAG_SPECULATIVE (1ULL << 8)
#define ZIO_FLAG_CONFIG_WRITER (1ULL << 9)
#define ZIO_FLAG_DONT_RETRY (1ULL << 10)
+#define ZIO_FLAG_DONT_CACHE (1ULL << 11)
#define ZIO_FLAG_NODATA (1ULL << 12)
#define ZIO_FLAG_INDUCE_DAMAGE (1ULL << 13)
#define ZIO_FLAG_IO_ALLOCATING (1ULL << 14)
@@ -341,9 +342,9 @@
enum zio_checksum zp_checksum;
enum zio_compress zp_compress;
uint8_t zp_complevel;
+ dmu_object_type_t zp_type;
uint8_t zp_level;
uint8_t zp_copies;
- dmu_object_type_t zp_type;
boolean_t zp_dedup;
boolean_t zp_dedup_verify;
boolean_t zp_nopwrite;
@@ -436,12 +437,6 @@
list_node_t zl_child_node;
} zio_link_t;
-enum zio_qstate {
- ZIO_QS_NONE = 0,
- ZIO_QS_QUEUED,
- ZIO_QS_ACTIVE,
-};
-
struct zio {
/* Core information about this I/O */
zbookmark_phys_t io_bookmark;
@@ -466,6 +461,7 @@
/* Callback info */
zio_done_func_t *io_ready;
zio_done_func_t *io_children_ready;
+ zio_done_func_t *io_physdone;
zio_done_func_t *io_done;
void *io_private;
int64_t io_prev_space_delta; /* DMU private */
@@ -485,12 +481,6 @@
const zio_vsd_ops_t *io_vsd_ops;
metaslab_class_t *io_metaslab_class; /* dva throttle class */
- enum zio_qstate io_queue_state; /* vdev queue state */
- union {
- list_node_t l;
- avl_node_t a;
- } io_queue_node ____cacheline_aligned; /* allocator and vdev queues */
- avl_node_t io_offset_node; /* vdev offset queues */
uint64_t io_offset;
hrtime_t io_timestamp; /* submitted at */
hrtime_t io_queued_timestamp;
@@ -498,6 +488,9 @@
hrtime_t io_delta; /* vdev queue service delta */
hrtime_t io_delay; /* Device access time (disk or */
/* file). */
+ avl_node_t io_queue_node;
+ avl_node_t io_offset_node;
+ avl_node_t io_alloc_node;
zio_alloc_list_t io_alloc_list;
/* Internal pipeline state */
@@ -511,6 +504,9 @@
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
+ uint64_t io_child_count;
+ uint64_t io_phys_children;
+ uint64_t io_parent_count;
uint64_t *io_stall;
zio_t *io_gang_leader;
zio_gang_node_t *io_gang_tree;
@@ -558,8 +554,9 @@
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *children_ready,
- zio_done_func_t *done, void *priv, zio_priority_t priority,
- zio_flag_t flags, const zbookmark_phys_t *zb);
+ zio_done_func_t *physdone, zio_done_func_t *done,
+ void *priv, zio_priority_t priority, zio_flag_t flags,
+ const zbookmark_phys_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
struct abd *data, uint64_t size, zio_done_func_t *done, void *priv,
@@ -611,7 +608,6 @@
extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
extern zio_t *zio_unique_parent(zio_t *cio);
extern void zio_add_child(zio_t *pio, zio_t *cio);
-extern void zio_add_child_first(zio_t *pio, zio_t *cio);
extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
diff --git a/sys/contrib/openzfs/lib/libspl/include/umem.h b/sys/contrib/openzfs/lib/libspl/include/umem.h
--- a/sys/contrib/openzfs/lib/libspl/include/umem.h
+++ b/sys/contrib/openzfs/lib/libspl/include/umem.h
@@ -83,7 +83,7 @@
const char *_umem_options_init(void);
const char *_umem_logging_init(void);
-__attribute__((malloc, alloc_size(1)))
+__attribute__((alloc_size(1)))
static inline void *
umem_alloc(size_t size, int flags)
{
@@ -96,7 +96,7 @@
return (ptr);
}
-__attribute__((malloc, alloc_size(1)))
+__attribute__((alloc_size(1)))
static inline void *
umem_alloc_aligned(size_t size, size_t align, int flags)
{
@@ -118,7 +118,7 @@
return (ptr);
}
-__attribute__((malloc, alloc_size(1)))
+__attribute__((alloc_size(1)))
static inline void *
umem_zalloc(size_t size, int flags)
{
@@ -188,7 +188,6 @@
umem_free(cp, sizeof (umem_cache_t));
}
-__attribute__((malloc))
static inline void *
umem_cache_alloc(umem_cache_t *cp, int flags)
{
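Dropping `malloc` while keeping `alloc_size(1)` above preserves the size
information. A stand-alone sketch of what alloc_size buys (xalloc is a
hypothetical name):

#include <stdlib.h>
#include <string.h>

/*
 * alloc_size(1): argument 1 is the byte size of the returned object.
 * With _FORTIFY_SOURCE/__builtin_object_size the compiler can then flag
 * out-of-bounds accesses against the stated size at compile time.
 */
__attribute__((alloc_size(1)))
static void *
xalloc(size_t size)
{
	return (malloc(size));
}

int
main(void)
{
	char *p = xalloc(8);
	if (p != NULL) {
		memset(p, 0, 8);	/* within the stated size */
		free(p);
	}
	return (0);
}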
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
@@ -1789,8 +1789,7 @@
nvlist_t *nvl;
int nvl_len = 0;
int added_resv = 0;
- zfs_prop_t prop;
- boolean_t nsprop = B_FALSE;
+ zfs_prop_t prop = 0;
nvpair_t *elem;
(void) snprintf(errbuf, sizeof (errbuf),
@@ -1837,7 +1836,6 @@
elem = nvlist_next_nvpair(nvl, elem)) {
prop = zfs_name_to_prop(nvpair_name(elem));
- nsprop |= zfs_is_namespace_prop(prop);
assert(cl_idx < nvl_len);
/*
@@ -1936,7 +1934,8 @@
* if one of the options handled by the generic
* Linux namespace layer has been modified.
*/
- if (nsprop && zfs_is_mounted(zhp, NULL))
+ if (zfs_is_namespace_prop(prop) &&
+ zfs_is_mounted(zhp, NULL))
ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0);
}
}
diff --git a/sys/contrib/openzfs/lib/libzpool/Makefile.am b/sys/contrib/openzfs/lib/libzpool/Makefile.am
--- a/sys/contrib/openzfs/lib/libzpool/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzpool/Makefile.am
@@ -135,6 +135,7 @@
module/zfs/uberblock.c \
module/zfs/unique.c \
module/zfs/vdev.c \
+ module/zfs/vdev_cache.c \
module/zfs/vdev_draid.c \
module/zfs/vdev_draid_rand.c \
module/zfs/vdev_indirect.c \
diff --git a/sys/contrib/openzfs/man/man4/spl.4 b/sys/contrib/openzfs/man/man4/spl.4
--- a/sys/contrib/openzfs/man/man4/spl.4
+++ b/sys/contrib/openzfs/man/man4/spl.4
@@ -193,19 +193,4 @@
reading it could cause a lock-up if the list grows too large
without limiting the output.
"(truncated)" will be shown if the list is larger than the limit.
-.
-.It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 10000 Pq uint
-(Linux-only)
-How long a taskq has to have had no work before we tear it down.
-Previously, we would tear down a dynamic taskq worker as soon
-as we noticed it had no work, but it was observed that this led
-to a lot of churn in tearing down things we then immediately
-spawned anew.
-In practice, it seems any nonzero value will remove the vast
-majority of this churn, while the nontrivially larger value
-was chosen to help filter out the little remaining churn on
-a mostly idle system.
-Setting this value to
-.Sy 0
-will revert to the previous behavior.
.El
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -239,16 +239,6 @@
Make some blocks above a certain size be gang blocks.
This option is used by the test suite to facilitate testing.
.
-.It Sy zfs_ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
-Default DDT ZAP data block size as a power of 2. Note that changing this after
-creating a DDT on the pool will not affect existing DDTs, only newly created
-ones.
-.
-.It Sy zfs_ddt_zap_default_ibs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
-Default DDT ZAP indirect block size as a power of 2. Note that changing this
-after creating a DDT on the pool will not affect existing DDTs, only newly
-created ones.
-.
.It Sy zfs_default_bs Ns = Ns Sy 9 Po 512 B Pc Pq int
Default dnode block size as a power of 2.
.
@@ -2026,12 +2016,33 @@
Flush dirty data to disk at least every this many seconds (maximum TXG
duration).
.
+.It Sy zfs_vdev_aggregate_trim Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Allow TRIM I/O operations to be aggregated.
+This is normally not helpful because the extents to be trimmed
+will already have been aggregated by the metaslab.
+This option is provided for debugging and performance analysis.
+.
.It Sy zfs_vdev_aggregation_limit Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq uint
Max vdev I/O aggregation size.
.
.It Sy zfs_vdev_aggregation_limit_non_rotating Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq uint
Max vdev I/O aggregation size for non-rotating media.
.
+.It Sy zfs_vdev_cache_bshift Ns = Ns Sy 16 Po 64 KiB Pc Pq uint
+Shift size to inflate reads to.
+.
+.It Sy zfs_vdev_cache_max Ns = Ns Sy 16384 Ns B Po 16 KiB Pc Pq uint
+Inflate reads smaller than this value to meet the
+.Sy zfs_vdev_cache_bshift
+size
+.Pq default Sy 64 KiB .
+.
+.It Sy zfs_vdev_cache_size Ns = Ns Sy 0 Pq uint
+Total size of the per-disk cache in bytes.
+.Pp
+Currently this feature is disabled, as it has been found to be unhelpful
+for performance and in some cases harmful.
+.
.It Sy zfs_vdev_mirror_rotating_inc Ns = Ns Sy 0 Pq int
A number by which the balancing algorithm increments the load calculation for
the purpose of selecting the least busy mirror member when an I/O operation
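The three zfs_vdev_cache_* entries above interact: a read smaller than
zfs_vdev_cache_max is inflated to a 1 << zfs_vdev_cache_bshift region
aligned on that boundary. A stand-alone sketch of the address math (values
illustrative; the in-kernel code uses P2ALIGN for the same computation):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint64_t cache_bshift = 16;	/* 64 KiB cache blocks */
	const uint64_t cache_max = 16384;	/* inflate reads < 16 KiB */
	uint64_t offset = 200000, size = 4096;	/* a small read */

	if (size < cache_max) {
		uint64_t start = offset & ~((1ULL << cache_bshift) - 1);
		/* Prints: read [200000, 204096) inflated to [196608, 262144) */
		printf("read [%llu, %llu) inflated to [%llu, %llu)\n",
		    (unsigned long long)offset,
		    (unsigned long long)(offset + size),
		    (unsigned long long)start,
		    (unsigned long long)(start + (1ULL << cache_bshift)));
	}
	return (0);
}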
diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7
--- a/sys/contrib/openzfs/man/man7/zpool-features.7
+++ b/sys/contrib/openzfs/man/man7/zpool-features.7
@@ -228,10 +228,8 @@
filesystem_limits
hole_birth
large_blocks
-livelist
lz4_compress
spacemap_histogram
-zpool_checkpoint
.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
.Ed
diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8
--- a/sys/contrib/openzfs/man/man8/zdb.8
+++ b/sys/contrib/openzfs/man/man8/zdb.8
@@ -14,7 +14,7 @@
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
.\" Copyright (c) 2017 Intel Corporation.
.\"
-.Dd June 27, 2023
+.Dd October 7, 2020
.Dt ZDB 8
.Os
.
@@ -41,17 +41,9 @@
.Ar poolname Ns Op Ar / Ns Ar dataset Ns | Ns Ar objset-ID
.Op Ar object Ns | Ns Ar range Ns …
.Nm
-.Fl B
-.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
-.Op Fl U Ar cache
-.Op Fl K Ar key
-.Ar poolname Ns Ar / Ns Ar objset-ID
-.Op Ar backup-flags
-.Nm
.Fl C
.Op Fl A
.Op Fl U Ar cache
-.Op Ar poolname
.Nm
.Fl E
.Op Fl A
@@ -131,22 +123,6 @@
Display statistics regarding the number, size
.Pq logical, physical and allocated
and deduplication of blocks.
-.It Fl B , -backup
-Generate a backup stream, similar to
-.Nm zfs Cm send ,
-but for the numeric objset ID, and without opening the dataset.
-This can be useful in recovery scenarios if dataset metadata has become
-corrupted but the dataset itself is readable.
-The optional
-.Ar flags
-argument is a string of one or more of the letters
-.Sy e ,
-.Sy L ,
-.Sy c ,
-and
-.Sy w ,
-which correspond to the same flags in
-.Xr zfs-send 8 .
.It Fl c , -checksum
Verify the checksum of all metadata blocks while printing block statistics
.Po see
diff --git a/sys/contrib/openzfs/man/man8/zfs-create.8 b/sys/contrib/openzfs/man/man8/zfs-create.8
--- a/sys/contrib/openzfs/man/man8/zfs-create.8
+++ b/sys/contrib/openzfs/man/man8/zfs-create.8
@@ -234,11 +234,14 @@
Print verbose information about the created dataset.
.El
.El
-.Ss ZFS for Swap
-Swapping to a ZFS volume is prone to deadlock and not recommended.
-See OpenZFS FAQ.
-.Pp
-Swapping to a file on a ZFS filesystem is not supported.
+.Ss ZFS Volumes as Swap
+ZFS volumes may be used as swap devices.
+After creating the volume with the
+.Nm zfs Cm create Fl V
+command, enable the swap area using the
+.Xr swapon 8
+command.
+Swapping to files on ZFS filesystems is not supported.
.
.Sh EXAMPLES
.\" These are, respectively, examples 1, 10 from zfs.8
diff --git a/sys/contrib/openzfs/man/man8/zpool-create.8 b/sys/contrib/openzfs/man/man8/zpool-create.8
--- a/sys/contrib/openzfs/man/man8/zpool-create.8
+++ b/sys/contrib/openzfs/man/man8/zpool-create.8
@@ -87,13 +87,13 @@
However this check is not robust enough
to detect simultaneous attempts to use a new device in different pools, even if
.Sy multihost Ns = Sy enabled .
-The administrator must ensure that simultaneous invocations of any combination
+The administrator must ensure that simultaneous invocations of any combination
of
.Nm zpool Cm replace ,
.Nm zpool Cm create ,
.Nm zpool Cm add ,
or
-.Nm zpool Cm labelclear
+.Nm zpool Cm labelclear
do not refer to the same device.
Using the same device in two pools will result in pool corruption.
.Pp
diff --git a/sys/contrib/openzfs/man/man8/zpool-events.8 b/sys/contrib/openzfs/man/man8/zpool-events.8
--- a/sys/contrib/openzfs/man/man8/zpool-events.8
+++ b/sys/contrib/openzfs/man/man8/zpool-events.8
@@ -456,6 +456,7 @@
ZIO_FLAG_SPECULATIVE:0x00000100
ZIO_FLAG_CONFIG_WRITER:0x00000200
ZIO_FLAG_DONT_RETRY:0x00000400
+ZIO_FLAG_DONT_CACHE:0x00000800
ZIO_FLAG_NODATA:0x00001000
ZIO_FLAG_INDUCE_DAMAGE:0x00002000
diff --git a/sys/contrib/openzfs/man/man8/zpool-scrub.8 b/sys/contrib/openzfs/man/man8/zpool-scrub.8
--- a/sys/contrib/openzfs/man/man8/zpool-scrub.8
+++ b/sys/contrib/openzfs/man/man8/zpool-scrub.8
@@ -26,7 +26,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
-.Dd June 22, 2023
+.Dd July 25, 2021
.Dt ZPOOL-SCRUB 8
.Os
.
@@ -123,7 +123,7 @@
.No # Nm zpool Cm status
...
scan: scrub in progress since Sun Jul 25 16:07:49 2021
- 403M / 405M scanned at 100M/s, 68.4M / 405M issued at 10.0M/s
+ 403M scanned at 100M/s, 68.4M issued at 10.0M/s, 405M total
0B repaired, 16.91% done, 00:00:04 to go
...
.Ed
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -34,20 +34,6 @@
ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than=
endif
-# Generated binary search code is particularly bad with this optimization.
-# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
-# is not affected when unrolling is done.
-# Disable it until the following upstream issue is resolved:
-# https://github.com/llvm/llvm-project/issues/62790
-ifeq ($(CONFIG_X86),y)
-ifeq ($(CONFIG_CC_IS_CLANG),y)
-CFLAGS_zfs/dsl_scan.o += -mllvm -x86-cmov-converter=false
-CFLAGS_zfs/metaslab.o += -mllvm -x86-cmov-converter=false
-CFLAGS_zfs/range_tree.o += -mllvm -x86-cmov-converter=false
-CFLAGS_zfs/zap_micro.o += -mllvm -x86-cmov-converter=false
-endif
-endif
-
ifneq ($(KBUILD_EXTMOD),)
@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
@@ -382,6 +368,7 @@
uberblock.o \
unique.o \
vdev.o \
+ vdev_cache.o \
vdev_draid.o \
vdev_draid_rand.o \
vdev_indirect.o \
diff --git a/sys/contrib/openzfs/module/Makefile.bsd b/sys/contrib/openzfs/module/Makefile.bsd
--- a/sys/contrib/openzfs/module/Makefile.bsd
+++ b/sys/contrib/openzfs/module/Makefile.bsd
@@ -308,6 +308,7 @@
uberblock.c \
unique.c \
vdev.c \
+ vdev_cache.c \
vdev_draid.c \
vdev_draid_rand.c \
vdev_indirect.c \
@@ -399,20 +400,6 @@
.include <bsd.kmod.mk>
-# Generated binary search code is particularly bad with this optimization.
-# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
-# is not affected when unrolling is done.
-# Disable it until the following upstream issue is resolved:
-# https://github.com/llvm/llvm-project/issues/62790
-.if ${CC} == "clang"
-.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "amd64"
-CFLAGS.dsl_scan.c= -mllvm -x86-cmov-converter=false
-CFLAGS.metaslab.c= -mllvm -x86-cmov-converter=false
-CFLAGS.range_tree.c= -mllvm -x86-cmov-converter=false
-CFLAGS.zap_micro.c= -mllvm -x86-cmov-converter=false
-.endif
-.endif
-
CFLAGS.sysctl_os.c= -include ../zfs_config.h
CFLAGS.xxhash.c+= -include ${SYSDIR}/sys/_null.h
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -872,6 +872,8 @@
"Enable to bypass vdev_validate().");
/* END CSTYLED */
+/* vdev_cache.c */
+
/* vdev_mirror.c */
/* vdev_queue.c */
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
@@ -495,8 +495,10 @@
{
zfs_acl_node_t *aclnode;
- while ((aclnode = list_remove_head(&aclp->z_acl)))
+ while ((aclnode = list_head(&aclp->z_acl))) {
+ list_remove(&aclp->z_acl, aclnode);
zfs_acl_node_free(aclnode);
+ }
aclp->z_acl_count = 0;
aclp->z_acl_bytes = 0;
}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
@@ -2220,6 +2220,92 @@
return (0);
}
+/*
+ * Read a property stored within the master node.
+ */
+int
+zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
+{
+ uint64_t *cached_copy = NULL;
+
+ /*
+ * Figure out where in the objset_t the cached copy would live, if it
+ * is available for the requested property.
+ */
+ if (os != NULL) {
+ switch (prop) {
+ case ZFS_PROP_VERSION:
+ cached_copy = &os->os_version;
+ break;
+ case ZFS_PROP_NORMALIZE:
+ cached_copy = &os->os_normalization;
+ break;
+ case ZFS_PROP_UTF8ONLY:
+ cached_copy = &os->os_utf8only;
+ break;
+ case ZFS_PROP_CASE:
+ cached_copy = &os->os_casesensitivity;
+ break;
+ default:
+ break;
+ }
+ }
+ if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+ *value = *cached_copy;
+ return (0);
+ }
+
+ /*
+ * If the property wasn't cached, look up the file system's value for
+ * the property. For the version property, we look up a slightly
+ * different string.
+ */
+ const char *pname;
+ int error = ENOENT;
+ if (prop == ZFS_PROP_VERSION) {
+ pname = ZPL_VERSION_STR;
+ } else {
+ pname = zfs_prop_to_name(prop);
+ }
+
+ if (os != NULL) {
+ ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
+ error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
+ }
+
+ if (error == ENOENT) {
+ /* No value set, use the default value */
+ switch (prop) {
+ case ZFS_PROP_VERSION:
+ *value = ZPL_VERSION;
+ break;
+ case ZFS_PROP_NORMALIZE:
+ case ZFS_PROP_UTF8ONLY:
+ *value = 0;
+ break;
+ case ZFS_PROP_CASE:
+ *value = ZFS_CASE_SENSITIVE;
+ break;
+ case ZFS_PROP_ACLTYPE:
+ *value = ZFS_ACLTYPE_NFSV4;
+ break;
+ default:
+ return (error);
+ }
+ error = 0;
+ }
+
+ /*
+ * If one of the methods for getting the property value above worked,
+ * copy it into the objset_t's cache.
+ */
+ if (error == 0 && cached_copy != NULL) {
+ *cached_copy = *value;
+ }
+
+ return (error);
+}
+
/*
* Return true if the corresponding vfs's unmounted flag is set.
* Otherwise return false.
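A hedged usage sketch of the function added above (caller context and the
surrounding error handling are illustrative):

	uint64_t zplver, acltype;

	/* Falls back to ZPL_VERSION if the master node has no entry. */
	if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplver) != 0)
		return (SET_ERROR(EIO));

	/* Subsequent calls for the same property hit the objset cache. */
	(void) zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &acltype);

Note the platform-specific default: this FreeBSD copy falls back to
ZFS_ACLTYPE_NFSV4, while the Linux copy later in this diff uses
ZFS_ACLTYPE_OFF.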
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
@@ -2069,93 +2069,6 @@
return (error);
}
-/*
- * Read a property stored within the master node.
- */
-int
-zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
-{
- uint64_t *cached_copy = NULL;
-
- /*
- * Figure out where in the objset_t the cached copy would live, if it
- * is available for the requested property.
- */
- if (os != NULL) {
- switch (prop) {
- case ZFS_PROP_VERSION:
- cached_copy = &os->os_version;
- break;
- case ZFS_PROP_NORMALIZE:
- cached_copy = &os->os_normalization;
- break;
- case ZFS_PROP_UTF8ONLY:
- cached_copy = &os->os_utf8only;
- break;
- case ZFS_PROP_CASE:
- cached_copy = &os->os_casesensitivity;
- break;
- default:
- break;
- }
- }
- if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
- *value = *cached_copy;
- return (0);
- }
-
- /*
- * If the property wasn't cached, look up the file system's value for
- * the property. For the version property, we look up a slightly
- * different string.
- */
- const char *pname;
- int error = ENOENT;
- if (prop == ZFS_PROP_VERSION) {
- pname = ZPL_VERSION_STR;
- } else {
- pname = zfs_prop_to_name(prop);
- }
-
- if (os != NULL) {
- ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
- error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
- }
-
- if (error == ENOENT) {
- /* No value set, use the default value */
- switch (prop) {
- case ZFS_PROP_VERSION:
- *value = ZPL_VERSION;
- break;
- case ZFS_PROP_NORMALIZE:
- case ZFS_PROP_UTF8ONLY:
- *value = 0;
- break;
- case ZFS_PROP_CASE:
- *value = ZFS_CASE_SENSITIVE;
- break;
- case ZFS_PROP_ACLTYPE:
- *value = ZFS_ACLTYPE_NFSV4;
- break;
- default:
- return (error);
- }
- error = 0;
- }
-
- /*
- * If one of the methods for getting the property value above worked,
- * copy it into the objset_t's cache.
- */
- if (error == 0 && cached_copy != NULL) {
- *cached_copy = *value;
- }
-
- return (error);
-}
-
-
void
zfs_znode_update_vfs(znode_t *zp)
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
@@ -182,11 +182,8 @@
* of that infrastructure we are responsible for incrementing it.
*/
if (current->reclaim_state)
-#ifdef HAVE_RECLAIM_STATE_RECLAIMED
- current->reclaim_state->reclaimed += size >> PAGE_SHIFT;
-#else
current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
-#endif
+
vfree(ptr);
}
@@ -1015,18 +1012,8 @@
ASSERT0(flags & ~KM_PUBLIC_MASK);
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT((skc->skc_flags & KMC_SLAB) == 0);
-
- *obj = NULL;
-
- /*
- * Since we can't sleep, attempt an emergency allocation to satisfy
- * the request. The only alternative is to fail the allocation, but
- * it's preferable to try. The use of KM_NOSLEEP is expected to be rare.
- */
- if (flags & KM_NOSLEEP)
- return (spl_emergency_alloc(skc, flags, obj));
-
might_sleep();
+ *obj = NULL;
/*
* Before allocating a new slab wait for any reaping to complete and
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
@@ -36,12 +36,6 @@
module_param(spl_taskq_thread_bind, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
-static uint_t spl_taskq_thread_timeout_ms = 10000;
-/* BEGIN CSTYLED */
-module_param(spl_taskq_thread_timeout_ms, uint, 0644);
-/* END CSTYLED */
-MODULE_PARM_DESC(spl_taskq_thread_timeout_ms,
- "Time to require a dynamic thread be idle before it gets cleaned up");
static int spl_taskq_thread_dynamic = 1;
module_param(spl_taskq_thread_dynamic, int, 0444);
@@ -854,37 +848,12 @@
tqt_thread_list) == tqt)
return (0);
- int no_work =
+ return
((tq->tq_nspawn == 0) && /* No threads are being spawned */
(tq->tq_nactive == 0) && /* No threads are handling tasks */
(tq->tq_nthreads > 1) && /* More than 1 thread is running */
(!taskq_next_ent(tq)) && /* There are no pending tasks */
(spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */
-
- /*
- * If we would have said stop before, let's instead wait a bit, maybe
- * we'll see more work come our way soon...
- */
- if (no_work) {
- /* if it's 0, we want the old behavior. */
- /* if the taskq is being torn down, we also want to go away. */
- if (spl_taskq_thread_timeout_ms == 0 ||
- !(tq->tq_flags & TASKQ_ACTIVE))
- return (1);
- unsigned long lasttime = tq->lastshouldstop;
- if (lasttime > 0) {
- if (time_after(jiffies, lasttime +
- msecs_to_jiffies(spl_taskq_thread_timeout_ms)))
- return (1);
- else
- return (0);
- } else {
- tq->lastshouldstop = jiffies;
- }
- } else {
- tq->lastshouldstop = 0;
- }
- return (0);
}
static int
@@ -1122,7 +1091,6 @@
tq->tq_flags = (flags | TASKQ_ACTIVE);
tq->tq_next_id = TASKQID_INITIAL;
tq->tq_lowest_id = TASKQID_INITIAL;
- tq->lastshouldstop = 0;
INIT_LIST_HEAD(&tq->tq_free_list);
INIT_LIST_HEAD(&tq->tq_pend_list);
INIT_LIST_HEAD(&tq->tq_prio_list);
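The removed hunks above used the standard Linux jiffies idiom for "idle for
at least N milliseconds". A minimal stand-alone rendering of that idiom
(kernel context assumed; idle_long_enough is an illustrative name):

#include <linux/jiffies.h>

/*
 * Record the first moment the thread had nothing to do, and only agree
 * to exit once that moment is more than timeout_ms in the past.
 */
static int
idle_long_enough(unsigned long *first_idle, unsigned int timeout_ms)
{
	if (*first_idle == 0) {
		*first_idle = jiffies;		/* just went idle */
		return (0);
	}
	return (time_after(jiffies,
	    *first_idle + msecs_to_jiffies(timeout_ms)));
}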
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
@@ -219,11 +219,7 @@
arc_reduce_target_size(ptob(sc->nr_to_scan));
arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
if (current->reclaim_state != NULL)
-#ifdef HAVE_RECLAIM_STATE_RECLAIMED
- current->reclaim_state->reclaimed += sc->nr_to_scan;
-#else
current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
-#endif
/*
* We are experiencing memory pressure which the arc_evict_zthr was
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
@@ -493,8 +493,10 @@
{
zfs_acl_node_t *aclnode;
- while ((aclnode = list_remove_head(&aclp->z_acl)))
+ while ((aclnode = list_head(&aclp->z_acl))) {
+ list_remove(&aclp->z_acl, aclnode);
zfs_acl_node_free(aclnode);
+ }
aclp->z_acl_count = 0;
aclp->z_acl_bytes = 0;
}
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -2052,6 +2052,91 @@
return (0);
}
+/*
+ * Read a property stored within the master node.
+ */
+int
+zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
+{
+ uint64_t *cached_copy = NULL;
+
+ /*
+ * Figure out where in the objset_t the cached copy would live, if it
+ * is available for the requested property.
+ */
+ if (os != NULL) {
+ switch (prop) {
+ case ZFS_PROP_VERSION:
+ cached_copy = &os->os_version;
+ break;
+ case ZFS_PROP_NORMALIZE:
+ cached_copy = &os->os_normalization;
+ break;
+ case ZFS_PROP_UTF8ONLY:
+ cached_copy = &os->os_utf8only;
+ break;
+ case ZFS_PROP_CASE:
+ cached_copy = &os->os_casesensitivity;
+ break;
+ default:
+ break;
+ }
+ }
+ if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+ *value = *cached_copy;
+ return (0);
+ }
+
+ /*
+ * If the property wasn't cached, look up the file system's value for
+ * the property. For the version property, we look up a slightly
+ * different string.
+ */
+ const char *pname;
+ int error = ENOENT;
+ if (prop == ZFS_PROP_VERSION)
+ pname = ZPL_VERSION_STR;
+ else
+ pname = zfs_prop_to_name(prop);
+
+ if (os != NULL) {
+ ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
+ error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
+ }
+
+ if (error == ENOENT) {
+ /* No value set, use the default value */
+ switch (prop) {
+ case ZFS_PROP_VERSION:
+ *value = ZPL_VERSION;
+ break;
+ case ZFS_PROP_NORMALIZE:
+ case ZFS_PROP_UTF8ONLY:
+ *value = 0;
+ break;
+ case ZFS_PROP_CASE:
+ *value = ZFS_CASE_SENSITIVE;
+ break;
+ case ZFS_PROP_ACLTYPE:
+ *value = ZFS_ACLTYPE_OFF;
+ break;
+ default:
+ return (error);
+ }
+ error = 0;
+ }
+
+ /*
+ * If one of the methods for getting the property value above worked,
+ * copy it into the objset_t's cache.
+ */
+ if (error == 0 && cached_copy != NULL) {
+ *cached_copy = *value;
+ }
+
+ return (error);
+}
+
/*
* Return true if the corresponding vfs's unmounted flag is set.
* Otherwise return false.
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
@@ -2254,91 +2254,6 @@
return (error);
}
-/*
- * Read a property stored within the master node.
- */
-int
-zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
-{
- uint64_t *cached_copy = NULL;
-
- /*
- * Figure out where in the objset_t the cached copy would live, if it
- * is available for the requested property.
- */
- if (os != NULL) {
- switch (prop) {
- case ZFS_PROP_VERSION:
- cached_copy = &os->os_version;
- break;
- case ZFS_PROP_NORMALIZE:
- cached_copy = &os->os_normalization;
- break;
- case ZFS_PROP_UTF8ONLY:
- cached_copy = &os->os_utf8only;
- break;
- case ZFS_PROP_CASE:
- cached_copy = &os->os_casesensitivity;
- break;
- default:
- break;
- }
- }
- if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
- *value = *cached_copy;
- return (0);
- }
-
- /*
- * If the property wasn't cached, look up the file system's value for
- * the property. For the version property, we look up a slightly
- * different string.
- */
- const char *pname;
- int error = ENOENT;
- if (prop == ZFS_PROP_VERSION)
- pname = ZPL_VERSION_STR;
- else
- pname = zfs_prop_to_name(prop);
-
- if (os != NULL) {
- ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
- error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
- }
-
- if (error == ENOENT) {
- /* No value set, use the default value */
- switch (prop) {
- case ZFS_PROP_VERSION:
- *value = ZPL_VERSION;
- break;
- case ZFS_PROP_NORMALIZE:
- case ZFS_PROP_UTF8ONLY:
- *value = 0;
- break;
- case ZFS_PROP_CASE:
- *value = ZFS_CASE_SENSITIVE;
- break;
- case ZFS_PROP_ACLTYPE:
- *value = ZFS_ACLTYPE_OFF;
- break;
- default:
- return (error);
- }
- error = 0;
- }
-
- /*
- * If one of the methods for getting the property value above worked,
- * copy it into the objset_t's cache.
- */
- if (error == 0 && cached_copy != NULL) {
- *cached_copy = *value;
- }
-
- return (error);
-}
-
#if defined(_KERNEL)
EXPORT_SYMBOL(zfs_create_fs);
EXPORT_SYMBOL(zfs_obj_to_path);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -54,7 +54,7 @@
static unsigned long zvol_max_discard_blocks = 16384;
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
-static unsigned int zvol_open_timeout_ms = 1000;
+static const unsigned int zvol_open_timeout_ms = 1000;
#endif
static unsigned int zvol_threads = 0;
@@ -1612,9 +1612,4 @@
"Process volblocksize blocks per thread");
#endif
-#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
-module_param(zvol_open_timeout_ms, uint, 0644);
-MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
-#endif
-
/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
--- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
@@ -160,7 +160,7 @@
"wait | continue | panic", "FAILMODE", failuremode_table,
sfeatures);
zprop_register_index(ZPOOL_PROP_AUTOTRIM, "autotrim",
- SPA_AUTOTRIM_OFF, PROP_DEFAULT, ZFS_TYPE_POOL,
+ SPA_AUTOTRIM_DEFAULT, PROP_DEFAULT, ZFS_TYPE_POOL,
"on | off", "AUTOTRIM", boolean_table, sfeatures);
/* hidden properties */
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -965,7 +965,7 @@
l2arc_dev_t *dev);
/* L2ARC persistence write I/O routines. */
-static uint64_t l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
+static void l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
l2arc_write_callback_t *cb);
/* L2ARC persistence auxiliary routines. */
@@ -6106,7 +6106,8 @@
asize, abd,
ZIO_CHECKSUM_OFF,
l2arc_read_done, cb, priority,
- zio_flags | ZIO_FLAG_CANFAIL |
+ zio_flags | ZIO_FLAG_DONT_CACHE |
+ ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY, B_FALSE);
acb->acb_zio_head = rzio;
@@ -6675,6 +6676,18 @@
callback->awcb_children_ready(zio, buf, callback->awcb_private);
}
+/*
+ * The SPA calls this callback for each physical write that happens on behalf
+ * of a logical write. See the comment in dbuf_write_physdone() for details.
+ */
+static void
+arc_write_physdone(zio_t *zio)
+{
+ arc_write_callback_t *cb = zio->io_private;
+ if (cb->awcb_physdone != NULL)
+ cb->awcb_physdone(zio, cb->awcb_buf, cb->awcb_private);
+}
+
static void
arc_write_done(zio_t *zio)
{
@@ -6764,9 +6777,9 @@
arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t uncached, boolean_t l2arc,
const zio_prop_t *zp, arc_write_done_func_t *ready,
- arc_write_done_func_t *children_ready, arc_write_done_func_t *done,
- void *private, zio_priority_t priority, int zio_flags,
- const zbookmark_phys_t *zb)
+ arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone,
+ arc_write_done_func_t *done, void *private, zio_priority_t priority,
+ int zio_flags, const zbookmark_phys_t *zb)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
arc_write_callback_t *callback;
@@ -6813,6 +6826,7 @@
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
callback->awcb_ready = ready;
callback->awcb_children_ready = children_ready;
+ callback->awcb_physdone = physdone;
callback->awcb_done = done;
callback->awcb_private = private;
callback->awcb_buf = buf;
@@ -6849,7 +6863,8 @@
abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)),
HDR_GET_LSIZE(hdr), arc_buf_size(buf), &localprop, arc_write_ready,
(children_ready != NULL) ? arc_write_children_ready : NULL,
- arc_write_done, callback, priority, zio_flags, zb);
+ arc_write_physdone, arc_write_done, callback,
+ priority, zio_flags, zb);
return (zio);
}
@@ -7851,7 +7866,8 @@
taskq_destroy(arc_prune_taskq);
mutex_enter(&arc_prune_mtx);
- while ((p = list_remove_head(&arc_prune_list)) != NULL) {
+ while ((p = list_head(&arc_prune_list)) != NULL) {
+ list_remove(&arc_prune_list, p);
zfs_refcount_remove(&p->p_refcnt, &arc_prune_list);
zfs_refcount_destroy(&p->p_refcnt);
kmem_free(p, sizeof (*p));
@@ -8159,7 +8175,7 @@
static uint64_t
l2arc_write_size(l2arc_dev_t *dev)
{
- uint64_t size;
+ uint64_t size, dev_size, tsize;
/*
* Make sure our globals have meaningful values in case the user
@@ -8176,45 +8192,35 @@
if (arc_warm == B_FALSE)
size += l2arc_write_boost;
+ /*
+ * Make sure the write size does not exceed the size of the cache
+ * device. This is important in l2arc_evict(), otherwise infinite
+ * iteration can occur.
+ */
+ dev_size = dev->l2ad_end - dev->l2ad_start;
+
/* We need to add in the worst case scenario of log block overhead. */
- size += l2arc_log_blk_overhead(size, dev);
+ tsize = size + l2arc_log_blk_overhead(size, dev);
if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
/*
 * Trim ahead of the write size by 64 MB or (l2arc_trim_ahead/100)
 * times the write size, whichever is greater.
*/
- size += MAX(64 * 1024 * 1024,
- (size * l2arc_trim_ahead) / 100);
+ tsize += MAX(64 * 1024 * 1024,
+ (tsize * l2arc_trim_ahead) / 100);
}
- /*
- * Make sure the write size does not exceed the size of the cache
- * device. This is important in l2arc_evict(), otherwise infinite
- * iteration can occur.
- */
- if (size > dev->l2ad_end - dev->l2ad_start) {
+ if (tsize >= dev_size) {
cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
"plus the overhead of log blocks (persistent L2ARC, "
"%llu bytes) exceeds the size of the cache device "
"(guid %llu), resetting them to the default (%d)",
(u_longlong_t)l2arc_log_blk_overhead(size, dev),
(u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
-
size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
- if (l2arc_trim_ahead > 1) {
- cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
- l2arc_trim_ahead = 1;
- }
-
if (arc_warm == B_FALSE)
size += l2arc_write_boost;
-
- size += l2arc_log_blk_overhead(size, dev);
- if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
- size += MAX(64 * 1024 * 1024,
- (size * l2arc_trim_ahead) / 100);
- }
}
return (size);
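A worked example of the sizing above (all inputs illustrative): with a cold
pool (arc_warm == B_FALSE) so that boost is added, 1 MiB of log-block
overhead, and l2arc_trim_ahead = 200, the device must fit roughly three
times (size + overhead) per write run:

#include <stdio.h>
#include <stdint.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int
main(void)
{
	uint64_t size = 64ULL << 20;		/* write_max + boost */
	uint64_t overhead = 1ULL << 20;		/* log block overhead */
	uint64_t trim_ahead = 200;		/* percent */
	uint64_t tsize = size + overhead;

	tsize += MAX(64ULL << 20, tsize * trim_ahead / 100);
	printf("device must fit %llu MiB per run\n",
	    (unsigned long long)(tsize >> 20));	/* prints 195 MiB */
	return (0);
}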
@@ -8313,14 +8319,20 @@
static void
l2arc_do_free_on_write(void)
{
- l2arc_data_free_t *df;
+ list_t *buflist;
+ l2arc_data_free_t *df, *df_prev;
mutex_enter(&l2arc_free_on_write_mtx);
- while ((df = list_remove_head(l2arc_free_on_write)) != NULL) {
+ buflist = l2arc_free_on_write;
+
+ for (df = list_tail(buflist); df; df = df_prev) {
+ df_prev = list_prev(buflist, df);
ASSERT3P(df->l2df_abd, !=, NULL);
abd_free(df->l2df_abd);
+ list_remove(buflist, df);
kmem_free(df, sizeof (l2arc_data_free_t));
}
+
mutex_exit(&l2arc_free_on_write_mtx);
}
@@ -8833,7 +8845,7 @@
top:
rerun = B_FALSE;
- if (dev->l2ad_hand + distance > dev->l2ad_end) {
+ if (dev->l2ad_hand >= (dev->l2ad_end - distance)) {
/*
* When there is no space to accommodate upcoming writes,
* evict to the end. Then bump the write and evict hands
@@ -9027,7 +9039,7 @@
*/
ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end);
if (!dev->l2ad_first)
- ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict);
+ ASSERT3U(dev->l2ad_hand, <, dev->l2ad_evict);
}
}
@@ -9287,13 +9299,7 @@
uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
psize);
- /*
- * If the allocated size of this buffer plus the max
- * size for the pending log block exceeds the evicted
- * target size, terminate writing buffers for this run.
- */
- if (write_asize + asize +
- sizeof (l2arc_log_blk_phys_t) > target_sz) {
+ if ((write_asize + asize) > target_sz) {
full = B_TRUE;
mutex_exit(hash_lock);
break;
@@ -9407,14 +9413,8 @@
* arcstat_l2_{size,asize} kstats are updated
* internally.
*/
- if (l2arc_log_blk_insert(dev, hdr)) {
- /*
- * l2ad_hand will be adjusted in
- * l2arc_log_blk_commit().
- */
- write_asize +=
- l2arc_log_blk_commit(dev, pio, cb);
- }
+ if (l2arc_log_blk_insert(dev, hdr))
+ l2arc_log_blk_commit(dev, pio, cb);
zio_nowait(wzio);
}
@@ -10173,7 +10173,8 @@
err = zio_wait(zio_read_phys(NULL, dev->l2ad_vdev,
VDEV_LABEL_START_SIZE, l2dhdr_asize, abd,
ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
+ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
ZIO_FLAG_SPECULATIVE, B_FALSE));
abd_free(abd);
@@ -10493,10 +10494,11 @@
cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_SLEEP);
cb->l2rcb_abd = abd_get_from_buf(lb, asize);
pio = zio_root(vd->vdev_spa, l2arc_blk_fetch_done, cb,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY);
+ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
+ ZIO_FLAG_DONT_RETRY);
(void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, asize,
cb->l2rcb_abd, ZIO_CHECKSUM_OFF, NULL, NULL,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL |
+ ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY, B_FALSE));
return (pio);
@@ -10562,7 +10564,7 @@
* This function allocates some memory to temporarily hold the serialized
* buffer to be written. This is then released in l2arc_write_done.
*/
-static uint64_t
+static void
l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
{
l2arc_log_blk_phys_t *lb = &dev->l2ad_log_blk;
@@ -10673,8 +10675,6 @@
dev->l2ad_log_ent_idx = 0;
dev->l2ad_log_blk_payload_asize = 0;
dev->l2ad_log_blk_payload_start = 0;
-
- return (asize);
}
/*
diff --git a/sys/contrib/openzfs/module/zfs/bplist.c b/sys/contrib/openzfs/module/zfs/bplist.c
--- a/sys/contrib/openzfs/module/zfs/bplist.c
+++ b/sys/contrib/openzfs/module/zfs/bplist.c
@@ -65,8 +65,9 @@
bplist_entry_t *bpe;
mutex_enter(&bpl->bpl_lock);
- while ((bpe = list_remove_head(&bpl->bpl_list))) {
+ while ((bpe = list_head(&bpl->bpl_list))) {
bplist_iterate_last_removed = bpe;
+ list_remove(&bpl->bpl_list, bpe);
mutex_exit(&bpl->bpl_lock);
func(arg, &bpe->bpe_blk, tx);
kmem_free(bpe, sizeof (*bpe));
@@ -81,7 +82,10 @@
bplist_entry_t *bpe;
mutex_enter(&bpl->bpl_lock);
- while ((bpe = list_remove_head(&bpl->bpl_list)))
+ while ((bpe = list_head(&bpl->bpl_list))) {
+ bplist_iterate_last_removed = bpe;
+ list_remove(&bpl->bpl_list, bpe);
kmem_free(bpe, sizeof (*bpe));
+ }
mutex_exit(&bpl->bpl_lock);
}
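Several hunks in this diff (zfs_acl.c, arc.c, and bplist.c above) replace
list_remove_head() with the older two-step drain. Both idioms empty the
list; side by side, using the bplist entries from the hunk above:

	bplist_entry_t *bpe;

	/* Newer helper being reverted away from in this diff: */
	while ((bpe = list_remove_head(&bpl->bpl_list)) != NULL)
		kmem_free(bpe, sizeof (*bpe));

	/* Older two-step form this diff restores: */
	while ((bpe = list_head(&bpl->bpl_list)) != NULL) {
		list_remove(&bpl->bpl_list, bpe);
		kmem_free(bpe, sizeof (*bpe));
	}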
diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c
--- a/sys/contrib/openzfs/module/zfs/btree.c
+++ b/sys/contrib/openzfs/module/zfs/btree.c
@@ -193,20 +193,14 @@
void
zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
- bt_find_in_buf_f bt_find_in_buf, size_t size)
+ size_t size)
{
- zfs_btree_create_custom(tree, compar, bt_find_in_buf, size,
- BTREE_LEAF_SIZE);
+ zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
}
-static void *
-zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
- const void *value, zfs_btree_index_t *where);
-
void
zfs_btree_create_custom(zfs_btree_t *tree,
int (*compar) (const void *, const void *),
- bt_find_in_buf_f bt_find_in_buf,
size_t size, size_t lsize)
{
size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);
@@ -214,8 +208,6 @@
ASSERT3U(size, <=, esize / 2);
memset(tree, 0, sizeof (*tree));
tree->bt_compar = compar;
- tree->bt_find_in_buf = (bt_find_in_buf == NULL) ?
- zfs_btree_find_in_buf : bt_find_in_buf;
tree->bt_elem_size = size;
tree->bt_leaf_size = lsize;
tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
@@ -311,7 +303,7 @@
* element in the last leaf, it's in the last leaf or
* it's not in the tree.
*/
- void *d = tree->bt_find_in_buf(tree,
+ void *d = zfs_btree_find_in_buf(tree,
last_leaf->btl_elems +
last_leaf->btl_hdr.bth_first * size,
last_leaf->btl_hdr.bth_count, value, &idx);
@@ -335,7 +327,7 @@
for (node = (zfs_btree_core_t *)tree->bt_root; depth < tree->bt_height;
node = (zfs_btree_core_t *)node->btc_children[child], depth++) {
ASSERT3P(node, !=, NULL);
- void *d = tree->bt_find_in_buf(tree, node->btc_elems,
+ void *d = zfs_btree_find_in_buf(tree, node->btc_elems,
node->btc_hdr.bth_count, value, &idx);
EQUIV(d != NULL, !idx.bti_before);
if (d != NULL) {
@@ -355,7 +347,7 @@
*/
zfs_btree_leaf_t *leaf = (depth == 0 ?
(zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
- void *d = tree->bt_find_in_buf(tree, leaf->btl_elems +
+ void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems +
leaf->btl_hdr.bth_first * size,
leaf->btl_hdr.bth_count, value, &idx);
@@ -679,7 +671,7 @@
zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
zfs_btree_index_t idx;
ASSERT(zfs_btree_is_core(par_hdr));
- VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
+ VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
par_hdr->bth_count, buf, &idx), ==, NULL);
ASSERT(idx.bti_before);
uint32_t offset = idx.bti_offset;
@@ -905,7 +897,7 @@
}
zfs_btree_index_t idx;
zfs_btree_core_t *parent = hdr->bth_parent;
- VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
+ VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
parent->btc_hdr.bth_count, buf, &idx), ==, NULL);
ASSERT(idx.bti_before);
ASSERT3U(idx.bti_offset, <=, parent->btc_hdr.bth_count);
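
This btree.c group drops the per-tree bt_find_in_buf hook (upstream added it so each comparator could be inlined into a specialized buffer search) and routes every caller back to the generic zfs_btree_find_in_buf(). Conceptually that routine is a binary search over nelems fixed-size elements in a flat buffer, reporting the insertion point on a miss. A sketch under that assumption, using the field names visible in the hunks above (the body is illustrative, not the in-tree implementation):

static void *
find_in_buf_sketch(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
    const void *value, zfs_btree_index_t *where)
{
	uint32_t lo = 0, hi = nelems;

	while (lo < hi) {
		uint32_t mid = lo + (hi - lo) / 2;
		uint8_t *cur = buf + (size_t)mid * tree->bt_elem_size;
		int cmp = tree->bt_compar(cur, value);

		if (cmp == 0) {
			/*
			 * Exact match: bti_before is false, as the
			 * EQUIV() in zfs_btree_find() expects.
			 */
			where->bti_offset = mid;
			where->bti_before = B_FALSE;
			return (cur);
		}
		if (cmp < 0)
			lo = mid + 1;	/* cur < value: search upper half */
		else
			hi = mid;	/* cur > value: search lower half */
	}
	where->bti_offset = lo;		/* insertion point */
	where->bti_before = B_TRUE;
	return (NULL);
}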
diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
--- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c
+++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
@@ -49,12 +49,8 @@
{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
- { "zil_itx_metaslab_normal_write", KSTAT_DATA_UINT64 },
- { "zil_itx_metaslab_normal_alloc", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
- { "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
- { "zil_itx_metaslab_slog_write", KSTAT_DATA_UINT64 },
- { "zil_itx_metaslab_slog_alloc", KSTAT_DATA_UINT64 }
+ { "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 }
}
};
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -4369,6 +4369,22 @@
rw_exit(&parent_db->db_rwlock);
}
+static void
+dbuf_lightweight_physdone(zio_t *zio)
+{
+ dbuf_dirty_record_t *dr = zio->io_private;
+ dsl_pool_t *dp = spa_get_dsl(zio->io_spa);
+ ASSERT3U(dr->dr_txg, ==, zio->io_txg);
+
+ /*
+ * The callback will be called io_phys_children times. Retire one
+ * portion of our dirty space each time we are called. Any rounding
+ * error will be cleaned up by dbuf_lightweight_done().
+ */
+ int delta = dr->dr_accounted / zio->io_phys_children;
+ dsl_pool_undirty_space(dp, delta, zio->io_txg);
+}
+
static void
dbuf_lightweight_done(zio_t *zio)
{
@@ -4387,8 +4403,16 @@
dsl_dataset_block_born(ds, zio->io_bp, tx);
}
- dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted,
- zio->io_txg);
+ /*
+ * See comment in dbuf_write_done().
+ */
+ if (zio->io_phys_children == 0) {
+ dsl_pool_undirty_space(dmu_objset_pool(os),
+ dr->dr_accounted, zio->io_txg);
+ } else {
+ dsl_pool_undirty_space(dmu_objset_pool(os),
+ dr->dr_accounted % zio->io_phys_children, zio->io_txg);
+ }
abd_free(dr->dt.dll.dr_abd);
kmem_free(dr, sizeof (*dr));
@@ -4422,7 +4446,8 @@
dmu_tx_get_txg(tx), &dr->dr_bp_copy, dr->dt.dll.dr_abd,
dn->dn_datablksz, abd_get_size(dr->dt.dll.dr_abd),
&dr->dt.dll.dr_props, dbuf_lightweight_ready, NULL,
- dbuf_lightweight_done, dr, ZIO_PRIORITY_ASYNC_WRITE,
+ dbuf_lightweight_physdone, dbuf_lightweight_done, dr,
+ ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED | dr->dt.dll.dr_flags, &zb);
zio_nowait(dr->dr_zio);
@@ -4764,6 +4789,37 @@
DB_DNODE_EXIT(db);
}
+/*
+ * The SPA will call this callback several times for each zio - once
+ * for every physical child i/o (zio->io_phys_children times). This
+ * allows the DMU to monitor the progress of each logical i/o. For example,
+ * there may be 2 copies of an indirect block, or many fragments of a RAID-Z
+ * block. There may be a long delay before all copies/fragments are completed,
+ * so this callback allows us to retire dirty space gradually, as the physical
+ * i/os complete.
+ */
+static void
+dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg)
+{
+ (void) buf;
+ dmu_buf_impl_t *db = arg;
+ objset_t *os = db->db_objset;
+ dsl_pool_t *dp = dmu_objset_pool(os);
+ dbuf_dirty_record_t *dr;
+ int delta = 0;
+
+ dr = db->db_data_pending;
+ ASSERT3U(dr->dr_txg, ==, zio->io_txg);
+
+ /*
+ * The callback will be called io_phys_children times. Retire one
+ * portion of our dirty space each time we are called. Any rounding
+ * error will be cleaned up by dbuf_write_done().
+ */
+ delta = dr->dr_accounted / zio->io_phys_children;
+ dsl_pool_undirty_space(dp, delta, zio->io_txg);
+}
+
static void
dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
{
@@ -4838,8 +4894,27 @@
db->db_data_pending = NULL;
dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
- dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted,
- zio->io_txg);
+ /*
+ * If we didn't do a physical write in this ZIO and we
+ * still ended up here, it means that the space of the
+ * dbuf that we just released (and undirtied) above hasn't
+ * been marked as undirtied in the pool's accounting.
+ *
+ * Thus, we undirty that space in the pool's view of the
+ * world here. For physical writes this type of update
+ * happens in dbuf_write_physdone().
+ *
+ * If we did a physical write, cleanup any rounding errors
+ * that came up due to writing multiple copies of a block
+ * on disk [see dbuf_write_physdone()].
+ */
+ if (zio->io_phys_children == 0) {
+ dsl_pool_undirty_space(dmu_objset_pool(os),
+ dr->dr_accounted, zio->io_txg);
+ } else {
+ dsl_pool_undirty_space(dmu_objset_pool(os),
+ dr->dr_accounted % zio->io_phys_children, zio->io_txg);
+ }
kmem_free(dr, sizeof (dbuf_dirty_record_t));
}
@@ -5087,7 +5162,7 @@
dr->dr_zio = zio_write(pio, os->os_spa, txg, &dr->dr_bp_copy,
contents, db->db.db_size, db->db.db_size, &zp,
- dbuf_write_override_ready, NULL,
+ dbuf_write_override_ready, NULL, NULL,
dbuf_write_override_done,
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
mutex_enter(&db->db_mtx);
@@ -5101,7 +5176,7 @@
zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
dr->dr_zio = zio_write(pio, os->os_spa, txg,
&dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp,
- dbuf_write_nofill_ready, NULL,
+ dbuf_write_nofill_ready, NULL, NULL,
dbuf_write_nofill_done, db,
ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
@@ -5120,8 +5195,9 @@
dr->dr_zio = arc_write(pio, os->os_spa, txg,
&dr->dr_bp_copy, data, !DBUF_IS_CACHEABLE(db),
dbuf_is_l2cacheable(db), &zp, dbuf_write_ready,
- children_ready_cb, dbuf_write_done, db,
- ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
+ children_ready_cb, dbuf_write_physdone,
+ dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
+ ZIO_FLAG_MUSTSUCCEED, &zb);
}
}
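
The dbuf.c changes reinstate the physdone callback so that dirty space is retired incrementally as each physical child I/O completes, with dbuf_write_done() and dbuf_lightweight_done() absorbing the integer-division remainder. A worked example of why the two paths always sum to exactly dr_accounted (values illustrative):

static void
undirty_accounting_example(void)
{
	uint64_t accounted = 10;	/* dr_accounted, in bytes */
	uint64_t children = 3;		/* zio->io_phys_children */
	uint64_t per_child = accounted / children;	/* 3, per physdone */
	uint64_t remainder = accounted % children;	/* 1, in done */

	/* Three physdone calls plus the done-side remainder: 9 + 1. */
	ASSERT3U(children * per_child + remainder, ==, accounted);
}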
diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c
--- a/sys/contrib/openzfs/module/zfs/ddt.c
+++ b/sys/contrib/openzfs/module/zfs/ddt.c
@@ -1209,19 +1209,10 @@
ASSERT3S(dde->dde_class, <, DDT_CLASSES);
ddp = &dde->dde_phys[BP_GET_NDVAS(bp)];
-
- /*
- * This entry already existed (dde_type is real), so it must
- * have refcnt >0 at the start of this txg. We are called from
- * brt_pending_apply(), before frees are issued, so the refcnt
- * can't be lowered yet. Therefore, it must be >0. We assert
- * this because if the order of BRT and DDT interactions were
- * ever to change and the refcnt was ever zero here, then
- * likely further action is required to fill out the DDT entry,
- * and this is a place that is likely to be missed in testing.
- */
- ASSERT3U(ddp->ddp_refcnt, >, 0);
-
+ if (ddp->ddp_refcnt == 0) {
+ /* This should never happen? */
+ ddt_phys_fill(ddp, bp);
+ }
ddt_phys_addref(ddp);
result = B_TRUE;
} else {
diff --git a/sys/contrib/openzfs/module/zfs/ddt_zap.c b/sys/contrib/openzfs/module/zfs/ddt_zap.c
--- a/sys/contrib/openzfs/module/zfs/ddt_zap.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_zap.c
@@ -31,8 +31,8 @@
#include <sys/zap.h>
#include <sys/dmu_tx.h>
-static unsigned int ddt_zap_default_bs = 15;
-static unsigned int ddt_zap_default_ibs = 15;
+static const int ddt_zap_leaf_blockshift = 12;
+static const int ddt_zap_indirect_blockshift = 12;
static int
ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
@@ -43,7 +43,7 @@
flags |= ZAP_FLAG_PRE_HASHED_KEY;
*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
- ddt_zap_default_bs, ddt_zap_default_ibs,
+ ddt_zap_leaf_blockshift, ddt_zap_indirect_blockshift,
DMU_OT_NONE, 0, tx);
return (*objectp == 0 ? SET_ERROR(ENOTSUP) : 0);
@@ -166,10 +166,3 @@
ddt_zap_walk,
ddt_zap_count,
};
-
-/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW,
- "DDT ZAP leaf blockshift");
-ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW,
- "DDT ZAP indirect blockshift");
-/* END CSTYLED */
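
The ddt_zap.c hunks pin the DDT ZAP block shifts at 12 and delete the ZMOD_RW tunables, whose default shift was 15. Shift-to-size arithmetic for the two values (macro names here are illustrative):

#define	DDT_ZAP_SHIFT_FIXED	12	/* reverted constants */
#define	DDT_ZAP_SHIFT_TUNABLE	15	/* removed tunables' default */
_Static_assert((1 << DDT_ZAP_SHIFT_FIXED) == 4096, "4 KiB ZAP blocks");
_Static_assert((1 << DDT_ZAP_SHIFT_TUNABLE) == 32768, "32 KiB ZAP blocks");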
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -1698,7 +1698,7 @@
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
- dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done,
+ dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done,
dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
return (0);
@@ -1864,7 +1864,7 @@
zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp,
dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db),
- &zp, dmu_sync_ready, NULL, dmu_sync_done, dsa,
+ &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
return (0);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -1698,7 +1698,7 @@
zio = arc_write(pio, os->os_spa, tx->tx_txg,
blkptr_copy, os->os_phys_buf, B_FALSE, dmu_os_is_l2cacheable(os),
- &zp, dmu_objset_write_ready, NULL, dmu_objset_write_done,
+ &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
/*
@@ -1755,8 +1755,9 @@
taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
- while ((dr = list_remove_head(list)) != NULL) {
+ while ((dr = list_head(list)) != NULL) {
ASSERT0(dr->dr_dbuf->db_level);
+ list_remove(list, dr);
zio_nowait(dr->dr_zio);
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c
--- a/sys/contrib/openzfs/module/zfs/dmu_recv.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c
@@ -1371,8 +1371,8 @@
dnode_t *dn;
abd_t *abd = rrd->abd;
zio_cksum_t bp_cksum = bp->blk_cksum;
- zio_flag_t flags = ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_RETRY |
- ZIO_FLAG_CANFAIL;
+ zio_flag_t flags = ZIO_FLAG_SPECULATIVE |
+ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL;
if (rwa->raw)
flags |= ZIO_FLAG_RAW;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c
--- a/sys/contrib/openzfs/module/zfs/dmu_send.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_send.c
@@ -1955,7 +1955,7 @@
{
dsl_dataset_t *to_ds = dspp->to_ds;
dsl_pool_t *dp = dspp->dp;
-
+#ifdef _KERNEL
if (dmu_objset_type(os) == DMU_OST_ZFS) {
uint64_t version;
if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0)
@@ -1964,6 +1964,7 @@
if (version >= ZPL_VERSION_SA)
*featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
}
+#endif
/* raw sends imply large_block_ok */
if ((dspp->rawok || dspp->large_block_ok) &&
@@ -2792,7 +2793,6 @@
}
if (err == 0) {
- owned = B_TRUE;
err = zap_lookup(dspp.dp->dp_meta_objset,
dspp.to_ds->ds_object,
DS_FIELD_RESUME_TOGUID, 8, 1,
@@ -2806,24 +2806,21 @@
sizeof (dspp.saved_toname),
dspp.saved_toname);
}
- /* Only disown if there was an error in the lookups */
- if (owned && (err != 0))
+ if (err != 0)
dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);
kmem_strfree(name);
} else {
err = dsl_dataset_own(dspp.dp, tosnap, dsflags,
FTAG, &dspp.to_ds);
- if (err == 0)
- owned = B_TRUE;
}
+ owned = B_TRUE;
} else {
err = dsl_dataset_hold_flags(dspp.dp, tosnap, dsflags, FTAG,
&dspp.to_ds);
}
if (err != 0) {
- /* Note: dsl dataset is not owned at this point */
dsl_pool_rele(dspp.dp, FTAG);
return (err);
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c
--- a/sys/contrib/openzfs/module/zfs/dmu_tx.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c
@@ -1396,7 +1396,8 @@
{
dmu_tx_callback_t *dcb;
- while ((dcb = list_remove_tail(cb_list)) != NULL) {
+ while ((dcb = list_tail(cb_list)) != NULL) {
+ list_remove(cb_list, dcb);
dcb->dcb_func(dcb->dcb_data, error);
kmem_free(dcb, sizeof (dmu_tx_callback_t));
}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -520,7 +520,8 @@
issued = pf_end - pf_start + ipf_end - ipf_start;
if (issued > 1) {
/* More references on top of taken in dmu_zfetch_prepare(). */
- zfs_refcount_add_few(&zs->zs_refs, issued - 1, NULL);
+ for (int i = 0; i < issued - 1; i++)
+ zfs_refcount_add(&zs->zs_refs, NULL);
} else if (issued == 0) {
/* Some other thread has done our work, so drop the ref. */
if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -3782,7 +3782,8 @@
if (l == NULL || !list_link_active(&l->list_head))
return;
- while ((snap = list_remove_tail(l)) != NULL) {
+ while ((snap = list_tail(l)) != NULL) {
+ list_remove(l, snap);
dsl_dataset_rele(snap->ds, tag);
kmem_free(snap, sizeof (*snap));
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c
--- a/sys/contrib/openzfs/module/zfs/dsl_dir.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c
@@ -1490,7 +1490,7 @@
if (tr_cookie == NULL)
return;
- while ((tr = list_remove_head(tr_list)) != NULL) {
+ while ((tr = list_head(tr_list)) != NULL) {
if (tr->tr_ds) {
mutex_enter(&tr->tr_ds->dd_lock);
ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
@@ -1500,6 +1500,7 @@
} else {
arc_tempreserve_clear(tr->tr_size);
}
+ list_remove(tr_list, tr);
kmem_free(tr, sizeof (struct tempreserve));
}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -234,7 +234,7 @@
static int zfs_free_bpobj_enabled = 1;
/* Error blocks to be scrubbed in one txg. */
-static uint_t zfs_scrub_error_blocks_per_txg = 1 << 12;
+uint_t zfs_scrub_error_blocks_per_txg = 1 << 12;
/* the order has to match pool_scan_type */
static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = {
@@ -573,8 +573,7 @@
* counter to how far we've scanned. We know we're consistent
* up to here.
*/
- scn->scn_issued_before_pass = scn->scn_phys.scn_examined -
- scn->scn_phys.scn_skipped;
+ scn->scn_issued_before_pass = scn->scn_phys.scn_examined;
if (dsl_scan_is_running(scn) &&
spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) {
@@ -3438,8 +3437,10 @@
* If we were suspended in the middle of processing,
* requeue any unfinished sios and exit.
*/
- while ((sio = list_remove_head(&sio_list)) != NULL)
+ while ((sio = list_head(&sio_list)) != NULL) {
+ list_remove(&sio_list, sio);
scan_io_queue_insert_impl(queue, sio);
+ }
queue->q_zio = NULL;
mutex_exit(q_lock);
@@ -4363,7 +4364,7 @@
* Disabled by default, set zfs_scan_report_txgs to report
* average performance over the last zfs_scan_report_txgs TXGs.
*/
- if (zfs_scan_report_txgs != 0 &&
+ if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
tx->tx_txg % zfs_scan_report_txgs == 0) {
scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
spa_scan_stat_init(spa);
@@ -4565,15 +4566,6 @@
all ? BP_GET_ASIZE(bp) : DVA_GET_ASIZE(&bp->blk_dva[0]));
}
-static void
-count_block_skipped(dsl_scan_t *scn, const blkptr_t *bp, boolean_t all)
-{
- if (BP_IS_EMBEDDED(bp))
- return;
- atomic_add_64(&scn->scn_phys.scn_skipped,
- all ? BP_GET_ASIZE(bp) : DVA_GET_ASIZE(&bp->blk_dva[0]));
-}
-
static void
count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
{
@@ -4719,7 +4711,7 @@
count_block(dp->dp_blkstats, bp);
if (phys_birth <= scn->scn_phys.scn_min_txg ||
phys_birth >= scn->scn_phys.scn_max_txg) {
- count_block_skipped(scn, bp, B_TRUE);
+ count_block_issued(spa, bp, B_TRUE);
return (0);
}
@@ -4760,7 +4752,7 @@
if (needs_io && !zfs_no_scrub_io) {
dsl_scan_enqueue(dp, bp, zio_flags, zb);
} else {
- count_block_skipped(scn, bp, B_TRUE);
+ count_block_issued(spa, bp, B_TRUE);
}
/* do not relocate this block */
@@ -4885,7 +4877,6 @@
* with a single operation. It also makes scrubs more sequential and reduces
* the chance that a minor extent change moves it within the B-tree.
*/
-__attribute__((always_inline)) inline
static int
ext_size_compare(const void *x, const void *y)
{
@@ -4894,17 +4885,13 @@
return (TREE_CMP(*a, *b));
}
-ZFS_BTREE_FIND_IN_BUF_FUNC(ext_size_find_in_buf, uint64_t,
- ext_size_compare)
-
static void
ext_size_create(range_tree_t *rt, void *arg)
{
(void) rt;
zfs_btree_t *size_tree = arg;
- zfs_btree_create(size_tree, ext_size_compare, ext_size_find_in_buf,
- sizeof (uint64_t));
+ zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
}
static void
@@ -5129,9 +5116,9 @@
ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size));
range_tree_remove_fill(queue->q_exts_by_addr, start, size);
- /* count the block as though we skipped it */
+ /* count the block as though we issued it */
sio2bp(sio, &tmpbp);
- count_block_skipped(scn, &tmpbp, B_FALSE);
+ count_block_issued(spa, &tmpbp, B_FALSE);
sio_free(sio);
}
diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c
--- a/sys/contrib/openzfs/module/zfs/fm.c
+++ b/sys/contrib/openzfs/module/zfs/fm.c
@@ -148,7 +148,8 @@
list_remove(&zevent_list, ev);
/* Remove references to this event in all private file data */
- while ((ze = list_remove_head(&ev->ev_ze_list)) != NULL) {
+ while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
+ list_remove(&ev->ev_ze_list, ze);
ze->ze_zevent = NULL;
ze->ze_dropped++;
}
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -1342,7 +1342,6 @@
* Comparison function for the private size-ordered tree using 32-bit
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
*/
-__attribute__((always_inline)) inline
static int
metaslab_rangesize32_compare(const void *x1, const void *x2)
{
@@ -1353,15 +1352,16 @@
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
int cmp = TREE_CMP(rs_size1, rs_size2);
+ if (likely(cmp))
+ return (cmp);
- return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
+ return (TREE_CMP(r1->rs_start, r2->rs_start));
}
/*
* Comparison function for the private size-ordered tree using 64-bit
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
*/
-__attribute__((always_inline)) inline
static int
metaslab_rangesize64_compare(const void *x1, const void *x2)
{
@@ -1372,10 +1372,11 @@
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
int cmp = TREE_CMP(rs_size1, rs_size2);
+ if (likely(cmp))
+ return (cmp);
- return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
+ return (TREE_CMP(r1->rs_start, r2->rs_start));
}
-
typedef struct metaslab_rt_arg {
zfs_btree_t *mra_bt;
uint32_t mra_floor_shift;
@@ -1411,13 +1412,6 @@
range_tree_walk(rt, metaslab_size_sorted_add, &arg);
}
-
-ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize32_in_buf,
- range_seg32_t, metaslab_rangesize32_compare)
-
-ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize64_in_buf,
- range_seg64_t, metaslab_rangesize64_compare)
-
/*
* Create any block allocator specific components. The current allocators
* rely on using both a size-ordered range_tree_t and an array of uint64_t's.
@@ -1430,22 +1424,19 @@
size_t size;
int (*compare) (const void *, const void *);
- bt_find_in_buf_f bt_find;
switch (rt->rt_type) {
case RANGE_SEG32:
size = sizeof (range_seg32_t);
compare = metaslab_rangesize32_compare;
- bt_find = metaslab_rt_find_rangesize32_in_buf;
break;
case RANGE_SEG64:
size = sizeof (range_seg64_t);
compare = metaslab_rangesize64_compare;
- bt_find = metaslab_rt_find_rangesize64_in_buf;
break;
default:
panic("Invalid range seg type %d", rt->rt_type);
}
- zfs_btree_create(size_tree, compare, bt_find, size);
+ zfs_btree_create(size_tree, compare, size);
mrap->mra_floor_shift = metaslab_by_size_min_shift;
}
@@ -5650,7 +5641,8 @@
* We reserve the slots individually so that we can unreserve
* them individually when an I/O completes.
*/
- zfs_refcount_add_few(&mca->mca_alloc_slots, slots, zio);
+ for (int d = 0; d < slots; d++)
+ zfs_refcount_add(&mca->mca_alloc_slots, zio);
zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
return (B_TRUE);
}
@@ -5664,7 +5656,8 @@
metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
ASSERT(mc->mc_alloc_throttle_enabled);
- zfs_refcount_remove_few(&mca->mca_alloc_slots, slots, zio);
+ for (int d = 0; d < slots; d++)
+ zfs_refcount_remove(&mca->mca_alloc_slots, zio);
}
static int
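
Both metaslab comparators swap upstream's branchless tie-break for an explicit early return; the two forms are functionally identical. A sketch of the equivalence (the wrapper function is illustrative; TREE_CMP(a, b) is the real avl.h macro, expanding to (((a) > (b)) - ((a) < (b)))):

static int
rangesize_compare_sketch(uint64_t size1, uint64_t size2,
    uint64_t start1, uint64_t start2)
{
	int cmp = TREE_CMP(size1, size2);

	/*
	 * When sizes tie, cmp == 0 and !cmp == 1, so the sum reduces to
	 * the start-address comparison; otherwise !cmp == 0 and cmp
	 * passes through. Same result as:
	 *	cmp != 0 ? cmp : TREE_CMP(start1, start2)
	 */
	return (cmp + !cmp * TREE_CMP(start1, start2));
}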
diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c
--- a/sys/contrib/openzfs/module/zfs/range_tree.c
+++ b/sys/contrib/openzfs/module/zfs/range_tree.c
@@ -151,7 +151,6 @@
rt->rt_histogram[idx]--;
}
-__attribute__((always_inline)) inline
static int
range_tree_seg32_compare(const void *x1, const void *x2)
{
@@ -164,7 +163,6 @@
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
-__attribute__((always_inline)) inline
static int
range_tree_seg64_compare(const void *x1, const void *x2)
{
@@ -177,7 +175,6 @@
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
-__attribute__((always_inline)) inline
static int
range_tree_seg_gap_compare(const void *x1, const void *x2)
{
@@ -190,15 +187,6 @@
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
-ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg32_find_in_buf, range_seg32_t,
- range_tree_seg32_compare)
-
-ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg64_find_in_buf, range_seg64_t,
- range_tree_seg64_compare)
-
-ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg_gap_find_in_buf, range_seg_gap_t,
- range_tree_seg_gap_compare)
-
range_tree_t *
range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
void *arg, uint64_t start, uint64_t shift, uint64_t gap)
@@ -209,27 +197,23 @@
ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
size_t size;
int (*compare) (const void *, const void *);
- bt_find_in_buf_f bt_find;
switch (type) {
case RANGE_SEG32:
size = sizeof (range_seg32_t);
compare = range_tree_seg32_compare;
- bt_find = range_tree_seg32_find_in_buf;
break;
case RANGE_SEG64:
size = sizeof (range_seg64_t);
compare = range_tree_seg64_compare;
- bt_find = range_tree_seg64_find_in_buf;
break;
case RANGE_SEG_GAP:
size = sizeof (range_seg_gap_t);
compare = range_tree_seg_gap_compare;
- bt_find = range_tree_seg_gap_find_in_buf;
break;
default:
panic("Invalid range seg type %d", type);
}
- zfs_btree_create(&rt->rt_root, compare, bt_find, size);
+ zfs_btree_create(&rt->rt_root, compare, size);
rt->rt_ops = ops;
rt->rt_gap = gap;
diff --git a/sys/contrib/openzfs/module/zfs/refcount.c b/sys/contrib/openzfs/module/zfs/refcount.c
--- a/sys/contrib/openzfs/module/zfs/refcount.c
+++ b/sys/contrib/openzfs/module/zfs/refcount.c
@@ -36,40 +36,33 @@
static uint_t reference_history = 3; /* tunable */
static kmem_cache_t *reference_cache;
+static kmem_cache_t *reference_history_cache;
void
zfs_refcount_init(void)
{
reference_cache = kmem_cache_create("reference_cache",
sizeof (reference_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+ reference_history_cache = kmem_cache_create("reference_history_cache",
+ sizeof (uint64_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
void
zfs_refcount_fini(void)
{
kmem_cache_destroy(reference_cache);
-}
-
-static int
-zfs_refcount_compare(const void *x1, const void *x2)
-{
- const reference_t *r1 = (const reference_t *)x1;
- const reference_t *r2 = (const reference_t *)x2;
-
- int cmp1 = TREE_CMP(r1->ref_holder, r2->ref_holder);
- int cmp2 = TREE_CMP(r1->ref_number, r2->ref_number);
- int cmp = cmp1 ? cmp1 : cmp2;
- return ((cmp || r1->ref_search) ? cmp : TREE_PCMP(r1, r2));
+ kmem_cache_destroy(reference_history_cache);
}
void
zfs_refcount_create(zfs_refcount_t *rc)
{
mutex_init(&rc->rc_mtx, NULL, MUTEX_DEFAULT, NULL);
- avl_create(&rc->rc_tree, zfs_refcount_compare, sizeof (reference_t),
- offsetof(reference_t, ref_link.a));
+ list_create(&rc->rc_list, sizeof (reference_t),
+ offsetof(reference_t, ref_link));
list_create(&rc->rc_removed, sizeof (reference_t),
- offsetof(reference_t, ref_link.l));
+ offsetof(reference_t, ref_link));
rc->rc_count = 0;
rc->rc_removed_count = 0;
rc->rc_tracked = reference_tracking_enable;
@@ -93,15 +86,19 @@
zfs_refcount_destroy_many(zfs_refcount_t *rc, uint64_t number)
{
reference_t *ref;
- void *cookie = NULL;
ASSERT3U(rc->rc_count, ==, number);
- while ((ref = avl_destroy_nodes(&rc->rc_tree, &cookie)) != NULL)
+ while ((ref = list_head(&rc->rc_list))) {
+ list_remove(&rc->rc_list, ref);
kmem_cache_free(reference_cache, ref);
- avl_destroy(&rc->rc_tree);
+ }
+ list_destroy(&rc->rc_list);
- while ((ref = list_remove_head(&rc->rc_removed)))
+ while ((ref = list_head(&rc->rc_removed))) {
+ list_remove(&rc->rc_removed, ref);
+ kmem_cache_free(reference_history_cache, ref->ref_removed);
kmem_cache_free(reference_cache, ref);
+ }
list_destroy(&rc->rc_removed);
mutex_destroy(&rc->rc_mtx);
}
@@ -127,10 +124,10 @@
int64_t
zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder)
{
- reference_t *ref;
+ reference_t *ref = NULL;
int64_t count;
- if (likely(!rc->rc_tracked)) {
+ if (!rc->rc_tracked) {
count = atomic_add_64_nv(&(rc)->rc_count, number);
ASSERT3U(count, >=, number);
return (count);
@@ -139,9 +136,8 @@
ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
ref->ref_holder = holder;
ref->ref_number = number;
- ref->ref_search = B_FALSE;
mutex_enter(&rc->rc_mtx);
- avl_add(&rc->rc_tree, ref);
+ list_insert_head(&rc->rc_list, ref);
rc->rc_count += number;
count = rc->rc_count;
mutex_exit(&rc->rc_mtx);
@@ -155,55 +151,51 @@
return (zfs_refcount_add_many(rc, 1, holder));
}
-void
-zfs_refcount_add_few(zfs_refcount_t *rc, uint64_t number, const void *holder)
-{
- if (likely(!rc->rc_tracked))
- (void) zfs_refcount_add_many(rc, number, holder);
- else for (; number > 0; number--)
- (void) zfs_refcount_add(rc, holder);
-}
-
int64_t
zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number,
const void *holder)
{
- reference_t *ref, s;
+ reference_t *ref;
int64_t count;
- if (likely(!rc->rc_tracked)) {
+ if (!rc->rc_tracked) {
count = atomic_add_64_nv(&(rc)->rc_count, -number);
ASSERT3S(count, >=, 0);
return (count);
}
- s.ref_holder = holder;
- s.ref_number = number;
- s.ref_search = B_TRUE;
mutex_enter(&rc->rc_mtx);
ASSERT3U(rc->rc_count, >=, number);
- ref = avl_find(&rc->rc_tree, &s, NULL);
- if (unlikely(ref == NULL)) {
- panic("No such hold %p on refcount %llx", holder,
- (u_longlong_t)(uintptr_t)rc);
- return (-1);
- }
- avl_remove(&rc->rc_tree, ref);
- if (reference_history > 0) {
- list_insert_head(&rc->rc_removed, ref);
- if (rc->rc_removed_count >= reference_history) {
- ref = list_remove_tail(&rc->rc_removed);
- kmem_cache_free(reference_cache, ref);
- } else {
- rc->rc_removed_count++;
+ for (ref = list_head(&rc->rc_list); ref;
+ ref = list_next(&rc->rc_list, ref)) {
+ if (ref->ref_holder == holder && ref->ref_number == number) {
+ list_remove(&rc->rc_list, ref);
+ if (reference_history > 0) {
+ ref->ref_removed =
+ kmem_cache_alloc(reference_history_cache,
+ KM_SLEEP);
+ list_insert_head(&rc->rc_removed, ref);
+ rc->rc_removed_count++;
+ if (rc->rc_removed_count > reference_history) {
+ ref = list_tail(&rc->rc_removed);
+ list_remove(&rc->rc_removed, ref);
+ kmem_cache_free(reference_history_cache,
+ ref->ref_removed);
+ kmem_cache_free(reference_cache, ref);
+ rc->rc_removed_count--;
+ }
+ } else {
+ kmem_cache_free(reference_cache, ref);
+ }
+ rc->rc_count -= number;
+ count = rc->rc_count;
+ mutex_exit(&rc->rc_mtx);
+ return (count);
}
- } else {
- kmem_cache_free(reference_cache, ref);
}
- rc->rc_count -= number;
- count = rc->rc_count;
- mutex_exit(&rc->rc_mtx);
- return (count);
+ panic("No such hold %p on refcount %llx", holder,
+ (u_longlong_t)(uintptr_t)rc);
+ return (-1);
}
int64_t
@@ -212,50 +204,34 @@
return (zfs_refcount_remove_many(rc, 1, holder));
}
-void
-zfs_refcount_remove_few(zfs_refcount_t *rc, uint64_t number, const void *holder)
-{
- if (likely(!rc->rc_tracked))
- (void) zfs_refcount_remove_many(rc, number, holder);
- else for (; number > 0; number--)
- (void) zfs_refcount_remove(rc, holder);
-}
-
void
zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src)
{
- avl_tree_t tree;
- list_t removed;
- reference_t *ref;
- void *cookie = NULL;
- uint64_t count;
- uint_t removed_count;
+ int64_t count, removed_count;
+ list_t list, removed;
- avl_create(&tree, zfs_refcount_compare, sizeof (reference_t),
- offsetof(reference_t, ref_link.a));
+ list_create(&list, sizeof (reference_t),
+ offsetof(reference_t, ref_link));
list_create(&removed, sizeof (reference_t),
- offsetof(reference_t, ref_link.l));
+ offsetof(reference_t, ref_link));
mutex_enter(&src->rc_mtx);
count = src->rc_count;
removed_count = src->rc_removed_count;
src->rc_count = 0;
src->rc_removed_count = 0;
- avl_swap(&tree, &src->rc_tree);
+ list_move_tail(&list, &src->rc_list);
list_move_tail(&removed, &src->rc_removed);
mutex_exit(&src->rc_mtx);
mutex_enter(&dst->rc_mtx);
dst->rc_count += count;
dst->rc_removed_count += removed_count;
- if (avl_is_empty(&dst->rc_tree))
- avl_swap(&dst->rc_tree, &tree);
- else while ((ref = avl_destroy_nodes(&tree, &cookie)) != NULL)
- avl_add(&dst->rc_tree, ref);
+ list_move_tail(&dst->rc_list, &list);
list_move_tail(&dst->rc_removed, &removed);
mutex_exit(&dst->rc_mtx);
- avl_destroy(&tree);
+ list_destroy(&list);
list_destroy(&removed);
}
@@ -263,19 +239,23 @@
zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number,
const void *current_holder, const void *new_holder)
{
- reference_t *ref, s;
+ reference_t *ref;
+ boolean_t found = B_FALSE;
- if (likely(!rc->rc_tracked))
+ if (!rc->rc_tracked)
return;
- s.ref_holder = current_holder;
- s.ref_number = number;
- s.ref_search = B_TRUE;
mutex_enter(&rc->rc_mtx);
- ref = avl_find(&rc->rc_tree, &s, NULL);
- ASSERT(ref);
- ref->ref_holder = new_holder;
- avl_update(&rc->rc_tree, ref);
+ for (ref = list_head(&rc->rc_list); ref;
+ ref = list_next(&rc->rc_list, ref)) {
+ if (ref->ref_holder == current_holder &&
+ ref->ref_number == number) {
+ ref->ref_holder = new_holder;
+ found = B_TRUE;
+ break;
+ }
+ }
+ ASSERT(found);
mutex_exit(&rc->rc_mtx);
}
@@ -295,23 +275,21 @@
boolean_t
zfs_refcount_held(zfs_refcount_t *rc, const void *holder)
{
- reference_t *ref, s;
- avl_index_t idx;
- boolean_t res;
+ reference_t *ref;
- if (likely(!rc->rc_tracked))
+ if (!rc->rc_tracked)
return (zfs_refcount_count(rc) > 0);
- s.ref_holder = holder;
- s.ref_number = 0;
- s.ref_search = B_TRUE;
mutex_enter(&rc->rc_mtx);
- ref = avl_find(&rc->rc_tree, &s, &idx);
- if (likely(ref == NULL))
- ref = avl_nearest(&rc->rc_tree, idx, AVL_AFTER);
- res = ref && ref->ref_holder == holder;
+ for (ref = list_head(&rc->rc_list); ref;
+ ref = list_next(&rc->rc_list, ref)) {
+ if (ref->ref_holder == holder) {
+ mutex_exit(&rc->rc_mtx);
+ return (B_TRUE);
+ }
+ }
mutex_exit(&rc->rc_mtx);
- return (res);
+ return (B_FALSE);
}
/*
@@ -322,23 +300,21 @@
boolean_t
zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder)
{
- reference_t *ref, s;
- avl_index_t idx;
- boolean_t res;
+ reference_t *ref;
- if (likely(!rc->rc_tracked))
+ if (!rc->rc_tracked)
return (B_TRUE);
mutex_enter(&rc->rc_mtx);
- s.ref_holder = holder;
- s.ref_number = 0;
- s.ref_search = B_TRUE;
- ref = avl_find(&rc->rc_tree, &s, &idx);
- if (likely(ref == NULL))
- ref = avl_nearest(&rc->rc_tree, idx, AVL_AFTER);
- res = ref == NULL || ref->ref_holder != holder;
+ for (ref = list_head(&rc->rc_list); ref;
+ ref = list_next(&rc->rc_list, ref)) {
+ if (ref->ref_holder == holder) {
+ mutex_exit(&rc->rc_mtx);
+ return (B_FALSE);
+ }
+ }
mutex_exit(&rc->rc_mtx);
- return (res);
+ return (B_TRUE);
}
EXPORT_SYMBOL(zfs_refcount_create);
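
The refcount.c revert swaps AVL-indexed holder tracking back to a linear list with a separate kmem cache for the removal history, but the public API is untouched, which is why the call sites changed elsewhere in this diff (dmu_zfetch.c, metaslab.c) only lose the *_few batching helpers. A minimal usage sketch of that API (function shape illustrative; the calls are the real interface):

static void
refcount_usage_sketch(void)
{
	zfs_refcount_t rc;
	char holder;	/* any stable address serves as a holder tag */

	zfs_refcount_create(&rc);
	(void) zfs_refcount_add(&rc, &holder);		/* count -> 1 */
	ASSERT(zfs_refcount_held(&rc, &holder));
	(void) zfs_refcount_remove(&rc, &holder);	/* count -> 0 */
	zfs_refcount_destroy(&rc);	/* asserts the count is zero */
}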
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -33,7 +33,6 @@
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
- * Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
*/
/*
@@ -1609,16 +1608,16 @@
{
void *cookie = NULL;
spa_log_sm_t *sls;
- log_summary_entry_t *e;
-
while ((sls = avl_destroy_nodes(&spa->spa_sm_logs_by_txg,
&cookie)) != NULL) {
VERIFY0(sls->sls_mscount);
kmem_free(sls, sizeof (spa_log_sm_t));
}
- while ((e = list_remove_head(&spa->spa_log_summary)) != NULL) {
+ for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+ e != NULL; e = list_head(&spa->spa_log_summary)) {
VERIFY0(e->lse_mscount);
+ list_remove(&spa->spa_log_summary, e);
kmem_free(e, sizeof (log_summary_entry_t));
}
@@ -6875,11 +6874,9 @@
if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
- if (dsl_scan_resilvering(spa_get_dsl(spa)) ||
- dsl_scan_resilver_scheduled(spa_get_dsl(spa))) {
+ if (dsl_scan_resilvering(spa_get_dsl(spa)))
return (spa_vdev_exit(spa, NULL, txg,
ZFS_ERR_RESILVER_IN_PROGRESS));
- }
} else {
if (vdev_rebuild_active(rvd))
return (spa_vdev_exit(spa, NULL, txg,
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -730,7 +730,7 @@
mutex_init(&spa->spa_allocs[i].spaa_lock, NULL, MUTEX_DEFAULT,
NULL);
avl_create(&spa->spa_allocs[i].spaa_tree, zio_bookmark_compare,
- sizeof (zio_t), offsetof(zio_t, io_queue_node.a));
+ sizeof (zio_t), offsetof(zio_t, io_alloc_node));
}
avl_create(&spa->spa_metaslabs_by_flushed, metaslab_sort_by_flushed,
sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node));
@@ -814,7 +814,8 @@
if (spa->spa_root)
spa_strfree(spa->spa_root);
- while ((dp = list_remove_head(&spa->spa_config_list)) != NULL) {
+ while ((dp = list_head(&spa->spa_config_list)) != NULL) {
+ list_remove(&spa->spa_config_list, dp);
if (dp->scd_path != NULL)
spa_strfree(dp->scd_path);
kmem_free(dp, sizeof (spa_config_dirent_t));
@@ -2438,6 +2439,7 @@
zio_init();
dmu_init();
zil_init();
+ vdev_cache_stat_init();
vdev_mirror_stat_init();
vdev_raidz_math_init();
vdev_file_init();
@@ -2461,6 +2463,7 @@
spa_evict_all();
vdev_file_fini();
+ vdev_cache_stat_fini();
vdev_mirror_stat_fini();
vdev_raidz_math_fini();
chksum_fini();
@@ -2611,7 +2614,7 @@
ps->pss_end_time = scn->scn_phys.scn_end_time;
ps->pss_to_examine = scn->scn_phys.scn_to_examine;
ps->pss_examined = scn->scn_phys.scn_examined;
- ps->pss_skipped = scn->scn_phys.scn_skipped;
+ ps->pss_to_process = scn->scn_phys.scn_to_process;
ps->pss_processed = scn->scn_phys.scn_processed;
ps->pss_errors = scn->scn_phys.scn_errors;
diff --git a/sys/contrib/openzfs/module/zfs/txg.c b/sys/contrib/openzfs/module/zfs/txg.c
--- a/sys/contrib/openzfs/module/zfs/txg.c
+++ b/sys/contrib/openzfs/module/zfs/txg.c
@@ -895,10 +895,15 @@
boolean_t
txg_all_lists_empty(txg_list_t *tl)
{
- boolean_t res = B_TRUE;
- for (int i = 0; i < TXG_SIZE; i++)
- res &= (tl->tl_head[i] == NULL);
- return (res);
+ mutex_enter(&tl->tl_lock);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ if (!txg_list_empty_impl(tl, i)) {
+ mutex_exit(&tl->tl_lock);
+ return (B_FALSE);
+ }
+ }
+ mutex_exit(&tl->tl_lock);
+ return (B_TRUE);
}
/*
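
The txg_all_lists_empty() rewrite closes a small race: the old loop read each tl_head[i] without holding tl_lock, so a concurrent insert or remove could be observed inconsistently. A sketch of the per-index helper the new code leans on, assuming the usual convention that tl_head[] is only read under tl_lock (illustrative; the in-tree txg_list_empty_impl may differ in detail):

static boolean_t
txg_list_empty_impl_sketch(txg_list_t *tl, uint64_t txg)
{
	ASSERT(MUTEX_HELD(&tl->tl_lock));
	return (tl->tl_head[txg & TXG_MASK] == NULL);
}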
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -29,7 +29,7 @@
* Copyright (c) 2017, Intel Corporation.
* Copyright (c) 2019, Datto Inc. All rights reserved.
* Copyright (c) 2021, Klara Inc.
- * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
+ * Copyright [2021] Hewlett Packard Enterprise Development LP
*/
#include <sys/zfs_context.h>
@@ -715,6 +715,7 @@
offsetof(struct vdev, vdev_dtl_node));
vd->vdev_stat.vs_timestamp = gethrtime();
vdev_queue_init(vd);
+ vdev_cache_init(vd);
return (vd);
}
@@ -1095,6 +1096,7 @@
* Clean up vdev structure.
*/
vdev_queue_fini(vd);
+ vdev_cache_fini(vd);
if (vd->vdev_path)
spa_strfree(vd->vdev_path);
@@ -1718,7 +1720,8 @@
vps = kmem_zalloc(sizeof (*vps), KM_SLEEP);
vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
- ZIO_FLAG_DONT_AGGREGATE | ZIO_FLAG_TRYHARD;
+ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
+ ZIO_FLAG_TRYHARD;
if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
/*
@@ -2609,6 +2612,8 @@
vd->vdev_ops->vdev_op_close(vd);
+ vdev_cache_purge(vd);
+
/*
* We record the previous state before we close it, so that if we are
* doing a reopen(), we don't generate FMA ereports if we notice that
@@ -2694,17 +2699,6 @@
(void) vdev_validate(vd);
}
- /*
- * Recheck if resilver is still needed and cancel any
- * scheduled resilver if resilver is unneeded.
- */
- if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
- spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
- mutex_enter(&spa->spa_async_lock);
- spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
- mutex_exit(&spa->spa_async_lock);
- }
-
/*
* Reassess parent vdev's health.
*/
@@ -4608,9 +4602,11 @@
memcpy(vsx, &vd->vdev_stat_ex, sizeof (vd->vdev_stat_ex));
- for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) {
- vsx->vsx_active_queue[t] = vd->vdev_queue.vq_cactive[t];
- vsx->vsx_pend_queue[t] = vdev_queue_class_length(vd, t);
+ for (t = 0; t < ARRAY_SIZE(vd->vdev_queue.vq_class); t++) {
+ vsx->vsx_active_queue[t] =
+ vd->vdev_queue.vq_class[t].vqc_active;
+ vsx->vsx_pend_queue[t] = avl_numnodes(
+ &vd->vdev_queue.vq_class[t].vqc_queued_tree);
}
}
}
@@ -5468,20 +5464,20 @@
vdev_queue_t *vq = &vd->vdev_queue;
mutex_enter(&vq->vq_lock);
- if (vq->vq_active > 0) {
+ if (avl_numnodes(&vq->vq_active_tree) > 0) {
spa_t *spa = vd->vdev_spa;
zio_t *fio;
uint64_t delta;
- zfs_dbgmsg("slow vdev: %s has %u active IOs",
- vd->vdev_path, vq->vq_active);
+ zfs_dbgmsg("slow vdev: %s has %lu active IOs",
+ vd->vdev_path, avl_numnodes(&vq->vq_active_tree));
/*
* Look at the head of all the pending queues,
* if any I/O has been outstanding for longer than
* the spa_deadman_synctime invoke the deadman logic.
*/
- fio = list_head(&vq->vq_active_list);
+ fio = avl_first(&vq->vq_active_tree);
delta = gethrtime() - fio->io_timestamp;
if (delta > spa_deadman_synctime(spa))
zio_deadman(fio, tag);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_cache.c b/sys/contrib/openzfs/module/zfs/vdev_cache.c
new file mode 100644
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/vdev_cache.c
@@ -0,0 +1,436 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/vdev_impl.h>
+#include <sys/zio.h>
+#include <sys/kstat.h>
+#include <sys/abd.h>
+
+/*
+ * Virtual device read-ahead caching.
+ *
+ * This file implements a simple LRU read-ahead cache. When the DMU reads
+ * a given block, it will often want other, nearby blocks soon thereafter.
+ * We take advantage of this by reading a larger disk region and caching
+ * the result. In the best case, this can turn 128 back-to-back 512-byte
+ * reads into a single 64k read followed by 127 cache hits; this reduces
+ * latency dramatically. In the worst case, it can turn an isolated 512-byte
+ * read into a 64k read, which doesn't affect latency all that much but is
+ * terribly wasteful of bandwidth. A more intelligent version of the cache
+ * could keep track of access patterns and not do read-ahead unless it sees
+ * at least two temporally close I/Os to the same region. Currently, only
+ * metadata I/O is inflated. A further enhancement could take advantage of
+ * more semantic information about the I/O. And it could use something
+ * faster than an AVL tree; that was chosen solely for convenience.
+ *
+ * There are five cache operations: allocate, fill, read, write, evict.
+ *
+ * (1) Allocate. This reserves a cache entry for the specified region.
+ * We separate the allocate and fill operations so that multiple threads
+ * don't generate I/O for the same cache miss.
+ *
+ * (2) Fill. When the I/O for a cache miss completes, the fill routine
+ * places the data in the previously allocated cache entry.
+ *
+ * (3) Read. Read data from the cache.
+ *
+ * (4) Write. Update cache contents after write completion.
+ *
+ * (5) Evict. When allocating a new entry, we evict the oldest (LRU) entry
+ * if the total cache size exceeds zfs_vdev_cache_size.
+ */
+
+/*
+ * These tunables are for performance analysis.
+ */
+/*
+ * All i/os smaller than zfs_vdev_cache_max will be turned into
+ * 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
+ * track buffer). At most zfs_vdev_cache_size bytes will be kept in each
+ * vdev's vdev_cache.
+ *
+ * TODO: Note that with the current ZFS code, it turns out that the
+ * vdev cache is not helpful, and in some cases actually harmful. It
+ * is better if we disable this. Once some time has passed, we should
+ * actually remove this to simplify the code. For now we just disable
+ * it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11
+ * has made these same changes.
+ */
+static uint_t zfs_vdev_cache_max = 1 << 14; /* 16KB */
+static uint_t zfs_vdev_cache_size = 0;
+static uint_t zfs_vdev_cache_bshift = 16;
+
+#define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB */
+
+static kstat_t *vdc_ksp = NULL;
+
+typedef struct vdc_stats {
+ kstat_named_t vdc_stat_delegations;
+ kstat_named_t vdc_stat_hits;
+ kstat_named_t vdc_stat_misses;
+} vdc_stats_t;
+
+static vdc_stats_t vdc_stats = {
+ { "delegations", KSTAT_DATA_UINT64 },
+ { "hits", KSTAT_DATA_UINT64 },
+ { "misses", KSTAT_DATA_UINT64 }
+};
+
+#define VDCSTAT_BUMP(stat) atomic_inc_64(&vdc_stats.stat.value.ui64);
+
+static inline int
+vdev_cache_offset_compare(const void *a1, const void *a2)
+{
+ const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
+ const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
+
+ return (TREE_CMP(ve1->ve_offset, ve2->ve_offset));
+}
+
+static int
+vdev_cache_lastused_compare(const void *a1, const void *a2)
+{
+ const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
+ const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
+
+ int cmp = TREE_CMP(ve1->ve_lastused, ve2->ve_lastused);
+ if (likely(cmp))
+ return (cmp);
+
+ /*
+ * Among equally old entries, sort by offset to ensure uniqueness.
+ */
+ return (vdev_cache_offset_compare(a1, a2));
+}
+
+/*
+ * Evict the specified entry from the cache.
+ */
+static void
+vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve)
+{
+ ASSERT(MUTEX_HELD(&vc->vc_lock));
+ ASSERT3P(ve->ve_fill_io, ==, NULL);
+ ASSERT3P(ve->ve_abd, !=, NULL);
+
+ avl_remove(&vc->vc_lastused_tree, ve);
+ avl_remove(&vc->vc_offset_tree, ve);
+ abd_free(ve->ve_abd);
+ kmem_free(ve, sizeof (vdev_cache_entry_t));
+}
+
+/*
+ * Allocate an entry in the cache. At this point we don't have the data,
+ * we're just creating a placeholder so that multiple threads don't all
+ * go off and read the same blocks.
+ */
+static vdev_cache_entry_t *
+vdev_cache_allocate(zio_t *zio)
+{
+ vdev_cache_t *vc = &zio->io_vd->vdev_cache;
+ uint64_t offset = P2ALIGN(zio->io_offset, VCBS);
+ vdev_cache_entry_t *ve;
+
+ ASSERT(MUTEX_HELD(&vc->vc_lock));
+
+ if (zfs_vdev_cache_size == 0)
+ return (NULL);
+
+ /*
+ * If adding a new entry would exceed the cache size,
+ * evict the oldest entry (LRU).
+ */
+ if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) >
+ zfs_vdev_cache_size) {
+ ve = avl_first(&vc->vc_lastused_tree);
+ if (ve->ve_fill_io != NULL)
+ return (NULL);
+ ASSERT3U(ve->ve_hits, !=, 0);
+ vdev_cache_evict(vc, ve);
+ }
+
+ ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP);
+ ve->ve_offset = offset;
+ ve->ve_lastused = ddi_get_lbolt();
+ ve->ve_abd = abd_alloc_for_io(VCBS, B_TRUE);
+
+ avl_add(&vc->vc_offset_tree, ve);
+ avl_add(&vc->vc_lastused_tree, ve);
+
+ return (ve);
+}
+
+static void
+vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
+{
+ uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
+
+ ASSERT(MUTEX_HELD(&vc->vc_lock));
+ ASSERT3P(ve->ve_fill_io, ==, NULL);
+
+ if (ve->ve_lastused != ddi_get_lbolt()) {
+ avl_remove(&vc->vc_lastused_tree, ve);
+ ve->ve_lastused = ddi_get_lbolt();
+ avl_add(&vc->vc_lastused_tree, ve);
+ }
+
+ ve->ve_hits++;
+ abd_copy_off(zio->io_abd, ve->ve_abd, 0, cache_phase, zio->io_size);
+}
+
+/*
+ * Fill a previously allocated cache entry with data.
+ */
+static void
+vdev_cache_fill(zio_t *fio)
+{
+ vdev_t *vd = fio->io_vd;
+ vdev_cache_t *vc = &vd->vdev_cache;
+ vdev_cache_entry_t *ve = fio->io_private;
+ zio_t *pio;
+
+ ASSERT3U(fio->io_size, ==, VCBS);
+
+ /*
+ * Add data to the cache.
+ */
+ mutex_enter(&vc->vc_lock);
+
+ ASSERT3P(ve->ve_fill_io, ==, fio);
+ ASSERT3U(ve->ve_offset, ==, fio->io_offset);
+ ASSERT3P(ve->ve_abd, ==, fio->io_abd);
+
+ ve->ve_fill_io = NULL;
+
+ /*
+ * Even if this cache line was invalidated by a missed write update,
+ * any reads that were queued up before the missed update are still
+ * valid, so we can satisfy them from this line before we evict it.
+ */
+ zio_link_t *zl = NULL;
+ while ((pio = zio_walk_parents(fio, &zl)) != NULL)
+ vdev_cache_hit(vc, ve, pio);
+
+ if (fio->io_error || ve->ve_missed_update)
+ vdev_cache_evict(vc, ve);
+
+ mutex_exit(&vc->vc_lock);
+}
+
+/*
+ * Read data from the cache. Returns B_TRUE on a cache hit (including
+ * delegation to an in-flight or newly issued fill), B_FALSE on a miss.
+ */
+boolean_t
+vdev_cache_read(zio_t *zio)
+{
+ vdev_cache_t *vc = &zio->io_vd->vdev_cache;
+ vdev_cache_entry_t *ve, ve_search;
+ uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
+ zio_t *fio;
+ uint64_t cache_phase __maybe_unused = P2PHASE(zio->io_offset, VCBS);
+
+ ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+
+ if (zfs_vdev_cache_size == 0)
+ return (B_FALSE);
+
+ if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
+ return (B_FALSE);
+
+ if (zio->io_size > zfs_vdev_cache_max)
+ return (B_FALSE);
+
+ /*
+ * If the I/O straddles two or more cache blocks, don't cache it.
+ */
+ if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
+ return (B_FALSE);
+
+ ASSERT3U(cache_phase + zio->io_size, <=, VCBS);
+
+ mutex_enter(&vc->vc_lock);
+
+ ve_search.ve_offset = cache_offset;
+ ve = avl_find(&vc->vc_offset_tree, &ve_search, NULL);
+
+ if (ve != NULL) {
+ if (ve->ve_missed_update) {
+ mutex_exit(&vc->vc_lock);
+ return (B_FALSE);
+ }
+
+ if ((fio = ve->ve_fill_io) != NULL) {
+ zio_vdev_io_bypass(zio);
+ zio_add_child(zio, fio);
+ mutex_exit(&vc->vc_lock);
+ VDCSTAT_BUMP(vdc_stat_delegations);
+ return (B_TRUE);
+ }
+
+ vdev_cache_hit(vc, ve, zio);
+ zio_vdev_io_bypass(zio);
+
+ mutex_exit(&vc->vc_lock);
+ VDCSTAT_BUMP(vdc_stat_hits);
+ return (B_TRUE);
+ }
+
+ ve = vdev_cache_allocate(zio);
+
+ if (ve == NULL) {
+ mutex_exit(&vc->vc_lock);
+ return (B_FALSE);
+ }
+
+ fio = zio_vdev_delegated_io(zio->io_vd, cache_offset,
+ ve->ve_abd, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW,
+ ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve);
+
+ ve->ve_fill_io = fio;
+ zio_vdev_io_bypass(zio);
+ zio_add_child(zio, fio);
+
+ mutex_exit(&vc->vc_lock);
+ zio_nowait(fio);
+ VDCSTAT_BUMP(vdc_stat_misses);
+
+ return (B_TRUE);
+}
+
+/*
+ * Update cache contents upon write completion.
+ */
+void
+vdev_cache_write(zio_t *zio)
+{
+ vdev_cache_t *vc = &zio->io_vd->vdev_cache;
+ vdev_cache_entry_t *ve, ve_search;
+ uint64_t io_start = zio->io_offset;
+ uint64_t io_end = io_start + zio->io_size;
+ uint64_t min_offset = P2ALIGN(io_start, VCBS);
+ uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
+ avl_index_t where;
+
+ ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
+
+ mutex_enter(&vc->vc_lock);
+
+ ve_search.ve_offset = min_offset;
+ ve = avl_find(&vc->vc_offset_tree, &ve_search, &where);
+
+ if (ve == NULL)
+ ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER);
+
+ while (ve != NULL && ve->ve_offset < max_offset) {
+ uint64_t start = MAX(ve->ve_offset, io_start);
+ uint64_t end = MIN(ve->ve_offset + VCBS, io_end);
+
+ if (ve->ve_fill_io != NULL) {
+ ve->ve_missed_update = 1;
+ } else {
+ abd_copy_off(ve->ve_abd, zio->io_abd,
+ start - ve->ve_offset, start - io_start,
+ end - start);
+ }
+ ve = AVL_NEXT(&vc->vc_offset_tree, ve);
+ }
+ mutex_exit(&vc->vc_lock);
+}
+
+void
+vdev_cache_purge(vdev_t *vd)
+{
+ vdev_cache_t *vc = &vd->vdev_cache;
+ vdev_cache_entry_t *ve;
+
+ mutex_enter(&vc->vc_lock);
+ while ((ve = avl_first(&vc->vc_offset_tree)) != NULL)
+ vdev_cache_evict(vc, ve);
+ mutex_exit(&vc->vc_lock);
+}
+
+void
+vdev_cache_init(vdev_t *vd)
+{
+ vdev_cache_t *vc = &vd->vdev_cache;
+
+ mutex_init(&vc->vc_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ avl_create(&vc->vc_offset_tree, vdev_cache_offset_compare,
+ sizeof (vdev_cache_entry_t),
+ offsetof(struct vdev_cache_entry, ve_offset_node));
+
+ avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare,
+ sizeof (vdev_cache_entry_t),
+ offsetof(struct vdev_cache_entry, ve_lastused_node));
+}
+
+void
+vdev_cache_fini(vdev_t *vd)
+{
+ vdev_cache_t *vc = &vd->vdev_cache;
+
+ vdev_cache_purge(vd);
+
+ avl_destroy(&vc->vc_offset_tree);
+ avl_destroy(&vc->vc_lastused_tree);
+
+ mutex_destroy(&vc->vc_lock);
+}
+
+void
+vdev_cache_stat_init(void)
+{
+ vdc_ksp = kstat_create("zfs", 0, "vdev_cache_stats", "misc",
+ KSTAT_TYPE_NAMED, sizeof (vdc_stats) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+ if (vdc_ksp != NULL) {
+ vdc_ksp->ks_data = &vdc_stats;
+ kstat_install(vdc_ksp);
+ }
+}
+
+void
+vdev_cache_stat_fini(void)
+{
+ if (vdc_ksp != NULL) {
+ kstat_delete(vdc_ksp);
+ vdc_ksp = NULL;
+ }
+}
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_max, UINT, ZMOD_RW,
+ "Inflate reads small than max");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_size, UINT, ZMOD_RD,
+ "Total size of the per-disk cache");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_bshift, UINT, ZMOD_RW,
+ "Shift size to inflate reads too");
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@@ -293,16 +293,17 @@
indirect_vsd_t *iv = zio->io_vsd;
indirect_split_t *is;
- while ((is = list_remove_head(&iv->iv_splits)) != NULL) {
+ while ((is = list_head(&iv->iv_splits)) != NULL) {
for (int c = 0; c < is->is_children; c++) {
indirect_child_t *ic = &is->is_child[c];
if (ic->ic_data != NULL)
abd_free(ic->ic_data);
}
+ list_remove(&iv->iv_splits, is);
indirect_child_t *ic;
- while ((ic = list_remove_head(&is->is_unique_child)) != NULL)
- ;
+ while ((ic = list_head(&is->is_unique_child)) != NULL)
+ list_remove(&is->is_unique_child, ic);
list_destroy(&is->is_unique_child);
@@ -1658,8 +1659,8 @@
for (indirect_split_t *is = list_head(&iv->iv_splits);
is != NULL; is = list_next(&iv->iv_splits, is)) {
indirect_child_t *ic;
- while ((ic = list_remove_head(&is->is_unique_child)) != NULL)
- ;
+ while ((ic = list_head(&is->is_unique_child)) != NULL)
+ list_remove(&is->is_unique_child, ic);
is->is_unique_children = 0;
}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c
--- a/sys/contrib/openzfs/module/zfs/vdev_label.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_label.c
@@ -486,9 +486,6 @@
if (vd->vdev_isspare)
fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1);
- if (flags & VDEV_CONFIG_L2CACHE)
- fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
-
if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
vd == vd->vdev_top) {
fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -228,6 +228,13 @@
*/
uint_t zfs_vdev_def_queue_depth = 32;
+/*
+ * Allow TRIM I/Os to be aggregated. This should normally not be needed since
+ * TRIM I/O for extents up to zfs_trim_extent_bytes_max (128M) can be submitted
+ * by the TRIM code in zfs_trim.c.
+ */
+static uint_t zfs_vdev_aggregate_trim = 0;
+
static int
vdev_queue_offset_compare(const void *x1, const void *x2)
{
@@ -242,60 +249,38 @@
return (TREE_PCMP(z1, z2));
}
-#define VDQ_T_SHIFT 29
+static inline avl_tree_t *
+vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p)
+{
+ return (&vq->vq_class[p].vqc_queued_tree);
+}
+
+static inline avl_tree_t *
+vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
+{
+ ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE || t == ZIO_TYPE_TRIM);
+ if (t == ZIO_TYPE_READ)
+ return (&vq->vq_read_offset_tree);
+ else if (t == ZIO_TYPE_WRITE)
+ return (&vq->vq_write_offset_tree);
+ else
+ return (&vq->vq_trim_offset_tree);
+}
static int
-vdev_queue_to_compare(const void *x1, const void *x2)
+vdev_queue_timestamp_compare(const void *x1, const void *x2)
{
const zio_t *z1 = (const zio_t *)x1;
const zio_t *z2 = (const zio_t *)x2;
- int tcmp = TREE_CMP(z1->io_timestamp >> VDQ_T_SHIFT,
- z2->io_timestamp >> VDQ_T_SHIFT);
- int ocmp = TREE_CMP(z1->io_offset, z2->io_offset);
- int cmp = tcmp ? tcmp : ocmp;
+ int cmp = TREE_CMP(z1->io_timestamp, z2->io_timestamp);
- if (likely(cmp | (z1->io_queue_state == ZIO_QS_NONE)))
+ if (likely(cmp))
return (cmp);
return (TREE_PCMP(z1, z2));
}
-static inline boolean_t
-vdev_queue_class_fifo(zio_priority_t p)
-{
- return (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE ||
- p == ZIO_PRIORITY_TRIM);
-}
-
-static void
-vdev_queue_class_add(vdev_queue_t *vq, zio_t *zio)
-{
- zio_priority_t p = zio->io_priority;
- vq->vq_cqueued |= 1U << p;
- if (vdev_queue_class_fifo(p))
- list_insert_tail(&vq->vq_class[p].vqc_list, zio);
- else
- avl_add(&vq->vq_class[p].vqc_tree, zio);
-}
-
-static void
-vdev_queue_class_remove(vdev_queue_t *vq, zio_t *zio)
-{
- zio_priority_t p = zio->io_priority;
- uint32_t empty;
- if (vdev_queue_class_fifo(p)) {
- list_t *list = &vq->vq_class[p].vqc_list;
- list_remove(list, zio);
- empty = list_is_empty(list);
- } else {
- avl_tree_t *tree = &vq->vq_class[p].vqc_tree;
- avl_remove(tree, zio);
- empty = avl_is_empty(tree);
- }
- vq->vq_cqueued &= ~(empty << p);
-}
-
static uint_t
vdev_queue_class_min_active(vdev_queue_t *vq, zio_priority_t p)
{
@@ -375,7 +360,7 @@
}
static uint_t
-vdev_queue_class_max_active(vdev_queue_t *vq, zio_priority_t p)
+vdev_queue_class_max_active(spa_t *spa, vdev_queue_t *vq, zio_priority_t p)
{
switch (p) {
case ZIO_PRIORITY_SYNC_READ:
@@ -385,7 +370,7 @@
case ZIO_PRIORITY_ASYNC_READ:
return (zfs_vdev_async_read_max_active);
case ZIO_PRIORITY_ASYNC_WRITE:
- return (vdev_queue_max_async_writes(vq->vq_vdev->vdev_spa));
+ return (vdev_queue_max_async_writes(spa));
case ZIO_PRIORITY_SCRUB:
if (vq->vq_ia_active > 0) {
return (MIN(vq->vq_nia_credit,
@@ -429,10 +414,10 @@
static zio_priority_t
vdev_queue_class_to_issue(vdev_queue_t *vq)
{
- uint32_t cq = vq->vq_cqueued;
- zio_priority_t p, p1;
+ spa_t *spa = vq->vq_vdev->vdev_spa;
+ zio_priority_t p, n;
- if (cq == 0 || vq->vq_active >= zfs_vdev_max_active)
+ if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active)
return (ZIO_PRIORITY_NUM_QUEUEABLE);
/*
@@ -440,18 +425,14 @@
* Do round-robin to reduce starvation due to zfs_vdev_max_active
* and vq_nia_credit limits.
*/
- p1 = vq->vq_last_prio + 1;
- if (p1 >= ZIO_PRIORITY_NUM_QUEUEABLE)
- p1 = 0;
- for (p = p1; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if ((cq & (1U << p)) != 0 && vq->vq_cactive[p] <
- vdev_queue_class_min_active(vq, p))
- goto found;
- }
- for (p = 0; p < p1; p++) {
- if ((cq & (1U << p)) != 0 && vq->vq_cactive[p] <
- vdev_queue_class_min_active(vq, p))
- goto found;
+ for (n = 0; n < ZIO_PRIORITY_NUM_QUEUEABLE; n++) {
+ p = (vq->vq_last_prio + n + 1) % ZIO_PRIORITY_NUM_QUEUEABLE;
+ if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
+ vq->vq_class[p].vqc_active <
+ vdev_queue_class_min_active(vq, p)) {
+ vq->vq_last_prio = p;
+ return (p);
+ }
}
/*
@@ -459,14 +440,16 @@
* maximum # outstanding i/os.
*/
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if ((cq & (1U << p)) != 0 && vq->vq_cactive[p] <
- vdev_queue_class_max_active(vq, p))
- break;
+ if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
+ vq->vq_class[p].vqc_active <
+ vdev_queue_class_max_active(spa, vq, p)) {
+ vq->vq_last_prio = p;
+ return (p);
+ }
}
-found:
- vq->vq_last_prio = p;
- return (p);
+ /* No eligible queued i/os */
+ return (ZIO_PRIORITY_NUM_QUEUEABLE);
}
void
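[Editorial note: the single modulo scan restored in vdev_queue_class_to_issue() above replaces the earlier split two-loop round-robin. A minimal standalone sketch of the same starvation-avoidance rule, assuming hypothetical queued[]/active[]/min_active[] counters in place of the real vdev_queue_t fields:]

static int last_class;	/* analogous to vq_last_prio */

static int
pick_next_class(const int *queued, const int *active,
    const int *min_active, int num_classes)
{
	for (int n = 0; n < num_classes; n++) {
		int c = (last_class + n + 1) % num_classes;
		/* Prefer a class still below its guaranteed minimum. */
		if (queued[c] > 0 && active[c] < min_active[c]) {
			last_class = c;
			return (c);
		}
	}
	return (-1);	/* caller falls through to the max-active scan */
}

[Resuming one slot past the last class issued is what keeps a busy low-numbered class from permanently shadowing the higher-numbered ones.]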
@@ -475,30 +458,42 @@
vdev_queue_t *vq = &vd->vdev_queue;
zio_priority_t p;
+ mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
vq->vq_vdev = vd;
+ taskq_init_ent(&vd->vdev_queue.vq_io_search.io_tqent);
- for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (vdev_queue_class_fifo(p)) {
- list_create(&vq->vq_class[p].vqc_list,
- sizeof (zio_t),
- offsetof(struct zio, io_queue_node.l));
- } else {
- avl_create(&vq->vq_class[p].vqc_tree,
- vdev_queue_to_compare, sizeof (zio_t),
- offsetof(struct zio, io_queue_node.a));
- }
- }
- avl_create(&vq->vq_read_offset_tree,
+ avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
+ sizeof (zio_t), offsetof(struct zio, io_queue_node));
+ avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ),
+ vdev_queue_offset_compare, sizeof (zio_t),
+ offsetof(struct zio, io_offset_node));
+ avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
vdev_queue_offset_compare, sizeof (zio_t),
offsetof(struct zio, io_offset_node));
- avl_create(&vq->vq_write_offset_tree,
+ avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_TRIM),
vdev_queue_offset_compare, sizeof (zio_t),
offsetof(struct zio, io_offset_node));
+ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
+ int (*compfn) (const void *, const void *);
+
+ /*
+ * The synchronous/trim i/o queues are dispatched in FIFO rather
+ * than LBA order. This provides more consistent latency for
+ * these i/os.
+ */
+ if (p == ZIO_PRIORITY_SYNC_READ ||
+ p == ZIO_PRIORITY_SYNC_WRITE ||
+ p == ZIO_PRIORITY_TRIM) {
+ compfn = vdev_queue_timestamp_compare;
+ } else {
+ compfn = vdev_queue_offset_compare;
+ }
+ avl_create(vdev_queue_class_tree(vq, p), compfn,
+ sizeof (zio_t), offsetof(struct zio, io_queue_node));
+ }
+
vq->vq_last_offset = 0;
- list_create(&vq->vq_active_list, sizeof (struct zio),
- offsetof(struct zio, io_queue_node.l));
- mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
}
void
@@ -506,39 +501,30 @@
{
vdev_queue_t *vq = &vd->vdev_queue;
- for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (vdev_queue_class_fifo(p))
- list_destroy(&vq->vq_class[p].vqc_list);
- else
- avl_destroy(&vq->vq_class[p].vqc_tree);
- }
- avl_destroy(&vq->vq_read_offset_tree);
- avl_destroy(&vq->vq_write_offset_tree);
+ for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
+ avl_destroy(vdev_queue_class_tree(vq, p));
+ avl_destroy(&vq->vq_active_tree);
+ avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ));
+ avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
+ avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_TRIM));
- list_destroy(&vq->vq_active_list);
mutex_destroy(&vq->vq_lock);
}
static void
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
- zio->io_queue_state = ZIO_QS_QUEUED;
- vdev_queue_class_add(vq, zio);
- if (zio->io_type == ZIO_TYPE_READ)
- avl_add(&vq->vq_read_offset_tree, zio);
- else if (zio->io_type == ZIO_TYPE_WRITE)
- avl_add(&vq->vq_write_offset_tree, zio);
+ ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
+ avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
+ avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
}
static void
vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
{
- vdev_queue_class_remove(vq, zio);
- if (zio->io_type == ZIO_TYPE_READ)
- avl_remove(&vq->vq_read_offset_tree, zio);
- else if (zio->io_type == ZIO_TYPE_WRITE)
- avl_remove(&vq->vq_write_offset_tree, zio);
- zio->io_queue_state = ZIO_QS_NONE;
+ ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
+ avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
+ avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
}
static boolean_t
@@ -560,16 +546,14 @@
{
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- vq->vq_cactive[zio->io_priority]++;
- vq->vq_active++;
+ vq->vq_class[zio->io_priority].vqc_active++;
if (vdev_queue_is_interactive(zio->io_priority)) {
if (++vq->vq_ia_active == 1)
vq->vq_nia_credit = 1;
} else if (vq->vq_ia_active > 0) {
vq->vq_nia_credit--;
}
- zio->io_queue_state = ZIO_QS_ACTIVE;
- list_insert_tail(&vq->vq_active_list, zio);
+ avl_add(&vq->vq_active_tree, zio);
}
static void
@@ -577,8 +561,7 @@
{
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- vq->vq_cactive[zio->io_priority]--;
- vq->vq_active--;
+ vq->vq_class[zio->io_priority].vqc_active--;
if (vdev_queue_is_interactive(zio->io_priority)) {
if (--vq->vq_ia_active == 0)
vq->vq_nia_credit = 0;
@@ -586,8 +569,7 @@
vq->vq_nia_credit = zfs_vdev_nia_credit;
} else if (vq->vq_ia_active == 0)
vq->vq_nia_credit++;
- list_remove(&vq->vq_active_list, zio);
- zio->io_queue_state = ZIO_QS_NONE;
+ avl_remove(&vq->vq_active_tree, zio);
}
static void
@@ -620,28 +602,29 @@
uint64_t maxgap = 0;
uint64_t size;
uint64_t limit;
+ int maxblocksize;
boolean_t stretch = B_FALSE;
+ avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
+ zio_flag_t flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
uint64_t next_offset;
abd_t *abd;
- avl_tree_t *t;
-
- /*
- * TRIM aggregation should not be needed since code in zfs_trim.c can
- * submit TRIM I/O for extents up to zfs_trim_extent_bytes_max (128M).
- */
- if (zio->io_type == ZIO_TYPE_TRIM)
- return (NULL);
-
- if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
- return (NULL);
+ maxblocksize = spa_maxblocksize(vq->vq_vdev->vdev_spa);
if (vq->vq_vdev->vdev_nonrot)
limit = zfs_vdev_aggregation_limit_non_rotating;
else
limit = zfs_vdev_aggregation_limit;
- if (limit == 0)
+ limit = MIN(limit, maxblocksize);
+
+ if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE || limit == 0)
+ return (NULL);
+
+ /*
+	 * While TRIM commands could be aggregated based on offset, this
+ * behavior is disabled until it's determined to be beneficial.
+ */
+ if (zio->io_type == ZIO_TYPE_TRIM && !zfs_vdev_aggregate_trim)
return (NULL);
- limit = MIN(limit, SPA_MAXBLOCKSIZE);
/*
* I/Os to distributed spares are directly dispatched to the dRAID
@@ -652,13 +635,8 @@
first = last = zio;
- if (zio->io_type == ZIO_TYPE_READ) {
+ if (zio->io_type == ZIO_TYPE_READ)
maxgap = zfs_vdev_read_gap_limit;
- t = &vq->vq_read_offset_tree;
- } else {
- ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
- t = &vq->vq_write_offset_tree;
- }
/*
* We can aggregate I/Os that are sufficiently adjacent and of
@@ -679,7 +657,6 @@
* Walk backwards through sufficiently contiguous I/Os
* recording the last non-optional I/O.
*/
- zio_flag_t flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
while ((dio = AVL_PREV(t, first)) != NULL &&
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
IO_SPAN(dio, last) <= limit &&
@@ -709,7 +686,7 @@
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
(IO_SPAN(first, dio) <= limit ||
(dio->io_flags & ZIO_FLAG_OPTIONAL)) &&
- IO_SPAN(first, dio) <= SPA_MAXBLOCKSIZE &&
+ IO_SPAN(first, dio) <= maxblocksize &&
IO_GAP(last, dio) <= maxgap &&
dio->io_type == zio->io_type) {
last = dio;
@@ -763,7 +740,7 @@
return (NULL);
size = IO_SPAN(first, last);
- ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
+ ASSERT3U(size, <=, maxblocksize);
abd = abd_alloc_gang();
if (abd == NULL)
@@ -771,7 +748,8 @@
aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
abd, size, first->io_type, zio->io_priority,
- flags | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL);
+ flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
+ vdev_queue_agg_io_done, NULL);
aio->io_timestamp = first->io_timestamp;
nio = first;
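[Editorial note: the aggregation path above admits a neighboring i/o only while the whole span stays within the spa_maxblocksize()-clamped limit and, for reads, the hole between consecutive i/os stays within zfs_vdev_read_gap_limit. The admission test in isolation, a sketch with the IO_SPAN/IO_GAP macros expanded inline; first/last/cand are offset-adjacent queued i/os and only the named zio_t fields are assumed:]

static int
may_aggregate(const zio_t *first, const zio_t *last, const zio_t *cand,
    uint64_t limit, uint64_t maxgap)
{
	/* Total span if cand joins, and the gap between last and cand. */
	uint64_t span = cand->io_offset + cand->io_size - first->io_offset;
	uint64_t gap = cand->io_offset - (last->io_offset + last->io_size);

	return (span <= limit && gap <= maxgap &&
	    cand->io_type == first->io_type);
}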
@@ -847,30 +825,19 @@
return (NULL);
}
- if (vdev_queue_class_fifo(p)) {
- zio = list_head(&vq->vq_class[p].vqc_list);
- } else {
- /*
- * For LBA-ordered queues (async / scrub / initializing),
- * issue the I/O which follows the most recently issued I/O
- * in LBA (offset) order, but to avoid starvation only within
- * the same 0.5 second interval as the first I/O.
- */
- tree = &vq->vq_class[p].vqc_tree;
- zio = aio = avl_first(tree);
- if (zio->io_offset < vq->vq_last_offset) {
- vq->vq_io_search.io_timestamp = zio->io_timestamp;
- vq->vq_io_search.io_offset = vq->vq_last_offset;
- zio = avl_find(tree, &vq->vq_io_search, &idx);
- if (zio == NULL) {
- zio = avl_nearest(tree, idx, AVL_AFTER);
- if (zio == NULL ||
- (zio->io_timestamp >> VDQ_T_SHIFT) !=
- (aio->io_timestamp >> VDQ_T_SHIFT))
- zio = aio;
- }
- }
- }
+ /*
+ * For LBA-ordered queues (async / scrub / initializing), issue the
+ * i/o which follows the most recently issued i/o in LBA (offset) order.
+ *
+ * For FIFO queues (sync/trim), issue the i/o with the lowest timestamp.
+ */
+ tree = vdev_queue_class_tree(vq, p);
+ vq->vq_io_search.io_timestamp = 0;
+ vq->vq_io_search.io_offset = vq->vq_last_offset - 1;
+ VERIFY3P(avl_find(tree, &vq->vq_io_search, &idx), ==, NULL);
+ zio = avl_nearest(tree, idx, AVL_AFTER);
+ if (zio == NULL)
+ zio = avl_first(tree);
ASSERT3U(zio->io_priority, ==, p);
aio = vdev_queue_aggregate(vq, zio);
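[Editorial note: the probe above is a standard AVL successor query: seed a search key that cannot match a real node, verify that avl_find() misses, then take the neighbor AVL_AFTER the miss point. The same pattern in isolation (avl_find()/avl_nearest()/avl_first() are the real AVL API; the key construction is illustrative):]

avl_index_t where;
zio_t key = { .io_timestamp = 0, .io_offset = last_offset - 1 };

VERIFY3P(avl_find(tree, &key, &where), ==, NULL);
zio_t *next = avl_nearest(tree, where, AVL_AFTER);
if (next == NULL)
	next = avl_first(tree);	/* wrap back to the lowest key */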
@@ -940,7 +907,7 @@
ASSERT(zio->io_priority == ZIO_PRIORITY_TRIM);
}
- zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
+ zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
zio->io_timestamp = gethrtime();
mutex_enter(&vq->vq_lock);
@@ -1001,6 +968,7 @@
vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority)
{
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
+ avl_tree_t *tree;
/*
* ZIO_PRIORITY_NOW is used by the vdev cache code and the aggregate zio
@@ -1035,11 +1003,12 @@
* Otherwise, the zio is currently active and we cannot change its
* priority.
*/
- if (zio->io_queue_state == ZIO_QS_QUEUED) {
- vdev_queue_class_remove(vq, zio);
+ tree = vdev_queue_class_tree(vq, zio->io_priority);
+ if (avl_find(tree, zio, NULL) == zio) {
+ avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
zio->io_priority = priority;
- vdev_queue_class_add(vq, zio);
- } else if (zio->io_queue_state == ZIO_QS_NONE) {
+ avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
+ } else if (avl_find(&vq->vq_active_tree, zio, NULL) != zio) {
zio->io_priority = priority;
}
@@ -1052,10 +1021,10 @@
* vq_lock mutex use here, instead we prefer to keep it lock free for
* performance.
*/
-uint32_t
+int
vdev_queue_length(vdev_t *vd)
{
- return (vd->vdev_queue.vq_active);
+ return (avl_numnodes(&vd->vdev_queue.vq_active_tree));
}
uint64_t
@@ -1064,22 +1033,15 @@
return (vd->vdev_queue.vq_last_offset);
}
-uint64_t
-vdev_queue_class_length(vdev_t *vd, zio_priority_t p)
-{
- vdev_queue_t *vq = &vd->vdev_queue;
- if (vdev_queue_class_fifo(p))
- return (list_is_empty(&vq->vq_class[p].vqc_list) == 0);
- else
- return (avl_numnodes(&vq->vq_class[p].vqc_tree));
-}
-
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, aggregation_limit, UINT, ZMOD_RW,
"Max vdev I/O aggregation size");
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, aggregation_limit_non_rotating, UINT,
ZMOD_RW, "Max vdev I/O aggregation size for non-rotating media");
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, aggregate_trim, UINT, ZMOD_RW,
+ "Allow TRIM I/O to be aggregated");
+
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, read_gap_limit, UINT, ZMOD_RW,
"Aggregate read I/O over gap");
diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
--- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
@@ -571,10 +571,8 @@
vdev_rebuild_blkptr_init(&blk, vd, start, size);
uint64_t psize = BP_GET_PSIZE(&blk);
- if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN)) {
- vr->vr_pass_bytes_skipped += size;
+ if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN))
return (0);
- }
mutex_enter(&vr->vr_io_lock);
@@ -788,7 +786,6 @@
vr->vr_pass_start_time = gethrtime();
vr->vr_pass_bytes_scanned = 0;
vr->vr_pass_bytes_issued = 0;
- vr->vr_pass_bytes_skipped = 0;
uint64_t update_est_time = gethrtime();
vdev_rebuild_update_bytes_est(vd, 0);
@@ -1156,7 +1153,6 @@
vr->vr_pass_start_time);
vrs->vrs_pass_bytes_scanned = vr->vr_pass_bytes_scanned;
vrs->vrs_pass_bytes_issued = vr->vr_pass_bytes_issued;
- vrs->vrs_pass_bytes_skipped = vr->vr_pass_bytes_skipped;
mutex_exit(&tvd->vdev_rebuild_lock);
}
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c
--- a/sys/contrib/openzfs/module/zfs/zap_micro.c
+++ b/sys/contrib/openzfs/module/zfs/zap_micro.c
@@ -285,7 +285,6 @@
}
}
-__attribute__((always_inline)) inline
static int
mze_compare(const void *arg1, const void *arg2)
{
@@ -296,9 +295,6 @@
(uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
}
-ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t,
- mze_compare)
-
static void
mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
{
@@ -465,7 +461,7 @@
* 62 entries before we have to add 2KB B-tree core node.
*/
zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
- mze_find_in_buf, sizeof (mzap_ent_t), 512);
+ sizeof (mzap_ent_t), 512);
zap_name_t *zn = zap_name_alloc(zap);
for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
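[Editorial note: mze_compare() above orders microzap entries by a single 64-bit key, the 32-bit hash in the high word and the collision differentiator (cd) in the low word. An equivalent standalone comparator, with the TREE_CMP semantics written out over a toy struct:]

struct mze { uint32_t hash; uint32_t cd; };

static int
mze_cmp(const struct mze *a, const struct mze *b)
{
	uint64_t ka = ((uint64_t)a->hash << 32) | a->cd;
	uint64_t kb = ((uint64_t)b->hash << 32) | b->cd;

	return (ka < kb ? -1 : (ka > kb ? 1 : 0));
}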
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c
--- a/sys/contrib/openzfs/module/zfs/zfs_fm.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c
@@ -1522,8 +1522,9 @@
{
recent_events_node_t *entry;
- while ((entry = list_remove_head(&recent_events_list)) != NULL) {
+ while ((entry = list_head(&recent_events_list)) != NULL) {
avl_remove(&recent_events_tree, entry);
+ list_remove(&recent_events_list, entry);
kmem_free(entry, sizeof (*entry));
}
avl_destroy(&recent_events_tree);
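[Editorial note: this hunk, like the zfs_fuid.c, zfs_onexit.c, and zvol.c hunks below, reverts a list_remove_head() drain to the two-call form available in 2.1.99. The generic shape, sketched with a hypothetical node_t on the illumos-style list(9F) API:]

typedef struct node { list_node_t ln; /* payload would follow */ } node_t;

static void
drain(list_t *lst)
{
	node_t *np;

	while ((np = list_head(lst)) != NULL) {
		list_remove(lst, np);	/* unlink before freeing */
		kmem_free(np, sizeof (*np));
	}
	list_destroy(lst);
}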
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fuid.c b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
--- a/sys/contrib/openzfs/module/zfs/zfs_fuid.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
@@ -699,15 +699,19 @@
zfs_fuid_t *zfuid;
zfs_fuid_domain_t *zdomain;
- while ((zfuid = list_remove_head(&fuidp->z_fuids)) != NULL)
+ while ((zfuid = list_head(&fuidp->z_fuids)) != NULL) {
+ list_remove(&fuidp->z_fuids, zfuid);
kmem_free(zfuid, sizeof (zfs_fuid_t));
+ }
if (fuidp->z_domain_table != NULL)
kmem_free(fuidp->z_domain_table,
(sizeof (char *)) * fuidp->z_domain_cnt);
- while ((zdomain = list_remove_head(&fuidp->z_domains)) != NULL)
+ while ((zdomain = list_head(&fuidp->z_domains)) != NULL) {
+ list_remove(&fuidp->z_domains, zdomain);
kmem_free(zdomain, sizeof (zfs_fuid_domain_t));
+ }
kmem_free(fuidp, sizeof (zfs_fuid_info_t));
}
diff --git a/sys/contrib/openzfs/module/zfs/zfs_onexit.c b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
--- a/sys/contrib/openzfs/module/zfs/zfs_onexit.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
@@ -87,7 +87,8 @@
zfs_onexit_action_node_t *ap;
mutex_enter(&zo->zo_lock);
- while ((ap = list_remove_head(&zo->zo_actions)) != NULL) {
+ while ((ap = list_head(&zo->zo_actions)) != NULL) {
+ list_remove(&zo->zo_actions, ap);
mutex_exit(&zo->zo_lock);
ap->za_func(ap->za_data);
kmem_free(ap, sizeof (zfs_onexit_action_node_t));
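[Editorial note: the zfs_onexit drain above additionally drops zo_lock around each callback, since za_func() may block or take other locks; the hunk is cut off here, but the loop presumably reacquires the mutex before the next list_head(). The full shape as a hedged sketch:]

mutex_enter(&zo->zo_lock);
while ((ap = list_head(&zo->zo_actions)) != NULL) {
	list_remove(&zo->zo_actions, ap);
	mutex_exit(&zo->zo_lock);	/* callback may block */
	ap->za_func(ap->za_data);
	kmem_free(ap, sizeof (zfs_onexit_action_node_t));
	mutex_enter(&zo->zo_lock);
}
mutex_exit(&zo->zo_lock);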
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -462,12 +462,14 @@
return (SET_ERROR(EINVAL));
}
+ const uint64_t max_blksz = zfsvfs->z_max_blksz;
+
/*
* Pre-fault the pages to ensure slow (eg NFS) pages
* don't hold up txg.
+ * Skip this if uio contains loaned arc_buf.
*/
- ssize_t pfbytes = MIN(n, DMU_MAX_ACCESS >> 1);
- if (zfs_uio_prefaultpages(pfbytes, uio)) {
+ if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EFAULT));
}
@@ -542,31 +544,10 @@
break;
}
- uint64_t blksz;
- if (lr->lr_length == UINT64_MAX && zp->z_size <= zp->z_blksz) {
- if (zp->z_blksz > zfsvfs->z_max_blksz &&
- !ISP2(zp->z_blksz)) {
- /*
- * File's blocksize is already larger than the
- * "recordsize" property. Only let it grow to
- * the next power of 2.
- */
- blksz = 1 << highbit64(zp->z_blksz);
- } else {
- blksz = zfsvfs->z_max_blksz;
- }
- blksz = MIN(blksz, P2ROUNDUP(end_size,
- SPA_MINBLOCKSIZE));
- blksz = MAX(blksz, zp->z_blksz);
- } else {
- blksz = zp->z_blksz;
- }
-
arc_buf_t *abuf = NULL;
- ssize_t nbytes = n;
- if (n >= blksz && woff >= zp->z_size &&
- P2PHASE(woff, blksz) == 0 &&
- (blksz >= SPA_OLD_MAXBLOCKSIZE || n < 4 * blksz)) {
+ if (n >= max_blksz && woff >= zp->z_size &&
+ P2PHASE(woff, max_blksz) == 0 &&
+ zp->z_blksz == max_blksz) {
/*
* This write covers a full block. "Borrow" a buffer
* from the dmu so that we can fill it before we enter
@@ -574,26 +555,18 @@
* holding up the transaction if the data copy hangs
* up on a pagefault (e.g., from an NFS server mapping).
*/
+ size_t cbytes;
+
abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
- blksz);
+ max_blksz);
ASSERT(abuf != NULL);
- ASSERT(arc_buf_size(abuf) == blksz);
- if ((error = zfs_uiocopy(abuf->b_data, blksz,
- UIO_WRITE, uio, &nbytes))) {
+ ASSERT(arc_buf_size(abuf) == max_blksz);
+ if ((error = zfs_uiocopy(abuf->b_data, max_blksz,
+ UIO_WRITE, uio, &cbytes))) {
dmu_return_arcbuf(abuf);
break;
}
- ASSERT3S(nbytes, ==, blksz);
- } else {
- nbytes = MIN(n, (DMU_MAX_ACCESS >> 1) -
- P2PHASE(woff, blksz));
- if (pfbytes < nbytes) {
- if (zfs_uio_prefaultpages(nbytes, uio)) {
- error = SET_ERROR(EFAULT);
- break;
- }
- pfbytes = nbytes;
- }
+ ASSERT3S(cbytes, ==, max_blksz);
}
/*
@@ -603,7 +576,8 @@
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
DB_DNODE_ENTER(db);
- dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff, nbytes);
+ dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff,
+ MIN(n, max_blksz));
DB_DNODE_EXIT(db);
zfs_sa_upgrade_txholds(tx, zp);
error = dmu_tx_assign(tx, TXG_WAIT);
@@ -626,10 +600,31 @@
* shrink down lr_length to the appropriate size.
*/
if (lr->lr_length == UINT64_MAX) {
- zfs_grow_blocksize(zp, blksz, tx);
+ uint64_t new_blksz;
+
+ if (zp->z_blksz > max_blksz) {
+ /*
+ * File's blocksize is already larger than the
+ * "recordsize" property. Only let it grow to
+ * the next power of 2.
+ */
+ ASSERT(!ISP2(zp->z_blksz));
+ new_blksz = MIN(end_size,
+ 1 << highbit64(zp->z_blksz));
+ } else {
+ new_blksz = MIN(end_size, max_blksz);
+ }
+ zfs_grow_blocksize(zp, new_blksz, tx);
zfs_rangelock_reduce(lr, woff, n);
}
+ /*
+ * XXX - should we really limit each write to z_max_blksz?
+ * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
+ */
+ const ssize_t nbytes =
+ MIN(n, max_blksz - P2PHASE(woff, max_blksz));
+
ssize_t tx_bytes;
if (abuf == NULL) {
tx_bytes = zfs_uio_resid(uio);
@@ -649,8 +644,12 @@
* zfs_uio_prefaultpages, or prefaultpages may
* error, and we may break the loop early.
*/
- n -= tx_bytes - zfs_uio_resid(uio);
- pfbytes -= tx_bytes - zfs_uio_resid(uio);
+ if (tx_bytes != zfs_uio_resid(uio))
+ n -= tx_bytes - zfs_uio_resid(uio);
+ if (zfs_uio_prefaultpages(MIN(n, max_blksz),
+ uio)) {
+ break;
+ }
continue;
}
#endif
@@ -666,6 +665,15 @@
}
tx_bytes -= zfs_uio_resid(uio);
} else {
+ /* Implied by abuf != NULL: */
+ ASSERT3S(n, >=, max_blksz);
+ ASSERT0(P2PHASE(woff, max_blksz));
+ /*
+ * We can simplify nbytes to MIN(n, max_blksz) since
+ * P2PHASE(woff, max_blksz) is 0, and knowing
+ * n >= max_blksz lets us simplify further:
+ */
+ ASSERT3S(nbytes, ==, max_blksz);
/*
* Thus, we're writing a full block at a block-aligned
* offset and extending the file past EOF.
@@ -750,7 +758,13 @@
break;
ASSERT3S(tx_bytes, ==, nbytes);
n -= nbytes;
- pfbytes -= nbytes;
+
+ if (n > 0) {
+ if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ error = SET_ERROR(EFAULT);
+ break;
+ }
+ }
}
zfs_znode_update_vfs(zp);
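[Editorial note: the restored write path above prefaults at most one block of user pages at a time, refaulting before each subsequent chunk so a slow mapping (e.g. NFS) never stalls a held transaction. The loop shape as a sketch, where prefault() and copy_chunk() are hypothetical stand-ins for zfs_uio_prefaultpages() and the DMU copy, and max_blksz is assumed to be a power of two:]

static int
write_chunks(zfs_uio_t *uio, uint64_t woff, uint64_t n, uint64_t max_blksz)
{
	while (n > 0) {
		if (prefault(uio, MIN(n, max_blksz)) != 0)
			return (EFAULT);	/* user pages unavailable */
		/* Copy at most to the end of the current block. */
		uint64_t phase = woff & (max_blksz - 1);	/* P2PHASE */
		uint64_t nbytes = MIN(n, max_blksz - phase);
		copy_chunk(uio, woff, nbytes);
		woff += nbytes;
		n -= nbytes;
	}
	return (0);
}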
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -116,12 +116,8 @@
{ "zil_itx_needcopy_bytes", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
- { "zil_itx_metaslab_normal_write", KSTAT_DATA_UINT64 },
- { "zil_itx_metaslab_normal_alloc", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_count", KSTAT_DATA_UINT64 },
{ "zil_itx_metaslab_slog_bytes", KSTAT_DATA_UINT64 },
- { "zil_itx_metaslab_slog_write", KSTAT_DATA_UINT64 },
- { "zil_itx_metaslab_slog_alloc", KSTAT_DATA_UINT64 },
};
static zil_sums_t zil_sums_global;
@@ -150,10 +146,6 @@
static kmem_cache_t *zil_lwb_cache;
static kmem_cache_t *zil_zcw_cache;
-static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
-static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb);
-static itx_t *zil_itx_clone(itx_t *oitx);
-
static int
zil_bp_compare(const void *x1, const void *x2)
{
@@ -249,10 +241,11 @@
*/
static int
zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
- blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf)
+ blkptr_t *nbp, void *dst, char **end)
{
zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
arc_flags_t aflags = ARC_FLAG_WAIT;
+ arc_buf_t *abuf = NULL;
zbookmark_phys_t zb;
int error;
@@ -269,7 +262,7 @@
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
- abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
+ &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
if (error == 0) {
zio_cksum_t cksum = bp->blk_cksum;
@@ -284,23 +277,23 @@
*/
cksum.zc_word[ZIL_ZC_SEQ]++;
- uint64_t size = BP_GET_LSIZE(bp);
if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
- zil_chain_t *zilc = (*abuf)->b_data;
+ zil_chain_t *zilc = abuf->b_data;
char *lr = (char *)(zilc + 1);
+ uint64_t len = zilc->zc_nused - sizeof (zil_chain_t);
if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
- sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) ||
- zilc->zc_nused < sizeof (*zilc) ||
- zilc->zc_nused > size) {
+ sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) {
error = SET_ERROR(ECKSUM);
} else {
- *begin = lr;
- *end = lr + zilc->zc_nused - sizeof (*zilc);
+ ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE);
+ memcpy(dst, lr, len);
+ *end = (char *)dst + len;
*nbp = zilc->zc_next_blk;
}
} else {
- char *lr = (*abuf)->b_data;
+ char *lr = abuf->b_data;
+ uint64_t size = BP_GET_LSIZE(bp);
zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1;
if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
@@ -308,11 +301,15 @@
(zilc->zc_nused > (size - sizeof (*zilc)))) {
error = SET_ERROR(ECKSUM);
} else {
- *begin = lr;
- *end = lr + zilc->zc_nused;
+ ASSERT3U(zilc->zc_nused, <=,
+ SPA_OLD_MAXBLOCKSIZE);
+ memcpy(dst, lr, zilc->zc_nused);
+ *end = (char *)dst + zilc->zc_nused;
*nbp = zilc->zc_next_blk;
}
}
+
+ arc_buf_destroy(abuf, &abuf);
}
return (error);
@@ -378,12 +375,8 @@
wmsum_init(&zs->zil_itx_needcopy_bytes, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_count, 0);
wmsum_init(&zs->zil_itx_metaslab_normal_bytes, 0);
- wmsum_init(&zs->zil_itx_metaslab_normal_write, 0);
- wmsum_init(&zs->zil_itx_metaslab_normal_alloc, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_count, 0);
wmsum_init(&zs->zil_itx_metaslab_slog_bytes, 0);
- wmsum_init(&zs->zil_itx_metaslab_slog_write, 0);
- wmsum_init(&zs->zil_itx_metaslab_slog_alloc, 0);
}
void
@@ -400,12 +393,8 @@
wmsum_fini(&zs->zil_itx_needcopy_bytes);
wmsum_fini(&zs->zil_itx_metaslab_normal_count);
wmsum_fini(&zs->zil_itx_metaslab_normal_bytes);
- wmsum_fini(&zs->zil_itx_metaslab_normal_write);
- wmsum_fini(&zs->zil_itx_metaslab_normal_alloc);
wmsum_fini(&zs->zil_itx_metaslab_slog_count);
wmsum_fini(&zs->zil_itx_metaslab_slog_bytes);
- wmsum_fini(&zs->zil_itx_metaslab_slog_write);
- wmsum_fini(&zs->zil_itx_metaslab_slog_alloc);
}
void
@@ -433,18 +422,10 @@
wmsum_value(&zil_sums->zil_itx_metaslab_normal_count);
zs->zil_itx_metaslab_normal_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_normal_bytes);
- zs->zil_itx_metaslab_normal_write.value.ui64 =
- wmsum_value(&zil_sums->zil_itx_metaslab_normal_write);
- zs->zil_itx_metaslab_normal_alloc.value.ui64 =
- wmsum_value(&zil_sums->zil_itx_metaslab_normal_alloc);
zs->zil_itx_metaslab_slog_count.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_count);
zs->zil_itx_metaslab_slog_bytes.value.ui64 =
wmsum_value(&zil_sums->zil_itx_metaslab_slog_bytes);
- zs->zil_itx_metaslab_slog_write.value.ui64 =
- wmsum_value(&zil_sums->zil_itx_metaslab_slog_write);
- zs->zil_itx_metaslab_slog_alloc.value.ui64 =
- wmsum_value(&zil_sums->zil_itx_metaslab_slog_alloc);
}
/*
@@ -464,6 +445,7 @@
uint64_t blk_count = 0;
uint64_t lr_count = 0;
blkptr_t blk, next_blk = {{{{0}}}};
+ char *lrbuf, *lrp;
int error = 0;
/*
@@ -481,13 +463,13 @@
* If the log has been claimed, stop if we encounter a sequence
* number greater than the highest claimed sequence number.
*/
+ lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE);
zil_bp_tree_init(zilog);
for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
int reclen;
- char *lrp, *end;
- arc_buf_t *abuf = NULL;
+ char *end = NULL;
if (blk_seq > claim_blk_seq)
break;
@@ -503,10 +485,8 @@
break;
error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
- &lrp, &end, &abuf);
+ lrbuf, &end);
if (error != 0) {
- if (abuf)
- arc_buf_destroy(abuf, &abuf);
if (claimed) {
char name[ZFS_MAX_DATASET_NAME_LEN];
@@ -519,25 +499,20 @@
break;
}
- for (; lrp < end; lrp += reclen) {
+ for (lrp = lrbuf; lrp < end; lrp += reclen) {
lr_t *lr = (lr_t *)lrp;
reclen = lr->lrc_reclen;
ASSERT3U(reclen, >=, sizeof (lr_t));
- if (lr->lrc_seq > claim_lr_seq) {
- arc_buf_destroy(abuf, &abuf);
+ if (lr->lrc_seq > claim_lr_seq)
goto done;
- }
error = parse_lr_func(zilog, lr, arg, txg);
- if (error != 0) {
- arc_buf_destroy(abuf, &abuf);
+ if (error != 0)
goto done;
- }
ASSERT3U(max_lr_seq, <, lr->lrc_seq);
max_lr_seq = lr->lrc_seq;
lr_count++;
}
- arc_buf_destroy(abuf, &abuf);
}
done:
zilog->zl_parse_error = error;
@@ -547,6 +522,7 @@
zilog->zl_parse_lr_count = lr_count;
zil_bp_tree_fini(zilog);
+ zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
return (error);
}
@@ -771,21 +747,20 @@
lwb->lwb_blk = *bp;
lwb->lwb_fastwrite = fastwrite;
lwb->lwb_slog = slog;
- lwb->lwb_indirect = B_FALSE;
- if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
- lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t);
- lwb->lwb_sz = BP_GET_LSIZE(bp);
- } else {
- lwb->lwb_nused = lwb->lwb_nfilled = 0;
- lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t);
- }
lwb->lwb_state = LWB_STATE_CLOSED;
lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp));
+ lwb->lwb_max_txg = txg;
lwb->lwb_write_zio = NULL;
lwb->lwb_root_zio = NULL;
lwb->lwb_issued_timestamp = 0;
lwb->lwb_issued_txg = 0;
- lwb->lwb_max_txg = txg;
+ if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
+ lwb->lwb_nused = sizeof (zil_chain_t);
+ lwb->lwb_sz = BP_GET_LSIZE(bp);
+ } else {
+ lwb->lwb_nused = 0;
+ lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t);
+ }
mutex_enter(&zilog->zl_lock);
list_insert_tail(&zilog->zl_lwb_list, lwb);
@@ -799,8 +774,8 @@
{
ASSERT(MUTEX_HELD(&zilog->zl_lock));
ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock));
- VERIFY(list_is_empty(&lwb->lwb_waiters));
- VERIFY(list_is_empty(&lwb->lwb_itxs));
+ ASSERT(list_is_empty(&lwb->lwb_waiters));
+ ASSERT(list_is_empty(&lwb->lwb_itxs));
ASSERT(avl_is_empty(&lwb->lwb_vdev_tree));
ASSERT3P(lwb->lwb_write_zio, ==, NULL);
ASSERT3P(lwb->lwb_root_zio, ==, NULL);
@@ -1398,14 +1373,9 @@
zil_commit_waiter_t *zcw;
itx_t *itx;
uint64_t txg;
- list_t itxs, waiters;
spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
- list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
- list_create(&waiters, sizeof (zil_commit_waiter_t),
- offsetof(zil_commit_waiter_t, zcw_node));
-
hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp;
mutex_enter(&zilog->zl_lock);
@@ -1414,6 +1384,9 @@
lwb->lwb_root_zio = NULL;
+ ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
+ lwb->lwb_state = LWB_STATE_FLUSH_DONE;
+
if (zilog->zl_last_lwb_opened == lwb) {
/*
* Remember the highest committed log sequence number
@@ -1424,22 +1397,13 @@
zilog->zl_commit_lr_seq = zilog->zl_lr_seq;
}
- list_move_tail(&itxs, &lwb->lwb_itxs);
- list_move_tail(&waiters, &lwb->lwb_waiters);
- txg = lwb->lwb_issued_txg;
-
- ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
- lwb->lwb_state = LWB_STATE_FLUSH_DONE;
-
- mutex_exit(&zilog->zl_lock);
-
- while ((itx = list_remove_head(&itxs)) != NULL)
+ while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
zil_itx_destroy(itx);
- list_destroy(&itxs);
- while ((zcw = list_remove_head(&waiters)) != NULL) {
+ while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
mutex_enter(&zcw->zcw_lock);
+ ASSERT3P(zcw->zcw_lwb, ==, lwb);
zcw->zcw_lwb = NULL;
/*
* We expect any ZIO errors from child ZIOs to have been
@@ -1464,9 +1428,11 @@
mutex_exit(&zcw->zcw_lock);
}
- list_destroy(&waiters);
+
+ mutex_exit(&zilog->zl_lock);
mutex_enter(&zilog->zl_lwb_io_lock);
+ txg = lwb->lwb_issued_txg;
ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0);
zilog->zl_lwb_inflight[txg & TXG_MASK]--;
if (zilog->zl_lwb_inflight[txg & TXG_MASK] == 0)
@@ -1700,41 +1666,46 @@
EQUIV(lwb->lwb_root_zio == NULL, lwb->lwb_state == LWB_STATE_CLOSED);
EQUIV(lwb->lwb_root_zio != NULL, lwb->lwb_state == LWB_STATE_OPENED);
- if (lwb->lwb_root_zio != NULL)
- return;
-
- lwb->lwb_root_zio = zio_root(zilog->zl_spa,
- zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL);
-
- abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
- BP_GET_LSIZE(&lwb->lwb_blk));
-
- if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
- prio = ZIO_PRIORITY_SYNC_WRITE;
- else
- prio = ZIO_PRIORITY_ASYNC_WRITE;
-
SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */
mutex_enter(&zilog->zl_lock);
- if (!lwb->lwb_fastwrite) {
- metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
- lwb->lwb_fastwrite = 1;
- }
+ if (lwb->lwb_root_zio == NULL) {
+ abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
+ BP_GET_LSIZE(&lwb->lwb_blk));
+
+ if (!lwb->lwb_fastwrite) {
+ metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
+ lwb->lwb_fastwrite = 1;
+ }
+
+ if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
+ prio = ZIO_PRIORITY_SYNC_WRITE;
+ else
+ prio = ZIO_PRIORITY_ASYNC_WRITE;
- lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0,
- &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
- zil_lwb_write_done, lwb, prio,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
+ lwb->lwb_root_zio = zio_root(zilog->zl_spa,
+ zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL);
+ ASSERT3P(lwb->lwb_root_zio, !=, NULL);
- lwb->lwb_state = LWB_STATE_OPENED;
+ lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio,
+ zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd,
+ BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb,
+ prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
+ ASSERT3P(lwb->lwb_write_zio, !=, NULL);
- zil_lwb_set_zio_dependency(zilog, lwb);
- zilog->zl_last_lwb_opened = lwb;
+ lwb->lwb_state = LWB_STATE_OPENED;
+
+ zil_lwb_set_zio_dependency(zilog, lwb);
+ zilog->zl_last_lwb_opened = lwb;
+ }
mutex_exit(&zilog->zl_lock);
+
+ ASSERT3P(lwb->lwb_root_zio, !=, NULL);
+ ASSERT3P(lwb->lwb_write_zio, !=, NULL);
+ ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
}
/*
@@ -1765,11 +1736,11 @@
static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
/*
- * Close the log block for being issued and allocate the next one.
- * Has to be called under zl_issuer_lock to chain more lwbs.
+ * Start a log block write and advance to the next log block.
+ * Calls are serialized.
*/
static lwb_t *
-zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
+zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
{
lwb_t *nlwb = NULL;
zil_chain_t *zilc;
@@ -1777,7 +1748,7 @@
blkptr_t *bp;
dmu_tx_t *tx;
uint64_t txg;
- uint64_t zil_blksz;
+ uint64_t zil_blksz, wsz;
int i, error;
boolean_t slog;
@@ -1786,17 +1757,16 @@
ASSERT3P(lwb->lwb_write_zio, !=, NULL);
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
- /*
- * If this lwb includes indirect writes, we have to commit before
- * creating the transaction, otherwise we may end up in dead lock.
- */
- if (lwb->lwb_indirect) {
- for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
- itx = list_next(&lwb->lwb_itxs, itx))
- zil_lwb_commit(zilog, lwb, itx);
- lwb->lwb_nused = lwb->lwb_nfilled;
+ if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
+ zilc = (zil_chain_t *)lwb->lwb_buf;
+ bp = &zilc->zc_next_blk;
+ } else {
+ zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
+ bp = &zilc->zc_next_blk;
}
+ ASSERT(lwb->lwb_nused <= lwb->lwb_sz);
+
/*
* Allocate the next block and save its address in this block
* before writing it in order to establish the log chain.
@@ -1844,18 +1814,19 @@
zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz;
for (i = 0; i < ZIL_PREV_BLKS; i++)
zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]);
- DTRACE_PROBE3(zil__block__size, zilog_t *, zilog,
- uint64_t, zil_blksz,
- uint64_t, zilog->zl_prev_blks[zilog->zl_prev_rotor]);
zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
- if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2)
- zilc = (zil_chain_t *)lwb->lwb_buf;
- else
- zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
- bp = &zilc->zc_next_blk;
BP_ZERO(bp);
error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog);
+ if (slog) {
+ ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
+ ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
+ lwb->lwb_nused);
+ } else {
+ ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
+ ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
+ lwb->lwb_nused);
+ }
if (error == 0) {
ASSERT3U(bp->blk_birth, ==, txg);
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
@@ -1867,68 +1838,17 @@
nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
}
- lwb->lwb_state = LWB_STATE_ISSUED;
-
- dmu_tx_commit(tx);
-
- /*
- * We need to acquire the config lock for the lwb to issue it later.
- * However, if we already have a queue of closed parent lwbs already
- * holding the config lock (but not yet issued), we can't block here
- * waiting on the lock or we will deadlock. In that case we must
- * first issue to parent IOs before waiting on the lock.
- */
- if (ilwbs && !list_is_empty(ilwbs)) {
- if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) {
- lwb_t *tlwb;
- while ((tlwb = list_remove_head(ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, tlwb);
- spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
- }
- } else {
- spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
- }
-
- if (ilwbs)
- list_insert_tail(ilwbs, lwb);
-
- /*
- * If there was an allocation failure then nlwb will be null which
- * forces a txg_wait_synced().
- */
- return (nlwb);
-}
-
-/*
- * Finalize previously closed block and issue the write zio.
- * Does not require locking.
- */
-static void
-zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
-{
- zil_chain_t *zilc;
- int wsz;
-
- /* Actually fill the lwb with the data if not yet. */
- if (!lwb->lwb_indirect) {
- for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
- itx = list_next(&lwb->lwb_itxs, itx))
- zil_lwb_commit(zilog, lwb, itx);
- lwb->lwb_nused = lwb->lwb_nfilled;
- }
-
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
/* For Slim ZIL only write what is used. */
- wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, int);
- ASSERT3S(wsz, <=, lwb->lwb_sz);
+ wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t);
+ ASSERT3U(wsz, <=, lwb->lwb_sz);
zio_shrink(lwb->lwb_write_zio, wsz);
wsz = lwb->lwb_write_zio->io_size;
- zilc = (zil_chain_t *)lwb->lwb_buf;
} else {
wsz = lwb->lwb_sz;
- zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
}
+
zilc->zc_pad = 0;
zilc->zc_nused = lwb->lwb_nused;
zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum;
@@ -1938,28 +1858,22 @@
*/
memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused);
- if (lwb->lwb_slog) {
- ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
- ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
- lwb->lwb_nused);
- ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_write,
- wsz);
- ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_alloc,
- BP_GET_LSIZE(&lwb->lwb_blk));
- } else {
- ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
- ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
- lwb->lwb_nused);
- ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_write,
- wsz);
- ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
- BP_GET_LSIZE(&lwb->lwb_blk));
- }
- ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER));
+ spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
+
zil_lwb_add_block(lwb, &lwb->lwb_blk);
lwb->lwb_issued_timestamp = gethrtime();
+ lwb->lwb_state = LWB_STATE_ISSUED;
+
zio_nowait(lwb->lwb_root_zio);
zio_nowait(lwb->lwb_write_zio);
+
+ dmu_tx_commit(tx);
+
+ /*
+ * If there was an allocation failure then nlwb will be null which
+ * forces a txg_wait_synced().
+ */
+ return (nlwb);
}
/*
@@ -1995,19 +1909,13 @@
sizeof (lr_write_t));
}
-/*
- * Estimate space needed in the lwb for the itx. Allocate more lwbs or
- * split the itx as needed, but don't touch the actual transaction data.
- * Has to be called under zl_issuer_lock to call zil_lwb_write_close()
- * to chain more lwbs.
- */
static lwb_t *
-zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
+zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
{
- itx_t *citx;
- lr_t *lr, *clr;
- lr_write_t *lrw;
- uint64_t dlen, dnow, lwb_sp, reclen, max_log_data;
+ lr_t *lrcb, *lrc;
+ lr_write_t *lrwb, *lrw;
+ char *lr_buf;
+ uint64_t dlen, dnow, dpad, lwb_sp, reclen, txg, max_log_data;
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
ASSERT3P(lwb, !=, NULL);
@@ -2015,8 +1923,8 @@
zil_lwb_write_open(zilog, lwb);
- lr = &itx->itx_lr;
- lrw = (lr_write_t *)lr;
+ lrc = &itx->itx_lr;
+ lrw = (lr_write_t *)lrc;
/*
* A commit itx doesn't represent any on-disk state; instead
@@ -2030,23 +1938,24 @@
*
* For more details, see the comment above zil_commit().
*/
- if (lr->lrc_txtype == TX_COMMIT) {
+ if (lrc->lrc_txtype == TX_COMMIT) {
mutex_enter(&zilog->zl_lock);
zil_commit_waiter_link_lwb(itx->itx_private, lwb);
itx->itx_private = NULL;
mutex_exit(&zilog->zl_lock);
- list_insert_tail(&lwb->lwb_itxs, itx);
return (lwb);
}
- if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
+ if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
dlen = P2ROUNDUP_TYPED(
lrw->lr_length, sizeof (uint64_t), uint64_t);
+ dpad = dlen - lrw->lr_length;
} else {
- dlen = 0;
+ dlen = dpad = 0;
}
- reclen = lr->lrc_reclen;
+ reclen = lrc->lrc_reclen;
zilog->zl_cur_used += (reclen + dlen);
+ txg = lrc->lrc_txg;
cont:
/*
@@ -2059,7 +1968,7 @@
lwb_sp < zil_max_waste_space(zilog) &&
(dlen % max_log_data == 0 ||
lwb_sp < reclen + dlen % max_log_data))) {
- lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
+ lwb = zil_lwb_write_issue(zilog, lwb);
if (lwb == NULL)
return (NULL);
zil_lwb_write_open(zilog, lwb);
@@ -2078,99 +1987,19 @@
}
dnow = MIN(dlen, lwb_sp - reclen);
- if (dlen > dnow) {
- ASSERT3U(lr->lrc_txtype, ==, TX_WRITE);
- ASSERT3U(itx->itx_wr_state, ==, WR_NEED_COPY);
- citx = zil_itx_clone(itx);
- clr = &citx->itx_lr;
- lr_write_t *clrw = (lr_write_t *)clr;
- clrw->lr_length = dnow;
- lrw->lr_offset += dnow;
- lrw->lr_length -= dnow;
- } else {
- citx = itx;
- clr = lr;
- }
-
- /*
- * We're actually making an entry, so update lrc_seq to be the
- * log record sequence number. Note that this is generally not
- * equal to the itx sequence number because not all transactions
- * are synchronous, and sometimes spa_sync() gets there first.
- */
- clr->lrc_seq = ++zilog->zl_lr_seq;
-
- lwb->lwb_nused += reclen + dnow;
- ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
- ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
-
- zil_lwb_add_txg(lwb, lr->lrc_txg);
- list_insert_tail(&lwb->lwb_itxs, citx);
-
- dlen -= dnow;
- if (dlen > 0) {
- zilog->zl_cur_used += reclen;
- goto cont;
- }
-
- /*
- * We have to really issue all queued LWBs before we may have to
- * wait for a txg sync. Otherwise we may end up in a dead lock.
- */
- if (lr->lrc_txtype == TX_WRITE) {
- boolean_t frozen = lr->lrc_txg > spa_freeze_txg(zilog->zl_spa);
- if (frozen || itx->itx_wr_state == WR_INDIRECT) {
- lwb_t *tlwb;
- while ((tlwb = list_remove_head(ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, tlwb);
- }
- if (itx->itx_wr_state == WR_INDIRECT)
- lwb->lwb_indirect = B_TRUE;
- if (frozen)
- txg_wait_synced(zilog->zl_dmu_pool, lr->lrc_txg);
- }
-
- return (lwb);
-}
-
-/*
- * Fill the actual transaction data into the lwb, following zil_lwb_assign().
- * Does not require locking.
- */
-static void
-zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
-{
- lr_t *lr, *lrb;
- lr_write_t *lrw, *lrwb;
- char *lr_buf;
- uint64_t dlen, reclen;
-
- lr = &itx->itx_lr;
- lrw = (lr_write_t *)lr;
-
- if (lr->lrc_txtype == TX_COMMIT)
- return;
-
- if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
- dlen = P2ROUNDUP_TYPED(
- lrw->lr_length, sizeof (uint64_t), uint64_t);
- } else {
- dlen = 0;
- }
- reclen = lr->lrc_reclen;
- ASSERT3U(reclen + dlen, <=, lwb->lwb_nused - lwb->lwb_nfilled);
-
- lr_buf = lwb->lwb_buf + lwb->lwb_nfilled;
- memcpy(lr_buf, lr, reclen);
- lrb = (lr_t *)lr_buf; /* Like lr, but inside lwb. */
- lrwb = (lr_write_t *)lrb; /* Like lrw, but inside lwb. */
+ lr_buf = lwb->lwb_buf + lwb->lwb_nused;
+ memcpy(lr_buf, lrc, reclen);
+ lrcb = (lr_t *)lr_buf; /* Like lrc, but inside lwb. */
+ lrwb = (lr_write_t *)lrcb; /* Like lrw, but inside lwb. */
ZIL_STAT_BUMP(zilog, zil_itx_count);
/*
* If it's a write, fetch the data or get its blkptr as appropriate.
*/
- if (lr->lrc_txtype == TX_WRITE) {
+ if (lrc->lrc_txtype == TX_WRITE) {
+ if (txg > spa_freeze_txg(zilog->zl_spa))
+ txg_wait_synced(zilog->zl_dmu_pool, txg);
if (itx->itx_wr_state == WR_COPIED) {
ZIL_STAT_BUMP(zilog, zil_itx_copied_count);
ZIL_STAT_INCR(zilog, zil_itx_copied_bytes,
@@ -2181,10 +2010,14 @@
if (itx->itx_wr_state == WR_NEED_COPY) {
dbuf = lr_buf + reclen;
- lrb->lrc_reclen += dlen;
+ lrcb->lrc_reclen += dnow;
+ if (lrwb->lr_length > dnow)
+ lrwb->lr_length = dnow;
+ lrw->lr_offset += dnow;
+ lrw->lr_length -= dnow;
ZIL_STAT_BUMP(zilog, zil_itx_needcopy_count);
ZIL_STAT_INCR(zilog, zil_itx_needcopy_bytes,
- dlen);
+ dnow);
} else {
ASSERT3S(itx->itx_wr_state, ==, WR_INDIRECT);
dbuf = NULL;
@@ -2211,11 +2044,9 @@
error = zilog->zl_get_data(itx->itx_private,
itx->itx_gen, lrwb, dbuf, lwb,
lwb->lwb_write_zio);
- if (dbuf != NULL && error == 0) {
+ if (dbuf != NULL && error == 0 && dnow == dlen)
/* Zero any padding bytes in the last block. */
- memset((char *)dbuf + lrwb->lr_length, 0,
- dlen - lrwb->lr_length);
- }
+ memset((char *)dbuf + lrwb->lr_length, 0, dpad);
/*
* Typically, the only return values we should see from
@@ -2243,26 +2074,39 @@
error);
zfs_fallthrough;
case EIO:
- if (lwb->lwb_indirect) {
- txg_wait_synced(zilog->zl_dmu_pool,
- lr->lrc_txg);
- } else {
- lwb->lwb_write_zio->io_error = error;
- }
+ txg_wait_synced(zilog->zl_dmu_pool, txg);
zfs_fallthrough;
case ENOENT:
zfs_fallthrough;
case EEXIST:
zfs_fallthrough;
case EALREADY:
- return;
+ return (lwb);
}
}
}
- lwb->lwb_nfilled += reclen + dlen;
- ASSERT3S(lwb->lwb_nfilled, <=, lwb->lwb_nused);
- ASSERT0(P2PHASE(lwb->lwb_nfilled, sizeof (uint64_t)));
+ /*
+ * We're actually making an entry, so update lrc_seq to be the
+ * log record sequence number. Note that this is generally not
+ * equal to the itx sequence number because not all transactions
+ * are synchronous, and sometimes spa_sync() gets there first.
+ */
+ lrcb->lrc_seq = ++zilog->zl_lr_seq;
+ lwb->lwb_nused += reclen + dnow;
+
+ zil_lwb_add_txg(lwb, txg);
+
+ ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
+ ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
+
+ dlen -= dnow;
+ if (dlen > 0) {
+ zilog->zl_cur_used += reclen;
+ goto cont;
+ }
+
+ return (lwb);
}
itx_t *
@@ -2287,16 +2131,6 @@
return (itx);
}
-static itx_t *
-zil_itx_clone(itx_t *oitx)
-{
- itx_t *itx = zio_data_buf_alloc(oitx->itx_size);
- memcpy(itx, oitx, oitx->itx_size);
- itx->itx_callback = NULL;
- itx->itx_callback_data = NULL;
- return (itx);
-}
-
void
zil_itx_destroy(itx_t *itx)
{
@@ -2328,7 +2162,7 @@
/*
* In the general case, commit itxs will not be found
* here, as they'll be committed to an lwb via
- * zil_lwb_assign(), and free'd in that function. Having
+ * zil_lwb_commit(), and free'd in that function. Having
* said that, it is still possible for commit itxs to be
* found here, due to the following race:
*
@@ -2546,10 +2380,10 @@
* This function will traverse the queue of itxs that need to be
* committed, and move them onto the ZIL's zl_itx_commit_list.
*/
-static uint64_t
+static void
zil_get_commit_list(zilog_t *zilog)
{
- uint64_t otxg, txg, wtxg = 0;
+ uint64_t otxg, txg;
list_t *commit_list = &zilog->zl_itx_commit_list;
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
@@ -2583,22 +2417,10 @@
*/
ASSERT(zilog_is_dirty_in_txg(zilog, txg) ||
spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
- list_t *sync_list = &itxg->itxg_itxs->i_sync_list;
- if (unlikely(zilog->zl_suspend > 0)) {
- /*
- * ZIL was just suspended, but we lost the race.
- * Allow all earlier itxs to be committed, but ask
- * caller to do txg_wait_synced(txg) for any new.
- */
- if (!list_is_empty(sync_list))
- wtxg = MAX(wtxg, txg);
- } else {
- list_move_tail(commit_list, sync_list);
- }
+ list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list);
mutex_exit(&itxg->itxg_lock);
}
- return (wtxg);
}
/*
@@ -2739,7 +2561,7 @@
* lwb will be issued to the zio layer to be written to disk.
*/
static void
-zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
+zil_process_commit_list(zilog_t *zilog)
{
spa_t *spa = zilog->zl_spa;
list_t nolwb_itxs;
@@ -2841,23 +2663,18 @@
*/
if (frozen || !synced || lrc->lrc_txtype == TX_COMMIT) {
if (lwb != NULL) {
- lwb = zil_lwb_assign(zilog, lwb, itx, ilwbs);
- if (lwb == NULL) {
+ lwb = zil_lwb_commit(zilog, itx, lwb);
+
+ if (lwb == NULL)
list_insert_tail(&nolwb_itxs, itx);
- } else if ((zcw->zcw_lwb != NULL &&
- zcw->zcw_lwb != lwb) || zcw->zcw_done) {
- /*
- * Our lwb is done, leave the rest of
- * Our lwb is done; leave the rest of
- * the itx list to somebody else who cares.
- first = B_FALSE;
- break;
- }
+ else
+ list_insert_tail(&lwb->lwb_itxs, itx);
} else {
if (lrc->lrc_txtype == TX_COMMIT) {
zil_commit_waiter_link_nolwb(
itx->itx_private, &nolwb_waiters);
}
+
list_insert_tail(&nolwb_itxs, itx);
}
} else {
@@ -2873,8 +2690,6 @@
* the ZIL write pipeline; see the comment within
* zil_commit_writer_stall() for more details.
*/
- while ((lwb = list_remove_head(ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, lwb);
zil_commit_writer_stall(zilog);
/*
@@ -2920,13 +2735,13 @@
* on the system, such that this function will be
* immediately called again (not necessarily by the same
* thread) and this lwb's zio will be issued via
- * zil_lwb_assign(). This way, the lwb is guaranteed to
+ * zil_lwb_commit(). This way, the lwb is guaranteed to
* be "full" when it is issued to disk, and we'll make
* use of the lwb's size the best we can.
*
* 2. If there isn't sufficient ZIL activity occurring on
* the system, such that this lwb's zio isn't issued via
- * zil_lwb_assign(), zil_commit_waiter() will issue the
+ * zil_lwb_commit(), zil_commit_waiter() will issue the
* lwb's zio. If this occurs, the lwb is not guaranteed
* to be "full" by the time its zio is issued, and means
* the size of the lwb was "too large" given the amount
@@ -2958,14 +2773,10 @@
zfs_commit_timeout_pct / 100;
if (sleep < zil_min_commit_timeout ||
lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
- lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
+ lwb = zil_lwb_write_issue(zilog, lwb);
zilog->zl_cur_used = 0;
- if (lwb == NULL) {
- while ((lwb = list_remove_head(ilwbs))
- != NULL)
- zil_lwb_write_issue(zilog, lwb);
+ if (lwb == NULL)
zil_commit_writer_stall(zilog);
- }
}
}
}
@@ -2985,17 +2796,12 @@
* not issued, we rely on future calls to zil_commit_writer() to issue
* the lwb, or the timeout mechanism found in zil_commit_waiter().
*/
-static uint64_t
+static void
zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
{
- list_t ilwbs;
- lwb_t *lwb;
- uint64_t wtxg = 0;
-
ASSERT(!MUTEX_HELD(&zilog->zl_lock));
ASSERT(spa_writeable(zilog->zl_spa));
- list_create(&ilwbs, sizeof (lwb_t), offsetof(lwb_t, lwb_issue_node));
mutex_enter(&zilog->zl_issuer_lock);
if (zcw->zcw_lwb != NULL || zcw->zcw_done) {
@@ -3020,16 +2826,12 @@
ZIL_STAT_BUMP(zilog, zil_commit_writer_count);
- wtxg = zil_get_commit_list(zilog);
+ zil_get_commit_list(zilog);
zil_prune_commit_list(zilog);
- zil_process_commit_list(zilog, zcw, &ilwbs);
+ zil_process_commit_list(zilog);
out:
mutex_exit(&zilog->zl_issuer_lock);
- while ((lwb = list_remove_head(&ilwbs)) != NULL)
- zil_lwb_write_issue(zilog, lwb);
- list_destroy(&ilwbs);
- return (wtxg);
}
static void
@@ -3056,7 +2858,7 @@
return;
/*
- * In order to call zil_lwb_write_close() we must hold the
+ * In order to call zil_lwb_write_issue() we must hold the
* zilog's "zl_issuer_lock". We can't simply acquire that lock,
* since we're already holding the commit waiter's "zcw_lock",
* and those two locks are acquired in the opposite order
@@ -3074,10 +2876,8 @@
* the waiter is marked "done"), so without this check we could
* wind up with a use-after-free error below.
*/
- if (zcw->zcw_done) {
- lwb = NULL;
+ if (zcw->zcw_done)
goto out;
- }
ASSERT3P(lwb, ==, zcw->zcw_lwb);
@@ -3096,17 +2896,15 @@
* if it's ISSUED or OPENED, and block any other threads that might
* attempt to issue this lwb. For that reason we hold the
* zl_issuer_lock when checking the lwb_state; we must not call
- * zil_lwb_write_close() if the lwb had already been issued.
+ * zil_lwb_write_issue() if the lwb had already been issued.
*
* See the comment above the lwb_state_t structure definition for
* more details on the lwb states, and locking requirements.
*/
if (lwb->lwb_state == LWB_STATE_ISSUED ||
lwb->lwb_state == LWB_STATE_WRITE_DONE ||
- lwb->lwb_state == LWB_STATE_FLUSH_DONE) {
- lwb = NULL;
+ lwb->lwb_state == LWB_STATE_FLUSH_DONE)
goto out;
- }
ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
@@ -3116,7 +2914,7 @@
* since we've reached the commit waiter's timeout and it still
* hasn't been issued.
*/
- lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL);
+ lwb_t *nlwb = zil_lwb_write_issue(zilog, lwb);
ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
@@ -3136,7 +2934,7 @@
if (nlwb == NULL) {
/*
- * When zil_lwb_write_close() returns NULL, this
+ * When zil_lwb_write_issue() returns NULL, this
* indicates zio_alloc_zil() failed to allocate the
* "next" lwb on-disk. When this occurs, the ZIL write
* pipeline must be stalled; see the comment within the
@@ -3158,16 +2956,12 @@
* lock, which occurs prior to calling dmu_tx_commit()
*/
mutex_exit(&zcw->zcw_lock);
- zil_lwb_write_issue(zilog, lwb);
- lwb = NULL;
zil_commit_writer_stall(zilog);
mutex_enter(&zcw->zcw_lock);
}
out:
mutex_exit(&zilog->zl_issuer_lock);
- if (lwb)
- zil_lwb_write_issue(zilog, lwb);
ASSERT(MUTEX_HELD(&zcw->zcw_lock));
}
@@ -3182,7 +2976,7 @@
* waited "long enough" and the lwb is still in the "open" state.
*
* Given a sufficient amount of itxs being generated and written using
- * the ZIL, the lwb's zio will be issued via the zil_lwb_assign()
+ * the ZIL, the lwb's zio will be issued via the zil_lwb_commit()
* function. If this does not occur, this secondary responsibility will
* ensure the lwb is issued even if there is no other synchronous
* activity on the system.
@@ -3545,7 +3339,7 @@
zil_commit_waiter_t *zcw = zil_alloc_commit_waiter();
zil_commit_itx_assign(zilog, zcw);
- uint64_t wtxg = zil_commit_writer(zilog, zcw);
+ zil_commit_writer(zilog, zcw);
zil_commit_waiter(zilog, zcw);
if (zcw->zcw_zio_error != 0) {
@@ -3560,8 +3354,6 @@
DTRACE_PROBE2(zil__commit__io__error,
zilog_t *, zilog, zil_commit_waiter_t *, zcw);
txg_wait_synced(zilog->zl_dmu_pool, 0);
- } else if (wtxg != 0) {
- txg_wait_synced(zilog->zl_dmu_pool, wtxg);
}
zil_free_commit_waiter(zcw);
@@ -3864,7 +3656,7 @@
/*
* zl_lwb_max_issued_txg may be larger than lwb_max_txg. It depends
* on the time when the dmu_tx transaction is assigned in
- * zil_lwb_write_close().
+ * zil_lwb_write_issue().
*/
mutex_enter(&zilog->zl_lwb_io_lock);
txg = MAX(zilog->zl_lwb_max_issued_txg, txg);
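[Editorial note: zil_parse() above goes back to copying each log block into one preallocated SPA_OLD_MAXBLOCKSIZE buffer and walking the self-sized records, rather than holding ARC buffers across the walk. The chain walk reduced to a sketch, where read_block() and handle_record() are stand-ins for zil_read_log_block() and the parse callback; lr_t and lrc_reclen are the real types:]

char *lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE);
blkptr_t blk, next_blk;

for (blk = first_blk; !BP_IS_HOLE(&blk); blk = next_blk) {
	char *end = NULL;
	if (read_block(&blk, &next_blk, lrbuf, &end) != 0)
		break;	/* checksum mismatch or end of claimed chain */
	for (char *lrp = lrbuf; lrp < end;
	    lrp += ((lr_t *)lrp)->lrc_reclen)
		handle_record((lr_t *)lrp);
}
zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);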
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -626,6 +626,8 @@
void
zio_add_child(zio_t *pio, zio_t *cio)
{
+ zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
+
/*
* Logical I/Os can have logical, gang, or vdev children.
* Gang I/Os can have gang or vdev children.
@@ -634,7 +636,6 @@
*/
ASSERT3S(cio->io_child_type, <=, pio->io_child_type);
- zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
zl->zl_parent = pio;
zl->zl_child = cio;
@@ -643,45 +644,16 @@
ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
- uint64_t *countp = pio->io_children[cio->io_child_type];
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
- countp[w] += !cio->io_state[w];
+ pio->io_children[cio->io_child_type][w] += !cio->io_state[w];
list_insert_head(&pio->io_child_list, zl);
list_insert_head(&cio->io_parent_list, zl);
- mutex_exit(&cio->io_lock);
- mutex_exit(&pio->io_lock);
-}
-
-void
-zio_add_child_first(zio_t *pio, zio_t *cio)
-{
- /*
- * Logical I/Os can have logical, gang, or vdev children.
- * Gang I/Os can have gang or vdev children.
- * Vdev I/Os can only have vdev children.
- * The following ASSERT captures all of these constraints.
- */
- ASSERT3S(cio->io_child_type, <=, pio->io_child_type);
-
- zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
- zl->zl_parent = pio;
- zl->zl_child = cio;
-
- ASSERT(list_is_empty(&cio->io_parent_list));
- list_insert_head(&cio->io_parent_list, zl);
-
- mutex_enter(&pio->io_lock);
-
- ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
-
- uint64_t *countp = pio->io_children[cio->io_child_type];
- for (int w = 0; w < ZIO_WAIT_TYPES; w++)
- countp[w] += !cio->io_state[w];
-
- list_insert_head(&pio->io_child_list, zl);
+ pio->io_child_count++;
+ cio->io_parent_count++;
+ mutex_exit(&cio->io_lock);
mutex_exit(&pio->io_lock);
}
@@ -697,6 +669,9 @@
list_remove(&pio->io_child_list, zl);
list_remove(&cio->io_parent_list, zl);
+ pio->io_child_count--;
+ cio->io_parent_count--;
+
mutex_exit(&cio->io_lock);
mutex_exit(&pio->io_lock);
kmem_cache_free(zio_link_cache, zl);
@@ -871,14 +846,12 @@
zio->io_child_type = ZIO_CHILD_LOGICAL;
if (bp != NULL) {
+ zio->io_bp = (blkptr_t *)bp;
+ zio->io_bp_copy = *bp;
+ zio->io_bp_orig = *bp;
if (type != ZIO_TYPE_WRITE ||
- zio->io_child_type == ZIO_CHILD_DDT) {
- zio->io_bp_copy = *bp;
+ zio->io_child_type == ZIO_CHILD_DDT)
zio->io_bp = &zio->io_bp_copy; /* so caller can free */
- } else {
- zio->io_bp = (blkptr_t *)bp;
- }
- zio->io_bp_orig = *bp;
if (zio->io_child_type == ZIO_CHILD_LOGICAL)
zio->io_logical = zio;
if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp))
@@ -913,7 +886,7 @@
zio->io_logical = pio->io_logical;
if (zio->io_child_type == ZIO_CHILD_GANG)
zio->io_gang_leader = pio->io_gang_leader;
- zio_add_child_first(pio, zio);
+ zio_add_child(pio, zio);
}
taskq_init_ent(&zio->io_tqent);
@@ -1189,8 +1162,9 @@
zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
abd_t *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *children_ready,
- zio_done_func_t *done, void *private, zio_priority_t priority,
- zio_flag_t flags, const zbookmark_phys_t *zb)
+ zio_done_func_t *physdone, zio_done_func_t *done,
+ void *private, zio_priority_t priority, zio_flag_t flags,
+ const zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -1210,6 +1184,7 @@
zio->io_ready = ready;
zio->io_children_ready = children_ready;
+ zio->io_physdone = physdone;
zio->io_prop = *zp;
/*
@@ -1542,11 +1517,16 @@
flags &= ~ZIO_FLAG_IO_ALLOCATING;
}
+
zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, size,
done, private, type, priority, flags, vd, offset, &pio->io_bookmark,
ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
+ zio->io_physdone = pio->io_physdone;
+ if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL)
+ zio->io_logical->io_phys_children++;
+
return (zio);
}
@@ -1634,8 +1614,15 @@
abd_return_buf_copy(zio->io_abd, data, psize);
} else {
ASSERT(!BP_IS_EMBEDDED(bp));
+ ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy);
}
+ if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
+ zio->io_flags |= ZIO_FLAG_DONT_CACHE;
+
+ if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
+ zio->io_flags |= ZIO_FLAG_DONT_CACHE;
+
if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL)
zio->io_pipeline = ZIO_DDT_READ_PIPELINE;
@@ -2730,7 +2717,7 @@
blkptr_t *bp = zio->io_bp;
ASSERT(gio == zio_unique_parent(zio));
- ASSERT(list_is_empty(&zio->io_child_list));
+ ASSERT(zio->io_child_count == 0);
if (zio->io_error)
return;
@@ -2988,7 +2975,7 @@
zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
has_data ? abd_get_offset(pio->io_abd, pio->io_size -
resid) : NULL, lsize, lsize, &zp,
- zio_write_gang_member_ready, NULL,
+ zio_write_gang_member_ready, NULL, NULL,
zio_write_gang_done, &gn->gn_child[g], pio->io_priority,
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
@@ -3450,7 +3437,7 @@
} else {
cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,
zio->io_orig_size, zio->io_orig_size, zp,
- zio_ddt_child_write_ready, NULL,
+ zio_ddt_child_write_ready, NULL, NULL,
zio_ddt_child_write_done, dde, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
@@ -3968,6 +3955,9 @@
zio->io_type == ZIO_TYPE_WRITE ||
zio->io_type == ZIO_TYPE_TRIM)) {
+ if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
+ return (zio);
+
if ((zio = vdev_queue_io(zio)) == NULL)
return (NULL);
@@ -4004,6 +3994,9 @@
vd->vdev_ops != &vdev_draid_spare_ops) {
vdev_queue_io_done(zio);
+ if (zio->io_type == ZIO_TYPE_WRITE)
+ vdev_cache_write(zio);
+
if (zio_injection_enabled && zio->io_error == 0)
zio->io_error = zio_handle_device_injections(vd, zio,
EIO, EILSEQ);
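
Together with the ZIO_FLAG_DONT_CACHE hunks in the read-init path above (level-0 user data and DDT ZAP blocks opt out) and the retry hunk just below (retried I/Os opt out too), these two hunks re-wire the legacy per-vdev read cache into the pipeline. A condensed sketch of the restored hooks, with the surrounding pipeline logic elided:

/* zio_vdev_io_start(): if vdev_cache_read() takes the zio, it is
 * served by (or attached to) the cache and skips both the vdev
 * queue and the device. */
if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
	return (zio);

/* zio_vdev_io_done(): completed writes populate the cache so that
 * later nearby reads can be served from memory. */
if (zio->io_type == ZIO_TYPE_WRITE)
	vdev_cache_write(zio);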
@@ -4113,7 +4106,8 @@
ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_QUEUE)); /* not a leaf */
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_BYPASS)); /* not a leaf */
zio->io_error = 0;
- zio->io_flags |= ZIO_FLAG_IO_RETRY | ZIO_FLAG_DONT_AGGREGATE;
+ zio->io_flags |= ZIO_FLAG_IO_RETRY |
+ ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE;
zio->io_stage = ZIO_STAGE_VDEV_IO_START >> 1;
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE,
zio_requeue_io_start_cut_in_line);
@@ -4153,6 +4147,13 @@
if (zio->io_error)
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
+ if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
+ zio->io_physdone != NULL) {
+ ASSERT(!(zio->io_flags & ZIO_FLAG_DELEGATED));
+ ASSERT(zio->io_child_type == ZIO_CHILD_VDEV);
+ zio->io_physdone(zio->io_logical);
+ }
+
return (zio);
}
@@ -4474,10 +4475,8 @@
zio->io_ready(zio);
}
-#ifdef ZFS_DEBUG
if (bp != NULL && bp != &zio->io_bp_copy)
zio->io_bp_copy = *bp;
-#endif
if (zio->io_error != 0) {
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
@@ -4904,7 +4903,7 @@
return (NULL);
}
- ASSERT(list_is_empty(&zio->io_child_list));
+ ASSERT(zio->io_child_count == 0);
ASSERT(zio->io_reexecute == 0);
ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL));
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -1203,7 +1203,8 @@
* Prefetch is completed, we can do zvol_os_create_minor
* sequentially.
*/
- while ((job = list_remove_head(&minors_list)) != NULL) {
+ while ((job = list_head(&minors_list)) != NULL) {
+ list_remove(&minors_list, job);
if (!job->error)
(void) zvol_os_create_minor(job->name);
kmem_strfree(job->name);
@@ -1310,8 +1311,10 @@
rw_exit(&zvol_state_lock);
/* Drop zvol_state_lock before calling zvol_free() */
- while ((zv = list_remove_head(&free_list)) != NULL)
+ while ((zv = list_head(&free_list)) != NULL) {
+ list_remove(&free_list, zv);
zvol_os_free(zv);
+ }
}
/* Remove minor for this specific volume only */
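
Both zvol.c hunks trade the newer list_remove_head() convenience (added upstream during the 2.2 cycle) for the two-step idiom the 2.1 list API provides; the behavior is identical. Side by side, with the element type abbreviated:

/* 2.2 form, removed by this revert: */
while ((zv = list_remove_head(&free_list)) != NULL)
	zvol_os_free(zv);

/* 2.1 form, restored: take the head, then unlink it explicitly. */
while ((zv = list_head(&free_list)) != NULL) {
	list_remove(&free_list, zv);
	zvol_os_free(zv);
}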
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -128,7 +128,7 @@
'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
'zdb_display_block', 'zdb_encrypted', 'zdb_label_checksum',
'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_objset_id',
- 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2', 'zdb_backup']
+ 'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2']
pre =
post =
tags = ['functional', 'cli_root', 'zdb']
@@ -472,8 +472,7 @@
tags = ['functional', 'cli_root', 'zpool_replace']
[tests/functional/cli_root/zpool_resilver]
-tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart',
- 'zpool_resilver_concurrent']
+tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
tags = ['functional', 'cli_root', 'zpool_resilver']
[tests/functional/cli_root/zpool_scrub]
diff --git a/sys/contrib/openzfs/tests/runfiles/freebsd.run b/sys/contrib/openzfs/tests/runfiles/freebsd.run
--- a/sys/contrib/openzfs/tests/runfiles/freebsd.run
+++ b/sys/contrib/openzfs/tests/runfiles/freebsd.run
@@ -25,8 +25,3 @@
[tests/functional/cli_root/zfs_jail:FreeBSD]
tests = ['zfs_jail_001_pos']
tags = ['functional', 'cli_root', 'zfs_jail']
-
-[tests/functional/pam:FreeBSD]
-tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive',
- 'pam_short_password']
-tags = ['functional', 'pam']
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -140,8 +140,7 @@
tags = ['functional', 'mount']
[tests/functional/pam:Linux]
-tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive',
- 'pam_short_password']
+tests = ['pam_basic', 'pam_nounmount', 'pam_short_password']
tags = ['functional', 'pam']
[tests/functional/procfs:Linux]
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -152,7 +152,6 @@
['FAIL', rewind_reason],
'cli_user/misc/zfs_share_001_neg': ['SKIP', na_reason],
'cli_user/misc/zfs_unshare_001_neg': ['SKIP', na_reason],
- 'pool_checkpoint/checkpoint_discard_busy': ['SKIP', 12053],
'privilege/setup': ['SKIP', na_reason],
'refreserv/refreserv_004_pos': ['FAIL', known_reason],
'rootpool/setup': ['SKIP', na_reason],
@@ -164,8 +163,6 @@
known.update({
'cli_root/zfs_receive/receive-o-x_props_override':
['FAIL', known_reason],
- 'cli_root/zpool_resilver/zpool_resilver_concurrent':
- ['SKIP', na_reason],
'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason],
'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
@@ -173,7 +170,6 @@
'link_count/link_count_001': ['SKIP', na_reason],
'casenorm/mixed_create_failure': ['FAIL', 13215],
'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
- 'rsend/send_raw_ashift': ['SKIP', 14961],
})
elif sys.platform.startswith('linux'):
known.update({
@@ -281,8 +277,6 @@
'mmp/mmp_inactive_import': ['FAIL', known_reason],
'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621],
'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason],
- 'zvol/zvol_misc/zvol_misc_fua': ['SKIP', 14872],
- 'zvol/zvol_misc/zvol_misc_trim': ['SKIP', 14872],
'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason],
'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason],
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c
@@ -501,7 +501,7 @@
srandom(seed);
zfs_btree_init();
- zfs_btree_create(&bt, zfs_btree_compare, NULL, sizeof (uint64_t));
+ zfs_btree_create(&bt, zfs_btree_compare, sizeof (uint64_t));
/*
* This runs the named negative test. None of them should
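
The btree_test.c hunk tracks another API revert: during the 2.2 cycle zfs_btree_create() gained an extra argument after the comparator (a search helper, passed as NULL in the removed line), and 2.1.99 returns to the three-argument form. A minimal usage sketch under that assumption, reusing the test's own comparator:

zfs_btree_t bt;

zfs_btree_init();
zfs_btree_create(&bt, zfs_btree_compare, sizeof (uint64_t));

uint64_t val = 42;
zfs_btree_add(&bt, &val);	/* copies the element into the tree */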
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
--- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
@@ -3706,7 +3706,7 @@
while $do_once || [ $stat1 -ne $stat2 ] || [ $stat2 -eq 0 ]; do
typeset stat1=$(get_arcstat $stat)
- sleep 0.5
+ sleep 2
typeset stat2=$(get_arcstat $stat)
do_once=false
done
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -572,7 +572,6 @@
functional/cli_root/zdb/zdb_006_pos.ksh \
functional/cli_root/zdb/zdb_args_neg.ksh \
functional/cli_root/zdb/zdb_args_pos.ksh \
- functional/cli_root/zdb/zdb_backup.ksh \
functional/cli_root/zdb/zdb_block_size_histogram.ksh \
functional/cli_root/zdb/zdb_checksum.ksh \
functional/cli_root/zdb/zdb_decompress.ksh \
@@ -1143,7 +1142,6 @@
functional/cli_root/zpool_resilver/setup.ksh \
functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh \
functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh \
- functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh \
functional/cli_root/zpool_scrub/cleanup.ksh \
functional/cli_root/zpool_scrub/setup.ksh \
functional/cli_root/zpool_scrub/zpool_scrub_001_neg.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh
deleted file mode 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/ksh
-
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source. A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright (c) 2023, Klara Inc.
-#
-
-. $STF_SUITE/include/libtest.shlib
-
-write_count=8
-blksize=131072
-
-tmpfile=$TEST_BASE_DIR/tmpfile
-
-function cleanup
-{
- datasetexists $TESTPOOL && destroy_pool $TESTPOOL
- rm $tmpfile.1 $tmpfile.2
-}
-
-log_onexit cleanup
-
-log_assert "Verify that zfs send and zdb -B produce the same stream"
-
-verify_runnable "global"
-verify_disk_count "$DISKS" 2
-
-default_mirror_setup_noexit $DISKS
-file_write -o create -w -f $TESTDIR/file -b $blksize -c $write_count
-
-snap=$TESTPOOL/$TESTFS@snap
-log_must zfs snapshot $snap
-typeset -i objsetid=$(zfs get -Ho value objsetid $snap)
-
-sync_pool $TESTPOOL
-
-log_must eval "zfs send -ecL $snap > $tmpfile.1"
-log_must eval "zdb -B $TESTPOOL/$objsetid ecL > $tmpfile.2"
-
-typeset sum1=$(cat $tmpfile.1 | md5sum)
-typeset sum2=$(cat $tmpfile.2 | md5sum)
-
-log_must test "$sum1" = "$sum2"
-
-log_pass "zfs send and zdb -B produce the same stream"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh
deleted file mode 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
-
-#
-# DESCRIPTION:
-# Verify 'zpool clear' doesn't cause concurrent resilvers
-#
-# STRATEGY:
-# 1. Create N(10) virtual disk files.
-# 2. Create draid pool based on the virtual disk files.
-# 3. Fill the filesystem with directories and files.
-# 4. Force-fault 2 vdevs and verify distributed spare is kicked in.
-# 5. Free the distributed spare by replacing the faulty drive.
-# 6. Run zpool clear and verify that it does not initiate 2 resilvers
-# concurrently while distributed spare gets kicked in.
-#
-
-verify_runnable "global"
-
-typeset -ir devs=10
-typeset -ir nparity=1
-typeset -ir ndata=8
-typeset -ir dspare=1
-
-function cleanup
-{
- poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
-
- for i in {0..$devs}; do
- log_must rm -f "$BASEDIR/vdev$i"
- done
-
- for dir in $BASEDIR; do
- if [[ -d $dir ]]; then
- log_must rm -rf $dir
- fi
- done
-
- zed_stop
- zed_cleanup
-}
-
-log_assert "Verify zpool clear on draid pool doesn't cause concurrent resilvers"
-log_onexit cleanup
-
-setup_test_env $TESTPOOL draid${nparity}:${ndata}d:${dspare}s $devs
-
-# ZED needed for sequential resilver
-zed_setup
-log_must zed_start
-
-log_must zpool offline -f $TESTPOOL $BASEDIR/vdev5
-log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
-log_must zpool wait -t resilver $TESTPOOL
-log_must zpool offline -f $TESTPOOL $BASEDIR/vdev6
-
-log_must zpool labelclear -f $BASEDIR/vdev5
-log_must zpool labelclear -f $BASEDIR/vdev6
-
-log_must zpool replace -w $TESTPOOL $BASEDIR/vdev5
-sync_pool $TESTPOOL
-
-log_must zpool events -c
-log_must zpool clear $TESTPOOL
-log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
-log_must zpool wait -t resilver $TESTPOOL
-log_must zpool wait -t scrub $TESTPOOL
-
-nof_resilver=$(zpool events | grep -c resilver_start)
-if [ $nof_resilver = 1 ] ; then
- log_must verify_pool $TESTPOOL
- log_pass "zpool clear on draid pool doesn't cause concurrent resilvers"
-else
- log_fail "FAIL: sequential and healing resilver initiated concurrently"
-fi
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh
@@ -25,7 +25,7 @@
is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
set -A args "" "-s \",\"" "-v" \
- "-f time,cwc,imnb,imsb"
+ "-f time,zcwc,zimnb,zimsb"
log_assert "zilstat generates output and doesn't return an error code"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh
@@ -27,14 +27,15 @@
#
# STRATEGY:
# 1. Create pool with a cache device.
-# 2. Create a random file in that pool and random read for 10 sec.
-# 3. Export pool.
-# 4. Read the amount of log blocks written from the header of the
+# 2. Export and re-import pool without writing any data.
+# 3. Create a random file in that pool and random read for 10 sec.
+# 4. Export pool.
+# 5. Read the amount of log blocks written from the header of the
# L2ARC device.
-# 5. Import pool.
-# 6. Read the amount of log blocks rebuilt in arcstats and compare to
+# 6. Import pool.
+# 7. Read the amount of log blocks rebuilt in arcstats and compare to
# (5).
-# 7. Check if the labels of the L2ARC device are intact.
+# 8. Check if the labels of the L2ARC device are intact.
#
# * We can predict the minimum bytes of L2ARC restored if we subtract
# from the effective size of the cache device the bytes l2arc_evict()
@@ -76,8 +77,10 @@
log_must truncate -s ${cache_sz}M $VDEV_CACHE
-log_must zpool create -f -o ashift=12 $TESTPOOL $VDEV
-log_must zpool add $TESTPOOL cache $VDEV_CACHE
+log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE
+
+log_must zpool export $TESTPOOL
+log_must zpool import -d $VDIR $TESTPOOL
log_must fio $FIO_SCRIPTS/mkfiles.fio
log_must fio $FIO_SCRIPTS/random_reads.fio
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh
@@ -25,6 +25,5 @@
rmconfig
destroy_pool $TESTPOOL
del_user ${username}
-del_user ${username}rec
del_group pamtestgroup
log_must rm -rf "$runstatedir" $TESTDIRS
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_change_unmounted.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_change_unmounted.ksh
deleted file mode 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_change_unmounted.ksh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or https://opensource.org/licenses/CDDL-1.0.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-. $STF_SUITE/tests/functional/pam/utilities.kshlib
-
-if [ -n "$ASAN_OPTIONS" ]; then
- export LD_PRELOAD=$(ldd "$(command -v zfs)" | awk '/libasan\.so/ {print $3}')
-fi
-
-log_mustnot ismounted "$TESTPOOL/pam/${username}"
-keystatus unavailable
-
-genconfig "homes=$TESTPOOL/pam runstatedir=${runstatedir}"
-
-printf "testpass\nsecondpass\nsecondpass\n" | pamtester -v ${pamservice} ${username} chauthtok
-
-log_mustnot ismounted "$TESTPOOL/pam/${username}"
-keystatus unavailable
-
-echo "secondpass" | pamtester ${pamservice} ${username} open_session
-references 1
-log_must ismounted "$TESTPOOL/pam/${username}"
-keystatus available
-
-printf "secondpass\ntestpass\ntestpass\n" | pamtester -v ${pamservice} ${username} chauthtok
-
-log_must ismounted "$TESTPOOL/pam/${username}"
-log_must ismounted "$TESTPOOL/pam/${username}"
-keystatus available
-
-log_must pamtester ${pamservice} ${username} close_session
-references 0
-log_mustnot ismounted "$TESTPOOL/pam/${username}"
-keystatus unavailable
-
-log_pass "done."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_recursive.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_recursive.ksh
deleted file mode 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_recursive.ksh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or https://opensource.org/licenses/CDDL-1.0.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-. $STF_SUITE/tests/functional/pam/utilities.kshlib
-
-if [ -n "$ASAN_OPTIONS" ]; then
- export LD_PRELOAD=$(ldd "$(command -v zfs)" | awk '/libasan\.so/ {print $3}')
-fi
-
-username="${username}rec"
-
-# Set up a deeper hierarchy, a mountpoint that doesn't interfere with other tests,
-# and a user which references that mountpoint
-log_must zfs create "$TESTPOOL/pampam"
-log_must zfs create -o mountpoint="$TESTDIR/rec" "$TESTPOOL/pampam/pam"
-echo "recurpass" | zfs create -o encryption=aes-256-gcm -o keyformat=passphrase \
- -o keylocation=prompt "$TESTPOOL/pampam/pam/${username}"
-log_must zfs unmount "$TESTPOOL/pampam/pam/${username}"
-log_must zfs unload-key "$TESTPOOL/pampam/pam/${username}"
-log_must add_user pamtestgroup ${username} "$TESTDIR/rec"
-
-function keystatus {
- log_must [ "$(get_prop keystatus "$TESTPOOL/pampam/pam/${username}")" = "$1" ]
-}
-
-log_mustnot ismounted "$TESTPOOL/pampam/pam/${username}"
-keystatus unavailable
-
-function test_session {
- echo "recurpass" | pamtester ${pamservice} ${username} open_session
- references 1
- log_must ismounted "$TESTPOOL/pampam/pam/${username}"
- keystatus available
-
- log_must pamtester ${pamservice} ${username} close_session
- references 0
- log_mustnot ismounted "$TESTPOOL/pampam/pam/${username}"
- keystatus unavailable
-}
-
-genconfig "homes=$TESTPOOL/pampam/pam prop_mountpoint runstatedir=${runstatedir}"
-test_session
-
-genconfig "homes=$TESTPOOL/pampam recursive_homes prop_mountpoint runstatedir=${runstatedir}"
-test_session
-
-genconfig "homes=$TESTPOOL recursive_homes prop_mountpoint runstatedir=${runstatedir}"
-test_session
-
-genconfig "homes=* recursive_homes prop_mountpoint runstatedir=${runstatedir}"
-test_session
-
-log_pass "done."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_short_password.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_short_password.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_short_password.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_short_password.ksh
@@ -52,7 +52,7 @@
keystatus available
# Change user and dataset password to short one.
-printf "testpass\nshort\nshort\n" | pamtester -v ${pamservice} ${username} chauthtok
+printf "short\nshort\n" | pamtester ${pamservice} ${username} chauthtok
# Unmount and unload key.
log_must pamtester ${pamservice} ${username} close_session
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
@@ -38,8 +38,6 @@
verify_runnable "global"
-log_unsupported "Skipping, issue https://github.com/openzfs/zfs/issues/12053"
-
function test_cleanup
{
# reset memory limit to 16M
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
@@ -37,10 +37,6 @@
log_assert "Verify raw sending to pools with greater ashift succeeds"
-if is_freebsd; then
- log_unsupported "Runs too long on FreeBSD 14 (Issue #14961)"
-fi
-
function cleanup
{
rm -f $BACKDIR/fs@*
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
@@ -45,15 +45,6 @@
if ! is_linux ; then
log_unsupported "Only linux supports dd with oflag=dsync for FUA writes"
-else
- if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then
- log_unsupported "Disabled while issue #14872 is being worked"
- fi
-
- # Disabled for the CentOS 9 kernel
- if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then
- log_unsupported "Disabled while issue #14872 is being worked"
- fi
fi
typeset datafile1="$(mktemp zvol_misc_fua1.XXXXXX)"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh
@@ -44,15 +44,6 @@
verify_runnable "global"
if is_linux ; then
- if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then
- log_unsupported "Disabled while issue #14872 is being worked"
- fi
-
- # Disabled for the CentOS 9 kernel
- if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then
- log_unsupported "Disabled while issue #14872 is being worked"
- fi
-
# We need '--force' here since the prior tests may leave a filesystem
# on the zvol, and blkdiscard will see that filesystem and print a
# warning unless you force it.
@@ -132,6 +123,7 @@
# Remove old data from previous tests
log_must $trimcmd $zvolpath
+
set_blk_mq 1
log_must_busy zpool export $TESTPOOL
log_must zpool import $TESTPOOL
diff --git a/sys/modules/zfs/Makefile b/sys/modules/zfs/Makefile
--- a/sys/modules/zfs/Makefile
+++ b/sys/modules/zfs/Makefile
@@ -38,7 +38,7 @@
CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS \
-DHAVE_UIO_ZEROCOPY -DWITHOUT_NETDUMP -D__KERNEL -D_SYS_CONDVAR_H_ \
- -D_SYS_VMEM_H_
+ -D_SYS_VMEM_H_ -DIN_FREEBSD_BASE
.if ${MACHINE_ARCH} == "amd64"
CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \
@@ -295,6 +295,7 @@
uberblock.c \
unique.c \
vdev.c \
+ vdev_cache.c \
vdev_draid.c \
vdev_draid_rand.c \
vdev_indirect.c \
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -653,9 +653,6 @@
/* qat is enabled and existed */
/* #undef HAVE_QAT */
-/* struct reclaim_state has reclaimed */
-/* #undef HAVE_RECLAIM_STATE_RECLAIMED */
-
/* register_shrinker is vararg */
/* #undef HAVE_REGISTER_SHRINKER_VARARG */
@@ -1051,7 +1048,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.0-FreeBSD_g009d3288"
+#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gad0a55461"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@@ -1060,7 +1057,7 @@
/* #undef ZFS_META_DATA */
/* Define the maximum compatible kernel version. */
-#define ZFS_META_KVER_MAX "6.3"
+#define ZFS_META_KVER_MAX "6.2"
/* Define the minimum compatible kernel version. */
#define ZFS_META_KVER_MIN "3.10"
@@ -1081,10 +1078,10 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_g009d3288"
+#define ZFS_META_RELEASE "FreeBSD_gad0a55461"
/* Define the project version. */
-#define ZFS_META_VERSION "2.2.0"
+#define ZFS_META_VERSION "2.1.99"
/* count is located in percpu_ref.data */
/* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.2.0-rc1-0-g009d3288d"
+#define ZFS_META_GITREV "zfs-2.1.99-1955-gad0a55461"
