D44720.diff

diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c
--- a/sys/cam/ctl/ctl.c
+++ b/sys/cam/ctl/ctl.c
@@ -83,6 +83,7 @@
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_debug.h>
+#include <cam/ctl/ctl_nvme_all.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_error.h>
@@ -447,6 +448,8 @@
static void ctl_failover_lun(union ctl_io *io);
static void ctl_scsiio_precheck(struct ctl_scsiio *ctsio);
static int ctl_scsiio(struct ctl_scsiio *ctsio);
+static void ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio);
+static int ctl_nvmeio(struct ctl_nvmeio *ctnio);
static int ctl_target_reset(union ctl_io *io);
static void ctl_do_lun_reset(struct ctl_lun *lun, uint32_t initidx,
@@ -4963,7 +4966,6 @@
int retval;
CTL_DEBUG_PRINT(("ctl_config_move_done\n"));
- CTL_IO_ASSERT(io, SCSI);
if (ctl_debug & CTL_DEBUG_CDB_DATA)
ctl_data_print(io);
@@ -4998,7 +5000,17 @@
* XXX KDM call ctl_scsiio() again for now, and check flag
* bits to see whether we're allocated or not.
*/
- retval = ctl_scsiio(&io->scsiio);
+ switch (io->io_hdr.io_type) {
+ case CTL_IO_SCSI:
+ retval = ctl_scsiio(&io->scsiio);
+ break;
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
+ retval = ctl_nvmeio(&io->nvmeio);
+ break;
+ default:
+ __assert_unreachable();
+ }
}
return (retval);
}
@@ -10598,6 +10610,725 @@
return (CTL_RETVAL_COMPLETE);
}
+/*
+ * For NVMe commands, parse the LBA and length.
+ */
+static bool
+ctl_nvme_get_lba_len(struct ctl_nvmeio *ctnio, uint64_t *lba, uint32_t *len)
+{
+ CTL_IO_ASSERT(ctnio, NVME);
+
+ switch (ctnio->cmd.opc) {
+ case NVME_OPC_WRITE:
+ case NVME_OPC_READ:
+ case NVME_OPC_WRITE_UNCORRECTABLE:
+ case NVME_OPC_COMPARE:
+ case NVME_OPC_WRITE_ZEROES:
+ case NVME_OPC_VERIFY:
+ *lba = (uint64_t)le32toh(ctnio->cmd.cdw11) << 32 |
+ le32toh(ctnio->cmd.cdw10);
+ *len = (le32toh(ctnio->cmd.cdw12) & 0xffff) + 1;
+ return (true);
+ default:
+ *lba = 0;
+ *len = 0;
+ return (false);
+ }
+}
+
+static bool
+ctl_nvme_fua(struct ctl_nvmeio *ctnio)
+{
+ return ((le32toh(ctnio->cmd.cdw12) & (1U << 30)) != 0);
+}
+
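
For illustration, a minimal stand-alone sketch (not part of the patch) of the same CDW10-12 decoding for NVM read/write commands. The struct and helper names here are hypothetical, and a little-endian host is assumed where the kernel code uses le32toh():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical mirror of the three command dwords used above. */
struct rw_dwords {
	uint32_t cdw10;		/* SLBA, bits 31:0 */
	uint32_t cdw11;		/* SLBA, bits 63:32 */
	uint32_t cdw12;		/* bits 15:0 NLB (0's based), bit 30 FUA */
};

static void
decode_rw(const struct rw_dwords *d, uint64_t *lba, uint32_t *len, bool *fua)
{
	*lba = (uint64_t)d->cdw11 << 32 | d->cdw10;
	*len = (d->cdw12 & 0xffff) + 1;		/* 0's based on the wire */
	*fua = (d->cdw12 & (1U << 30)) != 0;
}

int
main(void)
{
	struct rw_dwords d = { .cdw10 = 0x1000, .cdw11 = 1,
	    .cdw12 = (1U << 30) | 7 };
	uint64_t lba;
	uint32_t len;
	bool fua;

	decode_rw(&d, &lba, &len, &fua);
	/* Prints lba=0x100001000 len=8 fua=1. */
	printf("lba=%#jx len=%u fua=%d\n", (uintmax_t)lba, len, fua);
	return (0);
}
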
+int
+ctl_nvme_identify(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_lun *lun = CTL_LUN(ctnio);
+ size_t len;
+ int retval;
+ uint8_t cns;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_identify\n"));
+
+ CTL_IO_ASSERT(ctnio, NVME_ADMIN);
+ MPASS(ctnio->cmd.opc == NVME_OPC_IDENTIFY);
+
+ /*
+ * The data buffer for Identify is always 4096 bytes, see the
+ * Identify command (section 5.15) in NVMe base specification 1.4.
+ */
+ len = 4096;
+
+ ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);
+ ctnio->kern_data_len = len;
+ ctnio->kern_total_len = len;
+ ctnio->kern_rel_offset = 0;
+ ctnio->kern_sg_entries = 0;
+
+ ctl_nvme_set_success(ctnio);
+ ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
+ ctnio->be_move_done = ctl_config_move_done;
+
+ /*
+ * If we don't have a LUN, return an empty result for CNS == 0.
+ */
+ if (lun == NULL) {
+ cns = le32toh(ctnio->cmd.cdw10) & 0xff;
+ switch (cns) {
+ case 0:
+ memset(ctnio->kern_data_ptr, 0, len);
+ ctl_datamove((union ctl_io *)ctnio);
+ break;
+ default:
+ ctl_nvme_set_invalid_field(ctnio);
+ break;
+ }
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ retval = lun->backend->config_read((union ctl_io *)ctnio);
+ return (retval);
+}
+
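
The 4096-byte allocation above matches the fixed size of every Identify data structure. As a hedged illustration of what a backend might place in a CNS 0 (Identify Namespace) payload: per the NVMe 1.4 layout, the first three little-endian quadwords are NSZE, NCAP and NUSE. The struct below is a hypothetical minimal view, not FreeBSD's struct nvme_namespace_data:

#include <stdint.h>
#include <string.h>

/* Hypothetical head of the 4096-byte Identify Namespace payload. */
struct idns_head {
	uint64_t nsze;		/* namespace size in logical blocks */
	uint64_t ncap;		/* namespace capacity */
	uint64_t nuse;		/* namespace utilization */
};

static void
fill_idns(uint8_t buf[4096], uint64_t maxlba)
{
	struct idns_head h;

	memset(buf, 0, 4096);
	/* A namespace without thin provisioning might report all three
	 * equal; a little-endian host is assumed for the encoding. */
	h.nsze = h.ncap = h.nuse = maxlba + 1;
	memcpy(buf, &h, sizeof(h));
}

int
main(void)
{
	static uint8_t buf[4096];

	fill_idns(buf, 0xfffff);
	return (0);
}
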
+int
+ctl_nvme_flush(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_lun *lun = CTL_LUN(ctnio);
+ int retval;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_flush\n"));
+
+ CTL_IO_ASSERT(ctnio, NVME);
+ MPASS(ctnio->cmd.opc == NVME_OPC_FLUSH);
+
+ /*
+ * NVMe flushes always flush the entire namespace, not an LBA
+ * range.
+ */
+ retval = lun->backend->config_write((union ctl_io *)ctnio);
+
+ return (retval);
+}
+
+int
+ctl_nvme_read_write(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_lun *lun = CTL_LUN(ctnio);
+ struct ctl_lba_len_flags *lbalen;
+ uint64_t lba;
+ uint32_t num_blocks;
+ int flags, retval;
+ bool isread;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_read_write: command: %#x\n",
+ ctnio->cmd.opc));
+
+ CTL_IO_ASSERT(ctnio, NVME);
+ MPASS(ctnio->cmd.opc == NVME_OPC_WRITE ||
+ ctnio->cmd.opc == NVME_OPC_READ);
+
+ flags = 0;
+ isread = ctnio->cmd.opc == NVME_OPC_READ;
+ ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
+
+ /*
+ * The first check is to make sure we're in bounds, the second
+ * check is to catch wrap-around problems. If the lba + num blocks
+ * is less than the lba, then we've wrapped around and the block
+ * range is invalid anyway.
+ */
+ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
+ || ((lba + num_blocks) < lba)) {
+ ctl_nvme_set_lba_out_of_range(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ /*
+ * Set FUA and/or DPO if caches are disabled.
+ *
+ * For a read this may not be quite correct for the block
+ * backend as any earlier writes to the LBA range should be
+ * flushed to backing store as part of the read.
+ */
+ if (ctl_nvme_fua(ctnio)) {
+ flags |= CTL_LLF_FUA;
+ if (isread)
+ flags |= CTL_LLF_DPO;
+ }
+
+ lbalen = (struct ctl_lba_len_flags *)
+ &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+ lbalen->lba = lba;
+ lbalen->len = num_blocks;
+ lbalen->flags = (isread ? CTL_LLF_READ : CTL_LLF_WRITE) | flags;
+
+ ctnio->kern_total_len = num_blocks * lun->be_lun->blocksize;
+ ctnio->kern_rel_offset = 0;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_read_write: calling data_submit()\n"));
+
+ retval = lun->backend->data_submit((union ctl_io *)ctnio);
+ return (retval);
+}
+
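
The two-part test above first bounds the range against the namespace size, then catches 64-bit wrap-around: if lba + num_blocks overflows, the sum compares less than lba and the first test alone could falsely pass. The same check as a stand-alone helper (name hypothetical; assumes maxlba < UINT64_MAX so maxlba + 1 cannot itself wrap):

#include <stdbool.h>
#include <stdint.h>

static bool
lba_range_ok(uint64_t lba, uint32_t num_blocks, uint64_t maxlba)
{
	/* In-bounds check, then wrap-around check. */
	return (lba + num_blocks <= maxlba + 1 &&
	    lba + num_blocks >= lba);
}

int
main(void)
{
	/* Fits exactly at the end of a 1024-block namespace: true. */
	bool a = lba_range_ok(1016, 8, 1023);
	/* Wraps past UINT64_MAX to 4, so only the second test
	 * rejects it: false. */
	bool b = lba_range_ok(UINT64_MAX - 3, 8, 1023);

	return (a && !b ? 0 : 1);
}
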
+int
+ctl_nvme_write_uncorrectable(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_lun *lun = CTL_LUN(ctnio);
+ struct ctl_lba_len_flags *lbalen;
+ uint64_t lba;
+ uint32_t num_blocks;
+ int retval;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_write_uncorrectable\n"));
+
+ CTL_IO_ASSERT(ctnio, NVME);
+ MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_UNCORRECTABLE);
+
+ ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
+
+ /*
+ * The first check is to make sure we're in bounds, the second
+ * check is to catch wrap-around problems. If the lba + num blocks
+ * is less than the lba, then we've wrapped around and the block
+ * range is invalid anyway.
+ */
+ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
+ || ((lba + num_blocks) < lba)) {
+ ctl_nvme_set_lba_out_of_range(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ lbalen = (struct ctl_lba_len_flags *)
+ &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+ lbalen->lba = lba;
+ lbalen->len = num_blocks;
+ lbalen->flags = 0;
+ retval = lun->backend->config_write((union ctl_io *)ctnio);
+
+ return (retval);
+}
+
+int
+ctl_nvme_compare(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_lun *lun = CTL_LUN(ctnio);
+ struct ctl_lba_len_flags *lbalen;
+ uint64_t lba;
+ uint32_t num_blocks;
+ int flags;
+ int retval;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_compare\n"));
+
+ CTL_IO_ASSERT(ctnio, NVME);
+ MPASS(ctnio->cmd.opc == NVME_OPC_COMPARE);
+
+ flags = 0;
+ ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
+ if (ctl_nvme_fua(ctnio))
+ flags |= CTL_LLF_FUA;
+
+ /*
+ * The first check is to make sure we're in bounds, the second
+ * check is to catch wrap-around problems. If the lba + num blocks
+ * is less than the lba, then we've wrapped around and the block
+ * range is invalid anyway.
+ */
+ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
+ || ((lba + num_blocks) < lba)) {
+ ctl_nvme_set_lba_out_of_range(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ lbalen = (struct ctl_lba_len_flags *)
+ &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+ lbalen->lba = lba;
+ lbalen->len = num_blocks;
+ lbalen->flags = CTL_LLF_COMPARE | flags;
+ ctnio->kern_total_len = num_blocks * lun->be_lun->blocksize;
+ ctnio->kern_rel_offset = 0;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_compare: calling data_submit()\n"));
+ retval = lun->backend->data_submit((union ctl_io *)ctnio);
+ return (retval);
+}
+
+int
+ctl_nvme_write_zeroes(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_lun *lun = CTL_LUN(ctnio);
+ struct ctl_lba_len_flags *lbalen;
+ uint64_t lba;
+ uint32_t num_blocks;
+ int retval;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_write_zeroes\n"));
+
+ CTL_IO_ASSERT(ctnio, NVME);
+ MPASS(ctnio->cmd.opc == NVME_OPC_WRITE_ZEROES);
+
+ ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
+
+ /*
+ * The first check is to make sure we're in bounds, the second
+ * check is to catch wrap-around problems. If the lba + num blocks
+ * is less than the lba, then we've wrapped around and the block
+ * range is invalid anyway.
+ */
+ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
+ || ((lba + num_blocks) < lba)) {
+ ctl_nvme_set_lba_out_of_range(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ lbalen = (struct ctl_lba_len_flags *)
+ &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+ lbalen->lba = lba;
+ lbalen->len = num_blocks;
+ lbalen->flags = 0;
+ retval = lun->backend->config_write((union ctl_io *)ctnio);
+
+ return (retval);
+}
+
+int
+ctl_nvme_dataset_management(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_lun *lun = CTL_LUN(ctnio);
+ struct nvme_dsm_range *r;
+ uint64_t lba;
+ uint32_t len, num_blocks;
+ u_int i, ranges;
+ int retval;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_dataset_management\n"));
+
+ CTL_IO_ASSERT(ctnio, NVME);
+ MPASS(ctnio->cmd.opc == NVME_OPC_DATASET_MANAGEMENT);
+
+ ranges = le32toh(ctnio->cmd.cdw10) & 0xff;
+ len = ranges * sizeof(struct nvme_dsm_range);
+
+ /*
+ * If we've got a kernel request that hasn't been malloced yet,
+ * malloc it and tell the caller the data buffer is here.
+ */
+ if ((ctnio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) {
+ ctnio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);
+ ctnio->kern_data_len = len;
+ ctnio->kern_total_len = len;
+ ctnio->kern_rel_offset = 0;
+ ctnio->kern_sg_entries = 0;
+ ctnio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
+ ctnio->be_move_done = ctl_config_move_done;
+ ctl_datamove((union ctl_io *)ctnio);
+
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ /*
+ * Require a flat buffer of the correct size.
+ */
+ if (ctnio->kern_sg_entries > 0 ||
+ ctnio->kern_total_len - ctnio->kern_data_resid != len)
+ return (CTL_RETVAL_ERROR);
+
+ /*
+ * Verify that none of the ranges are out of bounds.
+ */
+ r = (struct nvme_dsm_range *)ctnio->kern_data_ptr;
+ for (i = 0; i < ranges; i++) {
+ lba = le64toh(r[i].starting_lba);
+ num_blocks = le32toh(r[i].length);
+ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
+ || ((lba + num_blocks) < lba)) {
+ ctl_nvme_set_lba_out_of_range(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+ }
+
+ CTL_DEBUG_PRINT(("ctl_nvme_dataset_management: calling config_write()\n"));
+ retval = lun->backend->config_write((union ctl_io *)ctnio);
+ return (retval);
+}
+
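
Each Dataset Management range entry carries a starting LBA and a length in logical blocks, and the loop above applies the same bounds-plus-wrap-around test per range. A self-contained sketch with a simplified stand-in for struct nvme_dsm_range (the real wire format is little-endian and the attributes field carries context hints such as "may be deallocated"):

#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for struct nvme_dsm_range. */
struct dsm_range {
	uint32_t attributes;
	uint32_t length;	/* logical blocks */
	uint64_t starting_lba;
};

static bool
dsm_ranges_ok(const struct dsm_range *r, unsigned int nranges,
    uint64_t maxlba)
{
	for (unsigned int i = 0; i < nranges; i++) {
		uint64_t lba = r[i].starting_lba;
		uint64_t end = lba + r[i].length;

		if (end > maxlba + 1 || end < lba)
			return (false);
	}
	return (true);
}

int
main(void)
{
	struct dsm_range r[2] = {
		{ 0, 16, 0 },		/* blocks 0-15 */
		{ 0, 8, 1016 },		/* blocks 1016-1023 */
	};

	return (dsm_ranges_ok(r, 2, 1023) ? 0 : 1);
}
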
+int
+ctl_nvme_verify(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_lun *lun = CTL_LUN(ctnio);
+ struct ctl_lba_len_flags *lbalen;
+ uint64_t lba;
+ uint32_t num_blocks;
+ int flags;
+ int retval;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_verify\n"));
+
+ CTL_IO_ASSERT(ctnio, NVME);
+ MPASS(ctnio->cmd.opc == NVME_OPC_VERIFY);
+
+ flags = 0;
+ ctl_nvme_get_lba_len(ctnio, &lba, &num_blocks);
+ if (ctl_nvme_fua(ctnio))
+ flags |= CTL_LLF_FUA;
+
+ /*
+ * The first check is to make sure we're in bounds, the second
+ * check is to catch wrap-around problems. If the lba + num blocks
+ * is less than the lba, then we've wrapped around and the block
+ * range is invalid anyway.
+ */
+ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1))
+ || ((lba + num_blocks) < lba)) {
+ ctl_nvme_set_lba_out_of_range(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ lbalen = (struct ctl_lba_len_flags *)
+ &ctnio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
+ lbalen->lba = lba;
+ lbalen->len = num_blocks;
+ lbalen->flags = CTL_LLF_VERIFY | flags;
+ ctnio->kern_total_len = 0;
+ ctnio->kern_rel_offset = 0;
+
+ CTL_DEBUG_PRINT(("ctl_nvme_verify: calling data_submit()\n"));
+ retval = lun->backend->data_submit((union ctl_io *)ctnio);
+ return (retval);
+}
+
+static const struct ctl_nvme_cmd_entry *
+ctl_nvme_get_cmd_entry(struct ctl_nvmeio *ctnio)
+{
+ const struct ctl_nvme_cmd_entry *entry;
+
+ switch (ctnio->io_hdr.io_type) {
+ case CTL_IO_NVME:
+ entry = &nvme_nvm_cmd_table[ctnio->cmd.opc];
+ break;
+ case CTL_IO_NVME_ADMIN:
+ entry = &nvme_admin_cmd_table[ctnio->cmd.opc];
+ break;
+ default:
+ __assert_unreachable();
+ }
+ return (entry);
+}
+
+static const struct ctl_nvme_cmd_entry *
+ctl_nvme_validate_command(struct ctl_nvmeio *ctnio)
+{
+ const struct ctl_nvme_cmd_entry *entry;
+
+ entry = ctl_nvme_get_cmd_entry(ctnio);
+ if (entry->execute == NULL) {
+ ctl_nvme_set_invalid_opcode(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (NULL);
+ }
+
+ /* Validate fused commands. */
+ switch (NVMEV(NVME_CMD_FUSE, ctnio->cmd.fuse)) {
+ case NVME_FUSE_NORMAL:
+ break;
+ case NVME_FUSE_FIRST:
+ if (ctnio->io_hdr.io_type != CTL_IO_NVME ||
+ ctnio->cmd.opc != NVME_OPC_COMPARE) {
+ ctl_nvme_set_invalid_field(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (NULL);
+ }
+ break;
+ case NVME_FUSE_SECOND:
+ if (ctnio->io_hdr.io_type != CTL_IO_NVME ||
+ ctnio->cmd.opc != NVME_OPC_WRITE) {
+ ctl_nvme_set_invalid_field(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (NULL);
+ }
+ break;
+ default:
+ ctl_nvme_set_invalid_field(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return (NULL);
+ }
+
+ return (entry);
+}
+
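
NVMEV() here is the usual shift-and-mask field extractor applied to the two-bit FUSE field of command dword 0 (00b normal, 01b first half, 10b second half). A stand-alone equivalent of the token-pasting pattern, with hypothetical macro names:

#include <stdint.h>

#define	FUSE_SHIFT	0
#define	FUSE_MASK	0x3
#define	EXTRACT(name, v)	(((v) >> name##_SHIFT) & name##_MASK)

int
main(void)
{
	uint8_t fuse_byte = 0x02;	/* second half of a fused pair */

	return (EXTRACT(FUSE, fuse_byte) == 0x2 ? 0 : 1);
}
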
+/*
+ * This is a simpler version of ctl_scsiio_lun_check that fails
+ * requests on a LUN without active media.
+ *
+ * Returns true if the command has been completed with an error.
+ */
+static bool
+ctl_nvmeio_lun_check(struct ctl_lun *lun,
+ const struct ctl_nvme_cmd_entry *entry, struct ctl_nvmeio *ctnio)
+{
+ mtx_assert(&lun->lun_lock, MA_OWNED);
+
+ if ((entry->flags & CTL_CMD_FLAG_OK_ON_NO_MEDIA) == 0) {
+ if ((lun->flags & (CTL_LUN_EJECTED | CTL_LUN_NO_MEDIA |
+ CTL_LUN_STOPPED)) != 0) {
+ ctl_nvme_set_namespace_not_ready(ctnio);
+ return (true);
+ }
+ }
+
+ return (false);
+}
+
+/*
+ * Check for blockage against the OOA (Order Of Arrival) queue.
+ * Assumptions:
+ * - pending_io is generally either incoming, or on the blocked queue
+ * - starting I/O is the I/O we want to start the check with.
+ */
+static ctl_action
+ctl_nvme_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io,
+ union ctl_io **starting_io, union ctl_io **aborted_io)
+{
+ union ctl_io *ooa_io = *starting_io;
+
+ CTL_IO_ASSERT(pending_io, NVME, NVME_ADMIN);
+
+ mtx_assert(&lun->lun_lock, MA_OWNED);
+
+ *aborted_io = NULL;
+
+ /*
+ * Aborted commands are not going to be executed and may even
+ * not report completion, so we don't care about their order.
+ * Let them complete ASAP to clean the OOA queue.
+ */
+ if (__predict_false(pending_io->io_hdr.flags & CTL_FLAG_ABORT))
+ return (CTL_ACTION_PASS);
+
+ /*
+ * NVMe has rather simple command ordering requirements. In
+ * particular, there is no requirement on the controller to
+ * enforce a specific order for overlapping LBAs. The only
+ * constraint is that fused operations (Compare and Write),
+ * must be completed as a unit.
+ *
+ * To support fused operations, the following strategy is used:
+ * - the first half of a fused command is not enqueued to rtr
+ * until the second half is enqueued
+ * - the second half of a fused command blocks on the first
+ * half of a fused command
+ * - subsequent commands block on the second half of the
+ * fused command
+ */
+
+ /*
+ * Is the previously submitted command the first half of a
+ * fused operation?
+ */
+ if (ooa_io != NULL &&
+ NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) == NVME_FUSE_FIRST) {
+ /*
+ * If this is the second half, enqueue the first half
+ * and block the second half on the first half.
+ */
+ if (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse) ==
+ NVME_FUSE_SECOND) {
+ /*
+ * XXX: Do we need to wait for other rtr requests
+ * to drain so this is truly atomic?
+ */
+ return (CTL_ACTION_FUSED);
+ }
+
+ /* Abort the first half. */
+ ctl_nvme_set_missing_fused_command(&ooa_io->nvmeio);
+ *aborted_io = ooa_io;
+ } else {
+ switch (NVMEV(NVME_CMD_FUSE, pending_io->nvmeio.cmd.fuse)) {
+ case NVME_FUSE_FIRST:
+ /* First half, wait for the second half. */
+ return (CTL_ACTION_SKIP);
+ case NVME_FUSE_SECOND:
+ /* Second half without a matching first half, abort. */
+ ctl_nvme_set_missing_fused_command(&pending_io->nvmeio);
+ *aborted_io = pending_io;
+ return (CTL_ACTION_SKIP);
+ }
+ }
+
+ /*
+ * Scan the OOA queue looking for the most recent second half
+ * of a fused op.
+ */
+ for (; ooa_io != NULL;
+ ooa_io = (union ctl_io *)LIST_NEXT(&ooa_io->io_hdr, ooa_links)) {
+ if (NVMEV(NVME_CMD_FUSE, ooa_io->nvmeio.cmd.fuse) ==
+ NVME_FUSE_SECOND) {
+ *starting_io = ooa_io;
+ return (CTL_ACTION_BLOCK);
+ }
+ }
+
+ *starting_io = NULL;
+ return (CTL_ACTION_PASS);
+}
+
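
The strategy described in the comments above reduces to a small decision table over the fuse bits of the previous and pending commands. A hedged stand-alone model follows (enum and function names hypothetical; the real function also rewrites the OOA scan cursor and hands back the half to abort):

#include <stdio.h>

enum fuse { FUSE_NORMAL, FUSE_FIRST, FUSE_SECOND };
enum action { ACT_PASS, ACT_FUSED, ACT_SKIP_WAIT, ACT_SKIP_ABORT };

/*
 * prev is the fuse state of the most recently submitted command
 * still on the OOA queue (FUSE_NORMAL when the queue is empty).
 */
static enum action
fused_action(enum fuse prev, enum fuse pending)
{
	if (prev == FUSE_FIRST) {
		/* Completed pair: release both halves as a unit. */
		if (pending == FUSE_SECOND)
			return (ACT_FUSED);
		/* Anything else orphans the first half, which the
		 * caller aborts before continuing normally. */
		return (ACT_PASS);
	}
	switch (pending) {
	case FUSE_FIRST:	/* hold until the second half arrives */
		return (ACT_SKIP_WAIT);
	case FUSE_SECOND:	/* no matching first half: abort */
		return (ACT_SKIP_ABORT);
	default:
		return (ACT_PASS);
	}
}

int
main(void)
{
	printf("%d\n", fused_action(FUSE_FIRST, FUSE_SECOND)); /* ACT_FUSED */
	return (0);
}
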
+static void
+ctl_nvmeio_precheck(struct ctl_nvmeio *ctnio)
+{
+ struct ctl_softc *softc = CTL_SOFTC(ctnio);
+ struct ctl_lun *lun;
+ const struct ctl_nvme_cmd_entry *entry;
+ union ctl_io *bio, *aborted_io;
+ uint32_t targ_lun;
+
+ lun = NULL;
+ targ_lun = ctnio->io_hdr.nexus.targ_mapped_lun;
+ if (targ_lun < ctl_max_luns)
+ lun = softc->ctl_luns[targ_lun];
+ if (lun != NULL) {
+ /*
+ * If the LUN is invalid, pretend that it doesn't exist.
+ * It will go away as soon as all pending I/O has been
+ * completed.
+ */
+ mtx_lock(&lun->lun_lock);
+ if (lun->flags & CTL_LUN_DISABLED) {
+ mtx_unlock(&lun->lun_lock);
+ lun = NULL;
+ }
+ }
+ CTL_LUN(ctnio) = lun;
+ if (lun != NULL) {
+ CTL_BACKEND_LUN(ctnio) = lun->be_lun;
+
+ /*
+ * Every I/O goes into the OOA queue for a particular LUN,
+ * and stays there until completion.
+ */
+#ifdef CTL_TIME_IO
+ if (LIST_EMPTY(&lun->ooa_queue))
+ lun->idle_time += getsbinuptime() - lun->last_busy;
+#endif
+ LIST_INSERT_HEAD(&lun->ooa_queue, &ctnio->io_hdr, ooa_links);
+ }
+
+ /* Get command entry and return error if it is unsupported. */
+ entry = ctl_nvme_validate_command(ctnio);
+ if (entry == NULL) {
+ if (lun)
+ mtx_unlock(&lun->lun_lock);
+ return;
+ }
+
+ ctnio->io_hdr.flags &= ~CTL_FLAG_DATA_MASK;
+ ctnio->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK;
+
+ /* All NVMe commands other than IDENTIFY require a LUN. */
+ if (lun == NULL) {
+ if (entry->flags & CTL_CMD_FLAG_OK_ON_NO_LUN) {
+ ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
+ ctl_enqueue_rtr((union ctl_io *)ctnio);
+ return;
+ }
+
+ ctl_nvme_set_invalid_namespace(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ CTL_DEBUG_PRINT(("ctl_nvmeio_precheck: bailing out due to invalid LUN\n"));
+ return;
+ } else {
+ /*
+ * NVMe namespaces can only be backed by T_DIRECT LUNs.
+ */
+ if (lun->be_lun->lun_type != T_DIRECT) {
+ mtx_unlock(&lun->lun_lock);
+ ctl_nvme_set_invalid_namespace(ctnio);
+ ctl_done((union ctl_io *)ctnio);
+ return;
+ }
+ }
+
+ if (ctl_nvmeio_lun_check(lun, entry, ctnio) != 0) {
+ mtx_unlock(&lun->lun_lock);
+ ctl_done((union ctl_io *)ctnio);
+ return;
+ }
+
+ bio = (union ctl_io *)LIST_NEXT(&ctnio->io_hdr, ooa_links);
+ switch (ctl_nvme_check_ooa(lun, (union ctl_io *)ctnio, &bio,
+ &aborted_io)) {
+ case CTL_ACTION_PASS:
+ ctnio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
+ mtx_unlock(&lun->lun_lock);
+ ctl_enqueue_rtr((union ctl_io *)ctnio);
+ break;
+ case CTL_ACTION_FUSED:
+ /* Block the second half on the first half. */
+ ctnio->io_hdr.blocker = bio;
+ TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr,
+ blocked_links);
+
+ /* Pass the first half. */
+ bio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
+ mtx_unlock(&lun->lun_lock);
+ ctl_enqueue_rtr(bio);
+ break;
+ case CTL_ACTION_SKIP:
+ mtx_unlock(&lun->lun_lock);
+ break;
+ case CTL_ACTION_BLOCK:
+ ctnio->io_hdr.blocker = bio;
+ TAILQ_INSERT_TAIL(&bio->io_hdr.blocked_queue, &ctnio->io_hdr,
+ blocked_links);
+ mtx_unlock(&lun->lun_lock);
+ break;
+ default:
+ __assert_unreachable();
+ }
+ if (aborted_io != NULL)
+ ctl_done(aborted_io);
+}
+
+static int
+ctl_nvmeio(struct ctl_nvmeio *ctnio)
+{
+ const struct ctl_nvme_cmd_entry *entry;
+ int retval;
+
+ CTL_DEBUG_PRINT(("ctl_nvmeio %s opc=%02X\n",
+ ctnio->io_hdr.io_type == CTL_IO_NVME ? "nvm" : "admin",
+ ctnio->cmd.opc));
+
+ entry = ctl_nvme_get_cmd_entry(ctnio);
+ MPASS(entry != NULL);
+
+ /*
+ * If this I/O has been aborted, just send it straight to
+ * ctl_done() without executing it.
+ */
+ if (ctnio->io_hdr.flags & CTL_FLAG_ABORT) {
+ ctl_done((union ctl_io *)ctnio);
+ return (CTL_RETVAL_COMPLETE);
+ }
+
+ /*
+ * All the checks should have been handled by ctl_nvmeio_precheck().
+ * We should be clear now to just execute the I/O.
+ */
+ retval = entry->execute(ctnio);
+
+ return (retval);
+}
+
/*
* For known CDB types, parse the LBA and length.
*/
@@ -11016,7 +11747,7 @@
* we know for sure that the blocker I/O does no longer count.
*/
static void
-ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
+ctl_scsi_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
{
struct ctl_softc *softc = lun->ctl_softc;
union ctl_io *bio, *obio;
@@ -11111,6 +11842,72 @@
}
}
+static void
+ctl_nvme_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
+{
+ union ctl_io *bio;
+ const struct ctl_nvme_cmd_entry *entry;
+
+ CTL_IO_ASSERT(io, NVME, NVME_ADMIN);
+
+ mtx_assert(&lun->lun_lock, MA_OWNED);
+
+ if (io->io_hdr.blocker == NULL)
+ return;
+
+ /*
+ * If this is the second half of a fused operation, it should
+ * be the only io on the blocked list. If the first half
+ * failed, complete the second half with an appropriate error.
+ */
+ bio = io->io_hdr.blocker;
+ if (NVMEV(NVME_CMD_FUSE, io->nvmeio.cmd.fuse) == NVME_FUSE_SECOND) {
+ MPASS(io ==
+ (union ctl_io *)TAILQ_FIRST(&bio->io_hdr.blocked_queue));
+ MPASS(TAILQ_NEXT(&io->io_hdr, blocked_links) == NULL);
+
+ TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr,
+ blocked_links);
+ if (bio->io_hdr.status != CTL_SUCCESS) {
+ ctl_nvme_set_failed_fused_command(&io->nvmeio);
+ ctl_done(io);
+ return;
+ }
+ } else {
+ /*
+ * This must be a command that was blocked on the
+ * second half of a fused operation.
+ */
+ MPASS(NVMEV(NVME_CMD_FUSE, bio->nvmeio.cmd.fuse) ==
+ NVME_FUSE_SECOND);
+ TAILQ_REMOVE(&bio->io_hdr.blocked_queue, &io->io_hdr,
+ blocked_links);
+ }
+
+ entry = ctl_nvme_get_cmd_entry(&io->nvmeio);
+ if (ctl_nvmeio_lun_check(lun, entry, &io->nvmeio) != 0) {
+ ctl_done(io);
+ return;
+ }
+
+ io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR;
+ ctl_enqueue_rtr(io);
+}
+
+static void
+ctl_try_unblock_io(struct ctl_lun *lun, union ctl_io *io, bool skip)
+{
+ switch (io->io_hdr.io_type) {
+ case CTL_IO_SCSI:
+ return (ctl_scsi_try_unblock_io(lun, io, skip));
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
+ return (ctl_nvme_try_unblock_io(lun, io, skip));
+ default:
+ __assert_unreachable();
+ }
+}
+
/*
* Try to unblock I/Os blocked by the specified I/O.
*
@@ -11824,6 +12621,7 @@
*/
LIST_FOREACH(xioh, &lun->ooa_queue, ooa_links) {
union ctl_io *xio = (union ctl_io *)xioh;
+
if ((targ_port == UINT32_MAX ||
targ_port == xioh->nexus.targ_port) &&
(init_id == UINT32_MAX ||
@@ -13196,7 +13994,22 @@
{
struct ctl_port *port = CTL_PORT(io);
- CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0]));
+ switch (io->io_hdr.io_type) {
+ case CTL_IO_SCSI:
+ case CTL_IO_TASK:
+ CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0]));
+ break;
+ case CTL_IO_NVME:
+ CTL_DEBUG_PRINT(("ctl_queue nvme nvm cmd=%02X\n",
+ io->nvmeio.cmd.opc));
+ break;
+ case CTL_IO_NVME_ADMIN:
+ CTL_DEBUG_PRINT(("ctl_queue nvme admin cmd=%02X\n",
+ io->nvmeio.cmd.opc));
+ break;
+ default:
+ break;
+ }
#ifdef CTL_TIME_IO
io->io_hdr.start_time = time_uptime;
@@ -13210,6 +14023,8 @@
switch (io->io_hdr.io_type) {
case CTL_IO_SCSI:
case CTL_IO_TASK:
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
if (ctl_debug & CTL_DEBUG_CDB)
ctl_io_print(io);
ctl_enqueue_incoming(io);
@@ -13249,6 +14064,12 @@
ctl_io_print(io);
ctl_run_task(io);
break;
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
+ if (ctl_debug & CTL_DEBUG_CDB)
+ ctl_io_print(io);
+ ctl_nvmeio_precheck(&io->nvmeio);
+ break;
default:
printf("ctl_run: unknown I/O type %d\n", io->io_hdr.io_type);
return (EINVAL);
@@ -13418,19 +14239,41 @@
if (io != NULL) {
STAILQ_REMOVE_HEAD(&thr->incoming_queue, links);
mtx_unlock(&thr->queue_lock);
- if (io->io_hdr.io_type == CTL_IO_TASK)
+ switch (io->io_hdr.io_type) {
+ case CTL_IO_TASK:
ctl_run_task(io);
- else
+ break;
+ case CTL_IO_SCSI:
ctl_scsiio_precheck(&io->scsiio);
+ break;
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
+ ctl_nvmeio_precheck(&io->nvmeio);
+ break;
+ default:
+ __assert_unreachable();
+ }
continue;
}
io = (union ctl_io *)STAILQ_FIRST(&thr->rtr_queue);
if (io != NULL) {
STAILQ_REMOVE_HEAD(&thr->rtr_queue, links);
mtx_unlock(&thr->queue_lock);
- retval = ctl_scsiio(&io->scsiio);
- if (retval != CTL_RETVAL_COMPLETE)
- CTL_DEBUG_PRINT(("ctl_scsiio failed\n"));
+ switch (io->io_hdr.io_type) {
+ case CTL_IO_SCSI:
+ retval = ctl_scsiio(&io->scsiio);
+ if (retval != CTL_RETVAL_COMPLETE)
+ CTL_DEBUG_PRINT(("ctl_scsiio failed\n"));
+ break;
+ case CTL_IO_NVME:
+ case CTL_IO_NVME_ADMIN:
+ retval = ctl_nvmeio(&io->nvmeio);
+ if (retval != CTL_RETVAL_COMPLETE)
+ CTL_DEBUG_PRINT(("ctl_nvmeio failed\n"));
+ break;
+ default:
+ __assert_unreachable();
+ }
continue;
}
diff --git a/sys/cam/ctl/ctl_nvme_cmd_table.c b/sys/cam/ctl/ctl_nvme_cmd_table.c
new file mode 100644
--- /dev/null
+++ b/sys/cam/ctl/ctl_nvme_cmd_table.c
@@ -0,0 +1,35 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Chelsio Communications, Inc.
+ */
+
+#include <dev/nvme/nvme.h>
+
+#include <cam/ctl/ctl.h>
+#include <cam/ctl/ctl_ha.h>
+#include <cam/ctl/ctl_io.h>
+#include <cam/ctl/ctl_ioctl.h>
+#include <cam/ctl/ctl_private.h>
+
+/* Administrative Command Set (CTL_IO_NVME_ADMIN). */
+const struct ctl_nvme_cmd_entry nvme_admin_cmd_table[256] =
+{
+ [NVME_OPC_IDENTIFY] = { ctl_nvme_identify, CTL_FLAG_DATA_IN |
+ CTL_CMD_FLAG_OK_ON_NO_LUN },
+};
+
+/* NVM Command Set (CTL_IO_NVME). */
+const struct ctl_nvme_cmd_entry nvme_nvm_cmd_table[256] =
+{
+ [NVME_OPC_FLUSH] = { ctl_nvme_flush, CTL_FLAG_DATA_NONE },
+ [NVME_OPC_WRITE] = { ctl_nvme_read_write, CTL_FLAG_DATA_OUT },
+ [NVME_OPC_READ] = { ctl_nvme_read_write, CTL_FLAG_DATA_IN },
+ [NVME_OPC_WRITE_UNCORRECTABLE] = { ctl_nvme_write_uncorrectable,
+ CTL_FLAG_DATA_NONE },
+ [NVME_OPC_COMPARE] = { ctl_nvme_compare, CTL_FLAG_DATA_OUT },
+ [NVME_OPC_WRITE_ZEROES] = { ctl_nvme_write_zeroes, CTL_FLAG_DATA_NONE },
+ [NVME_OPC_DATASET_MANAGEMENT] = { ctl_nvme_dataset_management,
+ CTL_FLAG_DATA_OUT },
+ [NVME_OPC_VERIFY] = { ctl_nvme_verify, CTL_FLAG_DATA_NONE },
+};
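
Both tables rely on C99 designated initializers: every opcode not listed gets a zeroed entry, whose NULL execute hook ctl_nvme_validate_command() converts into an invalid-opcode error. A miniature of the same dispatch pattern (types and opcodes hypothetical):

#include <stdint.h>
#include <stdio.h>

struct cmd_entry {
	int (*execute)(int);
	unsigned int flags;
};

static int do_flush(int arg) { (void)arg; return (0); }
static int do_read(int arg) { return (arg); }

/* Unlisted opcodes default to { NULL, 0 }, i.e. unsupported. */
static const struct cmd_entry cmd_table[256] = {
	[0x00] = { do_flush, 0 },
	[0x02] = { do_read, 1 },
};

static int
dispatch(uint8_t opc, int arg)
{
	const struct cmd_entry *e = &cmd_table[opc];

	if (e->execute == NULL) {
		fprintf(stderr, "invalid opcode %#x\n", opc);
		return (-1);
	}
	return (e->execute(arg));
}

int
main(void)
{
	dispatch(0x02, 42);	/* supported */
	dispatch(0x7f, 0);	/* unsupported, reports an error */
	return (0);
}
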
diff --git a/sys/cam/ctl/ctl_private.h b/sys/cam/ctl/ctl_private.h
--- a/sys/cam/ctl/ctl_private.h
+++ b/sys/cam/ctl/ctl_private.h
@@ -78,7 +78,8 @@
CTL_ACTION_SKIP,
CTL_ACTION_BLOCK,
CTL_ACTION_OVERLAP,
- CTL_ACTION_OVERLAP_TAG
+ CTL_ACTION_OVERLAP_TAG,
+ CTL_ACTION_FUSED,
} ctl_action;
/*
@@ -139,6 +140,12 @@
* after the opcode byte. */
};
+/* Only data flags are currently used for NVMe commands. */
+struct ctl_nvme_cmd_entry {
+ int (*execute)(struct ctl_nvmeio *);
+ ctl_io_flags flags;
+};
+
typedef enum {
CTL_LUN_NONE = 0x000,
CTL_LUN_CONTROL = 0x001,
@@ -412,6 +419,8 @@
#ifdef _KERNEL
extern const struct ctl_cmd_entry ctl_cmd_table[256];
+extern const struct ctl_nvme_cmd_entry nvme_admin_cmd_table[256];
+extern const struct ctl_nvme_cmd_entry nvme_nvm_cmd_table[256];
uint32_t ctl_get_initindex(struct ctl_nexus *nexus);
int ctl_lun_map_init(struct ctl_port *port);
@@ -459,6 +468,15 @@
int ctl_report_timestamp(struct ctl_scsiio *ctsio);
int ctl_get_lba_status(struct ctl_scsiio *ctsio);
+int ctl_nvme_identify(struct ctl_nvmeio *ctnio);
+int ctl_nvme_flush(struct ctl_nvmeio *ctnio);
+int ctl_nvme_read_write(struct ctl_nvmeio *ctnio);
+int ctl_nvme_write_uncorrectable(struct ctl_nvmeio *ctnio);
+int ctl_nvme_compare(struct ctl_nvmeio *ctnio);
+int ctl_nvme_write_zeroes(struct ctl_nvmeio *ctnio);
+int ctl_nvme_dataset_management(struct ctl_nvmeio *ctnio);
+int ctl_nvme_verify(struct ctl_nvmeio *ctnio);
+
void ctl_tpc_init(struct ctl_softc *softc);
void ctl_tpc_shutdown(struct ctl_softc *softc);
void ctl_tpc_lun_init(struct ctl_lun *lun);
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -109,6 +109,7 @@
cam/ctl/ctl_frontend_iscsi.c optional ctl cfiscsi
cam/ctl/ctl_ha.c optional ctl
cam/ctl/ctl_nvme_all.c optional ctl
+cam/ctl/ctl_nvme_cmd_table.c optional ctl
cam/ctl/ctl_scsi_all.c optional ctl
cam/ctl/ctl_tpc.c optional ctl
cam/ctl/ctl_tpc_local.c optional ctl
diff --git a/sys/modules/ctl/Makefile b/sys/modules/ctl/Makefile
--- a/sys/modules/ctl/Makefile
+++ b/sys/modules/ctl/Makefile
@@ -13,6 +13,7 @@
SRCS+= ctl_frontend_ioctl.c
SRCS+= ctl_ha.c
SRCS+= ctl_nvme_all.c
+SRCS+= ctl_nvme_cmd_table.c
SRCS+= ctl_scsi_all.c
SRCS+= ctl_tpc.c
SRCS+= ctl_tpc_local.c
