D28583.id85607.diff
No OneTemporary
Actions

Size

10 KB

Referenced Files

None

Subscribers

None

D28583.id85607.diff
View Options

	Index: sys/dev/nvme/nvme_ctrlr.c
	===================================================================
	--- sys/dev/nvme/nvme_ctrlr.c
	+++ sys/dev/nvme/nvme_ctrlr.c
	@@ -232,7 +232,8 @@
	mtx_lock(&ctrlr->lock);
	STAILQ_INSERT_TAIL(&ctrlr->fail_req, req, stailq);
	mtx_unlock(&ctrlr->lock);
	- taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task);
	+ if (!ctrlr->is_dying)
	+ taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task);
	}

	static void
	@@ -433,7 +434,8 @@
	*/
	return;

	- taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
	+ if (!ctrlr->is_dying)
	+ taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
	}

	static int
	@@ -1462,6 +1464,8 @@
	{
	int gone, i;

	+ ctrlr->is_dying = true;
	+
	if (ctrlr->resource == NULL)
	goto nores;

	@@ -1481,6 +1485,9 @@
	if (ctrlr->cdev)
	destroy_dev(ctrlr->cdev);

	+ if (ctrlr->taskqueue)
	+ taskqueue_free(ctrlr->taskqueue);
	+
	if (ctrlr->is_initialized) {
	if (!gone) {
	if (ctrlr->hmb_nchunks > 0)
	@@ -1509,9 +1516,6 @@
	if (!gone)
	nvme_ctrlr_disable(ctrlr);

	- if (ctrlr->taskqueue)
	- taskqueue_free(ctrlr->taskqueue);
	-
	if (ctrlr->tag)
	bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);

	Index: sys/dev/nvme/nvme_private.h
	===================================================================
	--- sys/dev/nvme/nvme_private.h
	+++ sys/dev/nvme/nvme_private.h
	@@ -151,7 +151,7 @@
	TAILQ_ENTRY(nvme_tracker) tailq;
	struct nvme_request *req;
	struct nvme_qpair *qpair;
	- struct callout timer;
	+ sbintime_t deadline;
	bus_dmamap_t payload_dma_map;
	uint16_t cid;

	@@ -159,6 +159,12 @@
	bus_addr_t prp_bus_addr;
	};

	+enum nvme_recovery {
	+ RECOVERY_NONE = 0, /* Normal operations */
	+ RECOVERY_START, /* Deadline has passed, start recovering */
	+ RECOVERY_RESET, /* This pass, initiate reset of controller */
	+ RECOVERY_WAITING, /* Waiting for the reset to complete */
	+};
	struct nvme_qpair {
	struct nvme_controller *ctrlr;
	uint32_t id;
	@@ -170,6 +176,12 @@
	struct resource *res;
	void *tag;

	+ struct callout timer;
	+ sbintime_t deadline;
	+ bool timer_armed;
	+ enum nvme_recovery recovery_state;
	+ int ticks;
	+
	uint32_t num_entries;
	uint32_t num_trackers;
	uint32_t sq_tdbl_off;
	@@ -201,8 +213,6 @@

	struct nvme_tracker **act_tr;

	- bool is_enabled;
	-
	struct mtx lock __aligned(CACHE_LINE_SIZE);

	} __aligned(CACHE_LINE_SIZE);
	@@ -305,6 +315,7 @@
	uint32_t notification_sent;

	bool is_failed;
	+ bool is_dying;
	STAILQ_HEAD(, nvme_request) fail_req;

	/* Host Memory Buffer */
	Index: sys/dev/nvme/nvme_qpair.c
	===================================================================
	--- sys/dev/nvme/nvme_qpair.c
	+++ sys/dev/nvme/nvme_qpair.c
	@@ -452,7 +452,6 @@
	}

	mtx_lock(&qpair->lock);
	- callout_stop(&tr->timer);

	if (retry) {
	req->retries++;
	@@ -544,7 +543,7 @@
	* progress. Ignore the interrupt - any I/O that was associated with
	* this interrupt will get retried when the reset is complete.
	*/
	- if (!qpair->is_enabled)
	+ if (qpair->recovery_state != RECOVERY_NONE)
	return (false);

	bus_dmamap_sync(qpair->dma_tag, qpair->queuemem_map,
	@@ -746,6 +745,10 @@
	qpair->cpl_bus_addr = queuemem_phys + cmdsz;
	prpmem_phys = queuemem_phys + cmdsz + cplsz;

	+ callout_init(&qpair->timer, 1);
	+ qpair->timer_armed = false;
	+ qpair->recovery_state = RECOVERY_NONE;
	+
	/*
	* Calcuate the stride of the doorbell register. Many emulators set this
	* value to correspond to a cache line. However, some hardware has set
	@@ -783,7 +786,6 @@
	DOMAINSET_PREF(qpair->domain), M_ZERO | M_WAITOK);
	bus_dmamap_create(qpair->dma_tag_payload, 0,
	&tr->payload_dma_map);
	- callout_init(&tr->timer, 1);
	tr->cid = i;
	tr->qpair = qpair;
	tr->prp = (uint64_t *)prp_list;
	@@ -813,6 +815,8 @@
	{
	struct nvme_tracker *tr;

	+ callout_drain(&qpair->timer);
	+
	if (qpair->tag) {
	bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);
	qpair->tag = NULL;
	@@ -892,65 +896,103 @@
	}

	static void
	-nvme_abort_complete(void *arg, const struct nvme_completion *status)
	+nvme_qpair_timeout(void *arg)
	{
	- struct nvme_tracker *tr = arg;
	+ struct nvme_qpair *qpair = arg;
	+ struct nvme_controller *ctrlr = qpair->ctrlr;
	+ struct nvme_tracker *tr;
	+ struct nvme_tracker *tr_temp;
	+ sbintime_t now;
	+ bool idle;
	+ uint32_t csts;
	+ uint8_t cfs;

	- /*
	- * If cdw0 == 1, the controller was not able to abort the command
	- * we requested. We still need to check the active tracker array,
	- * to cover race where I/O timed out at same time controller was
	- * completing the I/O.
	- */
	- if (status->cdw0 == 1 && tr->qpair->act_tr[tr->cid] != NULL) {
	+ mtx_lock(&qpair->lock);
	+ idle = TAILQ_EMPTY(&qpair->outstanding_tr);
	+again:
	+ switch (qpair->recovery_state) {
	+ case RECOVERY_NONE:
	+ if (idle)
	+ break;
	+ now = getsbinuptime();
	+ TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
	+ if (now > tr->deadline && tr->deadline != 0) {
	+ /*
	+ * We're now past our earliest deadline. We
	+ * need to do expensive things to cope, but next
	+ * time. Flag that and close the door to any
	+ * further processing.
	+ */
	+ qpair->recovery_state = RECOVERY_START;
	+ nvme_printf(ctrlr, "RECOVERY_START %jd vs %jd\n",
	+ (uintmax_t)now, (uintmax_t)tr->deadline);
	+ break;
	+ }
	+ }
	+ break;
	+ case RECOVERY_START:
	/*
	- * An I/O has timed out, and the controller was unable to
	- * abort it for some reason. Construct a fake completion
	- * status, and then complete the I/O's tracker manually.
	+ * Read csts to get value of cfs - controller fatal status.
	+ * If no fatal status, try to call the completion routine, and
	+ * if completes transactions, report a missed interrupt and
	+ * return (this may need to be rate limited). Otherwise, if
	+ * aborts are enabled and the controller is not reporting
	+ * fatal status, abort the command. Otherwise, just reset the
	+ * controller and hope for the best.
	*/
	- nvme_printf(tr->qpair->ctrlr,
	- "abort command failed, aborting command manually\n");
	- nvme_qpair_manual_complete_tracker(tr,
	- NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, ERROR_PRINT_ALL);
	+ csts = nvme_mmio_read_4(ctrlr, csts);
	+ cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK;
	+ if (cfs) {
	+ nvme_printf(ctrlr, "Controller in fatal status, resetting\n");
	+ qpair->recovery_state = RECOVERY_RESET;
	+ goto again;
	+ }
	+ mtx_unlock(&qpair->lock);
	+ if (nvme_qpair_process_completions(qpair)) {
	+ nvme_printf(ctrlr, "Missing interrupt\n");
	+ qpair->recovery_state = RECOVERY_NONE;
	+ // XXX do I need a NOP here to not race?
	+ } else {
	+ nvme_printf(ctrlr, "missed interrupt with nothing complete\n");
	+ qpair->recovery_state = RECOVERY_RESET;
	+ mtx_lock(&qpair->lock);
	+ goto again;
	+ }
	+ mtx_lock(&qpair->lock);
	+ break;
	+ case RECOVERY_RESET:
	+ /*
	+ * If we get here due to a possible surprise hot-unplug event,
	+ * then we let nvme_ctrlr_reset confirm and fail the
	+ * controller.
	+ */
	+ nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n",
	+ cfs ? " and fatal error status" : "");
	+ nvme_printf(ctrlr, "RECOVERY_WAITING\n");
	+ qpair->recovery_state = RECOVERY_WAITING;
	+ nvme_ctrlr_reset(ctrlr);
	+ break;
	+ case RECOVERY_WAITING:
	+ nvme_printf(ctrlr, "waiting\n");
	+ break;
	}
	-}
	-
	-static void
	-nvme_timeout(void *arg)
	-{
	- struct nvme_tracker *tr = arg;
	- struct nvme_qpair *qpair = tr->qpair;
	- struct nvme_controller *ctrlr = qpair->ctrlr;
	- uint32_t csts;
	- uint8_t cfs;

	/*
	- * Read csts to get value of cfs - controller fatal status.
	- * If no fatal status, try to call the completion routine, and
	- * if completes transactions, report a missed interrupt and
	- * return (this may need to be rate limited). Otherwise, if
	- * aborts are enabled and the controller is not reporting
	- * fatal status, abort the command. Otherwise, just reset the
	- * controller and hope for the best.
	+ * Rearm the timeout.
	*/
	- csts = nvme_mmio_read_4(ctrlr, csts);
	- cfs = (csts >> NVME_CSTS_REG_CFS_SHIFT) & NVME_CSTS_REG_CFS_MASK;
	- if (cfs == 0 && nvme_qpair_process_completions(qpair)) {
	- nvme_printf(ctrlr, "Missing interrupt\n");
	- return;
	- }
	- if (ctrlr->enable_aborts && cfs == 0) {
	- nvme_printf(ctrlr, "Aborting command due to a timeout.\n");
	- nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
	- nvme_abort_complete, tr);
	+ if (!idle) {
	+ callout_reset_on(&qpair->timer, qpair->ticks,
	+ nvme_qpair_timeout, qpair, qpair->cpu);
	} else {
	- nvme_printf(ctrlr, "Resetting controller due to a timeout%s.\n",
	- (csts == NVME_GONE) ? " and possible hot unplug" :
	- (cfs ? " and fatal error status" : ""));
	- nvme_ctrlr_reset(ctrlr);
	+ qpair->timer_armed = false;
	}
	+ mtx_unlock(&qpair->lock);
	}

	+/*
	+ * Submit the tracker to the hardware. Must already be in the
	+ * outstanding queue when called.
	+ */
	void
	nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
	{
	@@ -967,12 +1009,18 @@

	if (req->timeout) {
	if (req->cb_fn == nvme_completion_poll_cb)
	- timeout = hz;
	+ timeout = 1;
	else
	- timeout = ctrlr->timeout_period * hz;
	- callout_reset_on(&tr->timer, timeout, nvme_timeout, tr,
	- qpair->cpu);
	- }
	+ timeout = ctrlr->timeout_period;
	+ tr->deadline = getsbinuptime() + timeout * SBT_1S;
	+ if (!qpair->timer_armed) {
	+ qpair->ticks = hz / 2;
	+ qpair->timer_armed = true;
	+ callout_reset_on(&qpair->timer, qpair->ticks,
	+ nvme_qpair_timeout, qpair, qpair->cpu);
	+ }
	+ } else
	+ tr->deadline = SBT_MAX;

	/* Copy the command from the tracker to the submission queue. */
	memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));
	@@ -1047,7 +1095,7 @@
	tr = TAILQ_FIRST(&qpair->free_tr);
	req->qpair = qpair;

	- if (tr == NULL || !qpair->is_enabled) {
	+ if (tr == NULL || qpair->recovery_state != RECOVERY_NONE) {
	/*
	* No tracker is available, or the qpair is disabled due to
	* an in-progress controller-level reset or controller
	@@ -1076,6 +1124,8 @@

	TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
	TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq);
	+ if (!qpair->timer_armed)
	+ tr->deadline = SBT_MAX;
	tr->req = req;

	switch (req->type) {
	@@ -1144,8 +1194,9 @@
	static void
	nvme_qpair_enable(struct nvme_qpair *qpair)
	{
	+ mtx_assert(&qpair->lock, MA_OWNED);

	- qpair->is_enabled = true;
	+ qpair->recovery_state = RECOVERY_NONE;
	}

	void
	@@ -1188,7 +1239,9 @@
	NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL);
	}

	+ mtx_lock(&qpair->lock);
	nvme_qpair_enable(qpair);
	+ mtx_unlock(&qpair->lock);
	}

	void
	@@ -1231,12 +1284,13 @@
	static void
	nvme_qpair_disable(struct nvme_qpair *qpair)
	{
	- struct nvme_tracker *tr;
	+ struct nvme_tracker *tr, *tr_temp;

	- qpair->is_enabled = false;
	mtx_lock(&qpair->lock);
	- TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
	- callout_stop(&tr->timer);
	+ qpair->recovery_state = RECOVERY_WAITING;
	+ TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
	+ tr->deadline = SBT_MAX;
	+ }
	mtx_unlock(&qpair->lock);
	}

File Metadata

Mime Type: text/plain
Expires: Sun, Jan 19, 5:55 PM (19 h, 25 s)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 15953597
Default Alt Text: D28583.id85607.diff (10 KB)

D28583.id85607.diffNo OneTemporaryActions

D28583.id85607.diffView Options

File Metadata

Event Timeline

D28583.id85607.diff
No OneTemporary
Actions

D28583.id85607.diff
View Options