Page MenuHomeFreeBSD

D41866.diff
No OneTemporary

D41866.diff

diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -217,12 +217,15 @@
{
int i;
+ /*
+ * No need to disable queues before failing them. Failing is a superset
+ * of disabling (though pedantically we'd abort the AERs silently with
+ * a different error, but when we fail, that hardly matters).
+ */
ctrlr->is_failed = true;
- nvme_admin_qpair_disable(&ctrlr->adminq);
nvme_qpair_fail(&ctrlr->adminq);
if (ctrlr->ioq != NULL) {
for (i = 0; i < ctrlr->num_io_queues; i++) {
- nvme_io_qpair_disable(&ctrlr->ioq[i]);
nvme_qpair_fail(&ctrlr->ioq[i]);
}
}
@@ -416,34 +419,6 @@
}
}
-static void
-nvme_pre_reset(struct nvme_controller *ctrlr)
-{
- /*
- * Make sure that all the ISRs are done before proceeding with the reset
- * (and also keep any stray interrupts that happen during this process
- * from racing this process). For startup, this is a nop, since the
- * hardware is in a good state. But for recovery, where we randomly
- * reset the hardware, this ensure that we're not racing the ISRs.
- */
- mtx_lock(&ctrlr->adminq.recovery);
- for (int i = 0; i < ctrlr->num_io_queues; i++) {
- mtx_lock(&ctrlr->ioq[i].recovery);
- }
-}
-
-static void
-nvme_post_reset(struct nvme_controller *ctrlr)
-{
- /*
- * Reset complete, unblock ISRs
- */
- mtx_unlock(&ctrlr->adminq.recovery);
- for (int i = 0; i < ctrlr->num_io_queues; i++) {
- mtx_unlock(&ctrlr->ioq[i].recovery);
- }
-}
-
static int
nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
{
@@ -1236,9 +1211,7 @@
int status;
nvme_ctrlr_devctl_log(ctrlr, "RESET", "resetting controller");
- nvme_pre_reset(ctrlr);
status = nvme_ctrlr_hw_reset(ctrlr);
- nvme_post_reset(ctrlr);
if (status == 0)
nvme_ctrlr_start(ctrlr, true);
else
@@ -1725,19 +1698,8 @@
if (ctrlr->is_failed)
return (0);
- nvme_pre_reset(ctrlr);
if (nvme_ctrlr_hw_reset(ctrlr) != 0)
goto fail;
-#ifdef NVME_2X_RESET
- /*
- * Prior to FreeBSD 13.1, FreeBSD's nvme driver reset the hardware twice
- * to get it into a known good state. However, the hardware's state is
- * good and we don't need to do this for proper functioning.
- */
- if (nvme_ctrlr_hw_reset(ctrlr) != 0)
- goto fail;
-#endif
- nvme_post_reset(ctrlr);
/*
* Now that we've reset the hardware, we can restart the controller. Any
@@ -1754,7 +1716,6 @@
* the controller. However, we have to return success for the resume
* itself, due to questionable APIs.
*/
- nvme_post_reset(ctrlr);
nvme_printf(ctrlr, "Failed to reset on resume, failing.\n");
nvme_ctrlr_fail(ctrlr);
(void)atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -150,6 +150,7 @@
enum nvme_recovery {
RECOVERY_NONE = 0, /* Normal operations */
RECOVERY_WAITING, /* waiting for the reset to complete */
+ RECOVERY_FAILED, /* We have failed, no more I/O */
};
struct nvme_qpair {
struct nvme_controller *ctrlr;
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -945,22 +945,38 @@
{
struct nvme_tracker *tr;
+ /*
+ * nvme_complete_tracker must be called without the qpair lock held. It
+ * takes the lock to adjust outstanding_tr list, so make sure we don't
+ * have it yet (since this is a general purpose routine). We take the
+ * lock to make the list traverse safe, but have to drop the lock to
+ * complete any AER. We restart the list scan when we do this to make
+ * this safe. There's interlock with the ISR so we know this tracker
+ * won't be completed twice.
+ */
+ mtx_assert(&qpair->lock, MA_NOTOWNED);
+
+ mtx_lock(&qpair->lock);
tr = TAILQ_FIRST(&qpair->outstanding_tr);
while (tr != NULL) {
if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
+ mtx_unlock(&qpair->lock);
nvme_qpair_manual_complete_tracker(tr,
NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0,
ERROR_PRINT_NONE);
+ mtx_lock(&qpair->lock);
tr = TAILQ_FIRST(&qpair->outstanding_tr);
} else {
tr = TAILQ_NEXT(tr, tailq);
}
}
+ mtx_unlock(&qpair->lock);
}
void
nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
{
+ mtx_assert(&qpair->lock, MA_NOTOWNED);
nvme_admin_qpair_abort_aers(qpair);
nvme_qpair_destroy(qpair);
@@ -1011,17 +1027,6 @@
mtx_assert(&qpair->recovery, MA_OWNED);
- /*
- * If the controller has failed, give up. We're never going to change
- * state from a failed controller: no further transactions are possible.
- * We go ahead and let the timeout expire in many cases for simplicity.
- */
- if (qpair->ctrlr->is_failed) {
- nvme_printf(ctrlr, "Controller failed, giving up\n");
- qpair->timer_armed = false;
- return;
- }
-
switch (qpair->recovery_state) {
case RECOVERY_NONE:
/*
@@ -1108,6 +1113,11 @@
nvme_printf(ctrlr, "Waiting for reset to complete\n");
idle = false; /* We want to keep polling */
break;
+ case RECOVERY_FAILED:
+ KASSERT(qpair->ctrlr->is_failed,
+ ("Recovery state failed w/o failed controller\n"));
+ idle = true; /* nothing to monitor */
+ break;
}
/*
@@ -1297,6 +1307,8 @@
mtx_assert(&qpair->recovery, MA_OWNED);
if (mtx_initialized(&qpair->lock))
mtx_assert(&qpair->lock, MA_OWNED);
+ KASSERT(qpair->recovery_state != RECOVERY_FAILED,
+ ("Enabling a failed qpair\n"));
qpair->recovery_state = RECOVERY_NONE;
}
@@ -1421,12 +1433,13 @@
nvme_admin_qpair_disable(struct nvme_qpair *qpair)
{
mtx_lock(&qpair->recovery);
- mtx_lock(&qpair->lock);
+ mtx_lock(&qpair->lock);
nvme_qpair_disable(qpair);
+ mtx_unlock(&qpair->lock);
+
nvme_admin_qpair_abort_aers(qpair);
- mtx_unlock(&qpair->lock);
mtx_unlock(&qpair->recovery);
}
@@ -1451,18 +1464,27 @@
if (!mtx_initialized(&qpair->lock))
return;
+ mtx_lock(&qpair->recovery);
+ qpair->recovery_state = RECOVERY_FAILED;
+ mtx_unlock(&qpair->recovery);
+
mtx_lock(&qpair->lock);
+ if (!STAILQ_EMPTY(&qpair->queued_req)) {
+ nvme_printf(qpair->ctrlr, "failing queued i/o\n");
+ }
while (!STAILQ_EMPTY(&qpair->queued_req)) {
req = STAILQ_FIRST(&qpair->queued_req);
STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
- nvme_printf(qpair->ctrlr, "failing queued i/o\n");
mtx_unlock(&qpair->lock);
nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
NVME_SC_ABORTED_BY_REQUEST);
mtx_lock(&qpair->lock);
}
+ if (!TAILQ_EMPTY(&qpair->outstanding_tr)) {
+ nvme_printf(qpair->ctrlr, "failing outstanding i/o\n");
+ }
/* Manually abort each outstanding I/O. */
while (!TAILQ_EMPTY(&qpair->outstanding_tr)) {
tr = TAILQ_FIRST(&qpair->outstanding_tr);
@@ -1470,7 +1492,6 @@
* Do not remove the tracker. The abort_tracker path will
* do that for us.
*/
- nvme_printf(qpair->ctrlr, "failing outstanding i/o\n");
mtx_unlock(&qpair->lock);
nvme_qpair_manual_complete_tracker(tr, NVME_SCT_GENERIC,
NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL);

File Metadata

Mime Type
text/plain
Expires
Tue, Oct 1, 7:26 PM (21 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
13273915
Default Alt Text
D41866.diff (6 KB)

Event Timeline