Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F108592937
D41452.id125990.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
5 KB
Referenced Files
None
Subscribers
None
D41452.id125990.diff
View Options
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -425,13 +425,35 @@
TSENTER();
+ /*
+ * Make sure that all the ISRs are done before proceeding with the reset
+ * (and also keep any stray interrupts that happen during this process
+ * from racing this process). For startup, this is a nop, since the
+ * hardware is in a good state. But for recovery, where we randomly
+ * reset the hardware, this ensure that we're not racing the ISRs.
+ */
+ mtx_lock_spin(&ctrlr->adminq.isr_spin);
+ for (int i = 0; i < ctrlr->num_io_queues; i++) {
+ mtx_lock_spin(&ctrlr->ioq[i].isr_spin);
+ }
+
nvme_ctrlr_disable_qpairs(ctrlr);
err = nvme_ctrlr_disable(ctrlr);
if (err != 0)
- return err;
+ goto out;
err = nvme_ctrlr_enable(ctrlr);
+out:
+
+ /*
+ * Reset complete, unblock ISRs
+ */
+ mtx_unlock_spin(&ctrlr->adminq.isr_spin);
+ for (int i = 0; i < ctrlr->num_io_queues; i++) {
+ mtx_unlock_spin(&ctrlr->ioq[i].isr_spin);
+ }
+
TSEXIT();
return (err);
}
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -184,6 +184,7 @@
int64_t num_retries;
int64_t num_failures;
int64_t num_ignored;
+ int64_t num_spinlock_fail;
struct nvme_command *cmd;
struct nvme_completion *cpl;
@@ -201,8 +202,9 @@
struct nvme_tracker **act_tr;
- struct mtx lock __aligned(CACHE_LINE_SIZE);
+ struct mtx isr_spin __aligned(CACHE_LINE_SIZE);
+ struct mtx lock __aligned(CACHE_LINE_SIZE);
} __aligned(CACHE_LINE_SIZE);
struct nvme_namespace {
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -538,6 +538,17 @@
int done = 0;
bool in_panic = dumping || SCHEDULER_STOPPED();
+ /*
+ * Interlock with reset / recovery code. This is an usually uncontended
+ * spinlock to make sure that we drain out of the ISRs before we reset
+ * the card and to prevent races with the recovery process called from
+ * a timeout context.
+ */
+ if (!mtx_trylock_spin(&qpair->isr_spin)) {
+ qpair->num_spinlock_fail++;
+ return (false);
+ }
+
/*
* qpair is not enabled, likely because a controller reset is in
* progress. Ignore the interrupt - any I/O that was associated with
@@ -548,6 +559,7 @@
*/
if (qpair->recovery_state != RECOVERY_NONE) {
qpair->num_ignored++;
+ mtx_unlock_spin(&qpair->isr_spin);
return (false);
}
@@ -673,6 +685,7 @@
qpair->cq_hdbl_off, qpair->cq_head);
}
+ mtx_unlock_spin(&qpair->isr_spin);
return (done != 0);
}
@@ -1007,10 +1020,14 @@
/*
* Next, check to see if we have any completions. If we do,
* we've likely missed an interrupt, but the card is otherwise
- * fine. This will also catch all the commands that are about
- * to timeout (but there's still a tiny race). Since the timeout
- * is long relative to the race between here and the check below,
+ * fine. This will also catch all the commands that are about to
+ * timeout (but there's still a tiny race). Since the timeout is
+ * long relative to the race between here and the check below,
* this is still a win.
+ *
+ * It will also catch misconfigured interrupts. In addition, the
+ * isr_spin lock taken out in nvme_qpair_process_completions
+ * protects against races here, no matter how small.
*/
mtx_unlock(&qpair->lock);
nvme_qpair_process_completions(qpair);
@@ -1019,7 +1036,8 @@
/*
* Somebody else adjusted recovery state while unlocked,
* we should bail. Unlock the qpair and return without
- * doing anything else.
+ * doing anything else. That somebody else has stepped
+ * the state machine into the next stage.
*/
mtx_unlock(&qpair->lock);
return;
@@ -1032,6 +1050,10 @@
now = getsbinuptime();
needs_reset = false;
TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq) {
+ /*
+ * Skip async commands, they are posted to the card for
+ * an indefinite amount of time and have no deadline.
+ */
if (tr->deadline == SBT_MAX)
continue;
if (now > tr->deadline) {
diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c
--- a/sys/dev/nvme/nvme_sysctl.c
+++ b/sys/dev/nvme/nvme_sysctl.c
@@ -165,6 +165,7 @@
qpair->num_retries = 0;
qpair->num_failures = 0;
qpair->num_ignored = 0;
+ qpair->num_spinlock_fail = 0;
}
static int
@@ -242,6 +243,21 @@
return (sysctl_handle_64(oidp, &num_ignored, 0, req));
}
+static int
+nvme_sysctl_num_spinlock_fail(SYSCTL_HANDLER_ARGS)
+{
+ struct nvme_controller *ctrlr = arg1;
+ int64_t num_spinfail = 0;
+ int i;
+
+ num_spinfail = ctrlr->adminq.num_spinlock_fail;
+
+ for (i = 0; i < ctrlr->num_io_queues; i++)
+ num_spinfail += ctrlr->ioq[i].num_spinlock_fail;
+
+ return (sysctl_handle_64(oidp, &num_spinfail, 0, req));
+}
+
static int
nvme_sysctl_reset_stats(SYSCTL_HANDLER_ARGS)
{
@@ -300,6 +316,9 @@
SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_ignored",
CTLFLAG_RD, &qpair->num_ignored,
"Number of interrupts posted, but were administratively ignored");
+ SYSCTL_ADD_QUAD(ctrlr_ctx, que_list, OID_AUTO, "num_spinlock_fail",
+ CTLFLAG_RD, &qpair->num_spinlock_fail,
+ "Number of times that we can't get a spin lock in the ISR");
SYSCTL_ADD_PROC(ctrlr_ctx, que_list, OID_AUTO,
"dump_debug", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
@@ -368,6 +387,11 @@
ctrlr, 0, nvme_sysctl_num_ignored, "IU",
"Number of interrupts ignored administratively");
+ SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
+ "num_spinlock_fail", CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ ctrlr, 0, nvme_sysctl_num_spinlock_fail, "IU",
+ "Number of times that we can't get a spin lock in the ISR");
+
SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
"reset_stats", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr,
0, nvme_sysctl_reset_stats, "IU", "Reset statistics to zero");
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Jan 27, 6:04 PM (4 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16202844
Default Alt Text
D41452.id125990.diff (5 KB)
Attached To
Mode
D41452: nvme: Add exclusion for ISR
Attached
Detach File
Event Timeline
Log In to Comment