Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102833048
D42051.id128114.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
3 KB
Referenced Files
None
Subscribers
None
D42051.id128114.diff
View Options
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -150,7 +150,6 @@
enum nvme_recovery {
RECOVERY_NONE = 0, /* Normal operations */
RECOVERY_WAITING, /* waiting for the reset to complete */
- RECOVERY_FAILED, /* We have failed, no more I/O */
};
struct nvme_qpair {
struct nvme_controller *ctrlr;
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -1027,6 +1027,17 @@
mtx_assert(&qpair->recovery, MA_OWNED);
+ /*
+ * If the controller is failed, then stop polling. This ensures that any
+ * failure processing that races with the qpair timeout will fail
+ * safely.
+ */
+ if (qpair->ctrlr->is_failed) {
+ nvme_printf(qpair->ctrlr,
+ "Failed controller, stopping watchdog timeout.\n");
+ return;
+ }
+
switch (qpair->recovery_state) {
case RECOVERY_NONE:
/*
@@ -1120,11 +1131,6 @@
nvme_printf(ctrlr, "Waiting for reset to complete\n");
idle = false; /* We want to keep polling */
break;
- case RECOVERY_FAILED:
- KASSERT(qpair->ctrlr->is_failed,
- ("Recovery state failed w/o failed controller\n"));
- idle = true; /* nothing to monitor */
- break;
}
/*
@@ -1244,11 +1250,21 @@
if (tr == NULL || qpair->recovery_state != RECOVERY_NONE) {
/*
* No tracker is available, or the qpair is disabled due to an
- * in-progress controller-level reset or controller failure. If
- * we lose the race with recovery_state, then we may add an
- * extra request to the queue which will be resubmitted later.
- * We only set recovery_state to NONE with qpair->lock also
- * held.
+ * in-progress controller-level reset. If we lose the race with
+ * recovery_state, then we may add an extra request to the queue
+ * which will be resubmitted later. We only set recovery_state
+ * to NONE with qpair->lock also held, so if we observe that the
+ * state is not NONE, we know it can't transition to NONE below
+ * when we've submitted the request to hardware.
+ *
+ * Also, as part of the failure process, we set recovery_state
+ * to RECOVERY_WAITING, so we check here to see if we've failed
+ * the controller. We set it before we call the qpair_fail
+ * functions, which take out the lock before messing with
+ * queued_req. Since we hold that lock, we know it's safe to
+ * either fail directly, or queue the failure should is_failed
+ * be stale. If we lose the race reading is_failed, then
+ * nvme_qpair_fail will fail the queued request.
*/
if (qpair->ctrlr->is_failed) {
@@ -1314,7 +1330,7 @@
mtx_assert(&qpair->recovery, MA_OWNED);
if (mtx_initialized(&qpair->lock))
mtx_assert(&qpair->lock, MA_OWNED);
- KASSERT(qpair->recovery_state != RECOVERY_FAILED,
+ KASSERT(!qpair->ctrlr->is_failed,
("Enabling a failed qpair\n"));
qpair->recovery_state = RECOVERY_NONE;
@@ -1471,10 +1487,6 @@
if (!mtx_initialized(&qpair->lock))
return;
- mtx_lock(&qpair->recovery);
- qpair->recovery_state = RECOVERY_FAILED;
- mtx_unlock(&qpair->recovery);
-
mtx_lock(&qpair->lock);
if (!STAILQ_EMPTY(&qpair->queued_req)) {
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Nov 18, 6:32 PM (40 m, 54 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14702728
Default Alt Text
D42051.id128114.diff (3 KB)
Attached To
Mode
D42051: nvme: Eliminate RECOVERY_FAILED state
Attached
Detach File
Event Timeline
Log In to Comment