Page MenuHomeFreeBSD

D46768.diff
No OneTemporary

D46768.diff

diff --git a/sys/dev/e1000/em_txrx.c b/sys/dev/e1000/em_txrx.c
--- a/sys/dev/e1000/em_txrx.c
+++ b/sys/dev/e1000/em_txrx.c
@@ -455,6 +455,10 @@
"tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
pi->ipi_new_pidx = i;
+ /* Sent data accounting for AIM */
+ txr->tx_bytes += pi->ipi_len;
+ ++txr->tx_packets;
+
return (0);
}
@@ -669,6 +673,7 @@
len = le16toh(rxd->length);
ri->iri_len += len;
+ rxr->rx_bytes += ri->iri_len;
eop = (status & E1000_RXD_STAT_EOP) != 0;
@@ -690,6 +695,8 @@
i++;
} while (!eop);
+ rxr->rx_packets++;
+
if (scctx->isc_capenable & IFCAP_RXCSUM)
em_receive_checksum(status, errors, ri);
@@ -732,6 +739,7 @@
len = le16toh(rxd->wb.upper.length);
ri->iri_len += len;
+ rxr->rx_bytes += ri->iri_len;
eop = (staterr & E1000_RXD_STAT_EOP) != 0;
@@ -752,6 +760,8 @@
i++;
} while (!eop);
+ rxr->rx_packets++;
+
if (scctx->isc_capenable & IFCAP_RXCSUM)
em_receive_checksum(staterr, staterr >> 24, ri);
diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h
--- a/sys/dev/e1000/if_em.h
+++ b/sys/dev/e1000/if_em.h
@@ -1,8 +1,9 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
+ * Copyright (c) 2001-2024, Intel Corporation
* Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
- * All rights reserved.
+ * Copyright (c) 2024 Kevin Bowling <kbowling@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -243,16 +244,19 @@
/* Support AutoMediaDetect for Marvell M88 PHY in i354 */
#define IGB_MEDIA_RESET (1 << 0)
-/* Define the starting Interrupt rate per Queue */
-#define EM_INTS_PER_SEC 8000
+/* Define the interrupt rates and ITR helpers */
+#define EM_INTS_4K 4000
+#define EM_INTS_20K 20000
+#define EM_INTS_70K 70000
+#define EM_INTS_DEFAULT 8000
#define EM_INTS_MULTIPLIER 256
#define EM_ITR_DIVIDEND 1000000000
#define EM_INTS_TO_ITR(i) (EM_ITR_DIVIDEND/(i * EM_INTS_MULTIPLIER))
-#define IGB_ITR_DIVIDEND 1000000
-#define IGB_ITR_SHIFT 2
+#define IGB_EITR_DIVIDEND 1000000
+#define IGB_EITR_SHIFT 2
#define IGB_QVECTOR_MASK 0x7FFC
-#define IGB_INTS_TO_EITR(i) (((IGB_ITR_DIVIDEND/i) & IGB_QVECTOR_MASK) << \
- IGB_ITR_SHIFT)
+#define IGB_INTS_TO_EITR(i) (((IGB_EITR_DIVIDEND / (i)) & IGB_QVECTOR_MASK) << \
+ IGB_EITR_SHIFT) /* (i) is parenthesized so expression arguments divide as a unit; (i) must be non-zero */
#define IGB_LINK_ITR 2000
#define I210_LINK_DELAY 1000
@@ -390,7 +394,11 @@
/* Interrupt resources */
void *tag;
struct resource *res;
+
+ /* Soft stats */
unsigned long tx_irq;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
/* Saved csum offloading context information */
int csum_flags;
@@ -426,6 +434,9 @@
unsigned long rx_discarded;
unsigned long rx_packets;
unsigned long rx_bytes;
+
+ /* Next requested ITR latency */
+ u8 rx_nextlatency;
};
struct em_tx_queue {
@@ -441,6 +452,7 @@
u32 me;
u32 msix;
u32 eims;
+ u32 itr_setting;
struct rx_ring rxr;
u64 irqs;
struct if_irq que_irq;
@@ -489,6 +501,7 @@
u32 rx_mbuf_sz;
+ int enable_aim;
/* Management and WOL features */
u32 wol;
bool has_manage;
@@ -512,6 +525,7 @@
u16 link_duplex;
u32 smartspeed;
u32 dmac;
+ u32 pba;
int link_mask;
int tso_automasked;
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -1,8 +1,9 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
+ * Copyright (c) 2001-2024, Intel Corporation
* Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
- * All rights reserved.
+ * Copyright (c) 2024 Kevin Bowling <kbowling@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -329,10 +330,12 @@
static int em_get_rs(SYSCTL_HANDLER_ARGS);
static void em_print_debug_info(struct e1000_softc *);
static int em_is_valid_ether_addr(u8 *);
+static void em_newitr(struct e1000_softc *, struct em_rx_queue *,
+ struct tx_ring *, struct rx_ring *);
static bool em_automask_tso(if_ctx_t);
static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void em_add_int_delay_sysctl(struct e1000_softc *, const char *,
- const char *, struct em_int_delay_info *, int, int);
+ const char *, struct em_int_delay_info *, int, int);
/* Management and WOL Support */
static void em_init_manageability(struct e1000_softc *);
static void em_release_manageability(struct e1000_softc *);
@@ -545,10 +548,19 @@
SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
"Enable Energy Efficient Ethernet");
+/*
+ * AIM: Adaptive Interrupt Moderation.
+ * The interrupt rate of each interrupt vector is varied over time based
+ * on the traffic seen on that vector.
+ */
+static int em_enable_aim = 1;
+SYSCTL_INT(_hw_em, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &em_enable_aim,
+ 0, "Enable adaptive interrupt moderation (1=normal, 2=lowlatency)");
+
/*
** Tuneable Interrupt rate
*/
-static int em_max_interrupt_rate = EM_INTS_PER_SEC;
+static int em_max_interrupt_rate = EM_INTS_DEFAULT;
SYSCTL_INT(_hw_em, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
&em_max_interrupt_rate, 0, "Maximum interrupts per second");
@@ -832,6 +844,11 @@
CTLTYPE_INT | CTLFLAG_RW, sc, 0,
em_sysctl_nvm_info, "I", "NVM Information");
+ sc->enable_aim = em_enable_aim;
+ SYSCTL_ADD_INT(ctx_list, child, OID_AUTO, "enable_aim",
+ CTLFLAG_RW, &sc->enable_aim, 0,
+ "Interrupt Moderation (1=normal, 2=lowlatency)");
+
SYSCTL_ADD_PROC(ctx_list, child, OID_AUTO, "fw_version",
CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
em_sysctl_print_fw_version, "A",
@@ -1437,6 +1454,159 @@
}
}
+ enum itr_latency_target { /* AIM latency classes; em_newitr() keeps the current one in rx_ring.rx_nextlatency */
+ itr_latency_disabled = 0, /* AIM off or not yet bootstrapped: em_max_interrupt_rate is programmed */
+ itr_latency_lowest = 1, /* mapped to EM_INTS_70K by em_newitr() */
+ itr_latency_low = 2, /* mapped to EM_INTS_20K by em_newitr() */
+ itr_latency_bulk = 3 /* mapped to EM_INTS_4K by em_newitr() */
+ };
+/*********************************************************************
+ *
+ * Helper to calculate next (E)ITR value for AIM
+ *
+ *********************************************************************/
+static void
+em_newitr(struct e1000_softc *sc, struct em_rx_queue *que,
+ struct tx_ring *txr, struct rx_ring *rxr)
+{
+ struct e1000_hw *hw = &sc->hw;
+ u32 newitr;
+ u32 bytes;
+ u32 bytes_packets;
+ u32 packets;
+ u8 nextlatency;
+
+ /* Idle, do nothing */
+ if ((txr->tx_bytes == 0) && (rxr->rx_bytes == 0))
+ return;
+
+ newitr = 0;
+
+ if (sc->enable_aim) {
+ nextlatency = rxr->rx_nextlatency;
+
+ /* Use half default (4K) ITR if sub-gig */
+ if (sc->link_speed != 1000) {
+ newitr = EM_INTS_4K;
+ goto em_set_next_itr;
+ }
+ /* Want at least enough packet buffer for two frames to AIM */
+ if (sc->shared->isc_max_frame_size * 2 > (sc->pba << 10)) {
+ newitr = em_max_interrupt_rate;
+ sc->enable_aim = 0;
+ goto em_set_next_itr;
+ }
+
+ /* Get the largest values from the associated tx and rx ring */
+ if (txr->tx_bytes && txr->tx_packets) {
+ bytes = txr->tx_bytes;
+ bytes_packets = txr->tx_bytes/txr->tx_packets;
+ packets = txr->tx_packets;
+ }
+ if (rxr->rx_bytes && rxr->rx_packets) {
+ bytes = max(bytes, rxr->rx_bytes);
+ bytes_packets = max(bytes_packets, rxr->rx_bytes/rxr->rx_packets);
+ packets = max(packets, rxr->rx_packets);
+ }
+
+ /* Latency state machine */
+ switch (nextlatency) {
+ case itr_latency_disabled: /* Bootstrapping */
+ nextlatency = itr_latency_low;
+ break;
+ case itr_latency_lowest: /* 70k ints/s */
+ /* TSO and jumbo frames */
+ if (bytes_packets > 8000)
+ nextlatency = itr_latency_bulk;
+ else if ((packets < 5) && (bytes > 512))
+ nextlatency = itr_latency_low;
+ break;
+ case itr_latency_low: /* 20k ints/s */
+ if (bytes > 10000) {
+ /* Handle TSO */
+ if (bytes_packets > 8000)
+ nextlatency = itr_latency_bulk;
+ else if ((packets < 10) || (bytes_packets > 1200))
+ nextlatency = itr_latency_bulk;
+ else if (packets > 35)
+ nextlatency = itr_latency_lowest;
+ } else if (bytes_packets > 2000) {
+ nextlatency = itr_latency_bulk;
+ } else if (packets < 3 && bytes < 512) {
+ nextlatency = itr_latency_lowest;
+ }
+ break;
+ case itr_latency_bulk: /* 4k ints/s */
+ if (bytes > 25000) {
+ if (packets > 35)
+ nextlatency = itr_latency_low;
+ } else if (bytes < 1500)
+ nextlatency = itr_latency_low;
+ break;
+ default:
+ nextlatency = itr_latency_low;
+ device_printf(sc->dev, "Unexpected newitr transition %d\n",
+ nextlatency);
+ break;
+ }
+
+ /* Trim itr_latency_lowest for default AIM setting */
+ if (sc->enable_aim == 1 && nextlatency == itr_latency_lowest)
+ nextlatency = itr_latency_low;
+
+ /* Request new latency */
+ rxr->rx_nextlatency = nextlatency;
+ } else {
+ /* We may have toggled to AIM disabled */
+ nextlatency = itr_latency_disabled;
+ rxr->rx_nextlatency = nextlatency;
+ }
+
+ /* ITR state machine */
+ switch(nextlatency) {
+ case itr_latency_lowest:
+ newitr = EM_INTS_70K;
+ break;
+ case itr_latency_low:
+ newitr = EM_INTS_20K;
+ break;
+ case itr_latency_bulk:
+ newitr = EM_INTS_4K;
+ break;
+ case itr_latency_disabled:
+ default:
+ newitr = em_max_interrupt_rate;
+ break;
+ }
+
+em_set_next_itr:
+ if (hw->mac.type >= igb_mac_min) {
+ newitr = IGB_INTS_TO_EITR(newitr);
+
+ if (hw->mac.type == e1000_82575)
+ newitr |= newitr << 16;
+ else
+ newitr |= E1000_EITR_CNT_IGNR;
+
+ if (newitr != que->itr_setting) {
+ que->itr_setting = newitr;
+ E1000_WRITE_REG(hw, E1000_EITR(que->msix), que->itr_setting);
+ }
+ } else {
+ newitr = EM_INTS_TO_ITR(newitr);
+
+ if (newitr != que->itr_setting) {
+ que->itr_setting = newitr;
+ if (hw->mac.type == e1000_82574 && que->msix) {
+ E1000_WRITE_REG(hw,
+ E1000_EITR_82574(que->msix), que->itr_setting);
+ } else {
+ E1000_WRITE_REG(hw, E1000_ITR, que->itr_setting);
+ }
+ }
+ }
+}
+
/*********************************************************************
*
* Fast Legacy/MSI Combined Interrupt Service routine
@@ -1446,10 +1616,14 @@
em_intr(void *arg)
{
struct e1000_softc *sc = arg;
+ struct e1000_hw *hw = &sc->hw;
+ struct em_rx_queue *que = &sc->rx_queues[0];
+ struct tx_ring *txr = &sc->tx_queues[0].txr;
+ struct rx_ring *rxr = &que->rxr;
if_ctx_t ctx = sc->ctx;
u32 reg_icr;
- reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR);
+ reg_icr = E1000_READ_REG(hw, E1000_ICR);
/* Hot eject? */
if (reg_icr == 0xffffffff)
@@ -1463,7 +1637,7 @@
* Starting with the 82571 chip, bit 31 should be used to
* determine whether the interrupt belongs to us.
*/
- if (sc->hw.mac.type >= e1000_82571 &&
+ if (hw->mac.type >= e1000_82571 &&
(reg_icr & E1000_ICR_INT_ASSERTED) == 0)
return FILTER_STRAY;
@@ -1482,6 +1656,15 @@
if (reg_icr & E1000_ICR_RXO)
sc->rx_overruns++;
+ if (hw->mac.type >= e1000_82540)
+ em_newitr(sc, que, txr, rxr);
+
+ /* Reset state */
+ txr->tx_bytes = 0;
+ txr->tx_packets = 0;
+ rxr->rx_bytes = 0;
+ rxr->rx_packets = 0;
+
return (FILTER_SCHEDULE_THREAD);
}
@@ -1534,9 +1717,20 @@
em_msix_que(void *arg)
{
struct em_rx_queue *que = arg;
+ struct e1000_softc *sc = que->sc;
+ struct tx_ring *txr = &sc->tx_queues[que->msix].txr;
+ struct rx_ring *rxr = &que->rxr;
++que->irqs;
+ em_newitr(sc, que, txr, rxr);
+
+ /* Reset state */
+ txr->tx_bytes = 0;
+ txr->tx_packets = 0;
+ rxr->rx_bytes = 0;
+ rxr->rx_packets = 0;
+
return (FILTER_SCHEDULE_THREAD);
}
@@ -2882,6 +3076,9 @@
if (hw->mac.type >= igb_mac_min)
igb_init_dmac(sc, pba);
+ /* Save the final PBA so it can be used elsewhere, e.g. by AIM */
+ sc->pba = pba;
+
E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
e1000_get_phy_info(hw);
e1000_check_for_link(hw);
@@ -3741,6 +3938,7 @@
E1000_WRITE_REG(hw, EM_EIAC, sc->ims);
ims_mask |= sc->ims;
}
+
E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
E1000_WRITE_FLUSH(hw);
}
@@ -4410,6 +4608,57 @@
return (sysctl_handle_int(oidp, &val, 0, req));
}
+/*
+ * Per queue holdoff interrupt rate handler.
+ *
+ * oid_arg2 selects the queue type carried in oid_arg1: non-zero for a
+ * struct em_tx_queue, zero for a struct em_rx_queue.  The queue's
+ * moderation register is read and converted to an interrupts/sec value
+ * which is handed to sysctl_handle_int(); a zero interval is reported as
+ * a rate of 0.  Returns the result of sysctl_handle_int().
+ */
+static int
+em_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
+{
+	struct e1000_hw *hw;
+	u32 vector, index, raw, interval, rate;
+	bool is_tx = oidp->oid_arg2;
+
+	/*
+	 * NOTE(review): the TX side indexes the register by tque->me while
+	 * the RX side uses rque->msix - confirm this asymmetry is intended.
+	 */
+	if (is_tx) {
+		struct em_tx_queue *tque = oidp->oid_arg1;
+
+		hw = &tque->sc->hw;
+		vector = tque->msix;
+		index = tque->me;
+	} else {
+		struct em_rx_queue *rque = oidp->oid_arg1;
+
+		hw = &rque->sc->hw;
+		vector = rque->msix;
+		index = rque->msix;
+	}
+
+	/* Pick the moderation register that applies to this MAC/vector */
+	if (hw->mac.type >= igb_mac_min)
+		raw = E1000_READ_REG(hw, E1000_EITR(index));
+	else if (hw->mac.type == e1000_82574 && vector)
+		raw = E1000_READ_REG(hw, E1000_EITR_82574(index));
+	else
+		raw = E1000_READ_REG(hw, E1000_ITR);
+
+	/* Convert the register value back to a rate; 0 stays 0 */
+	rate = 0;
+	if (hw->mac.type >= igb_mac_min) {
+		interval = raw & IGB_QVECTOR_MASK;
+		if (interval > 0)
+			rate = IGB_INTS_TO_EITR(interval);
+	} else if (raw > 0) {
+		rate = EM_INTS_TO_ITR(raw);
+	}
+
+	return (sysctl_handle_int(oidp, &rate, 0, req));
+}
+
/*
* Add sysctl variables, one per statistic, to the system.
*/
@@ -4466,6 +4715,11 @@
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
+ SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
+ CTLTYPE_UINT | CTLFLAG_RD, tx_que,
+ true, em_sysctl_interrupt_rate_handler,
+ "IU", "Interrupt Rate");
+
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
CTLTYPE_UINT | CTLFLAG_RD, sc,
E1000_TDH(txr->me), em_sysctl_reg_handler, "IU",
@@ -4486,6 +4740,11 @@
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
+ SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
+ CTLTYPE_UINT | CTLFLAG_RD, rx_que,
+ false, em_sysctl_interrupt_rate_handler,
+ "IU", "Interrupt Rate");
+
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
CTLTYPE_UINT | CTLFLAG_RD, sc,
E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU",
diff --git a/sys/dev/e1000/igb_txrx.c b/sys/dev/e1000/igb_txrx.c
--- a/sys/dev/e1000/igb_txrx.c
+++ b/sys/dev/e1000/igb_txrx.c
@@ -292,6 +292,10 @@
txd->read.cmd_type_len |= htole32(E1000_TXD_CMD_EOP | txd_flags);
pi->ipi_new_pidx = i;
+ /* Sent data accounting for AIM */
+ txr->tx_bytes += pi->ipi_len;
+ ++txr->tx_packets;
+
return (0);
}

File Metadata

Mime Type
text/plain
Expires
Fri, Jan 17, 8:56 AM (20 h, 40 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15836943
Default Alt Text
D46768.diff (14 KB)

Event Timeline