Page MenuHomeFreeBSD

D47053.id144621.diff
No OneTemporary

D47053.id144621.diff

Index: sys/dev/igc/if_igc.h
===================================================================
--- sys/dev/igc/if_igc.h
+++ sys/dev/igc/if_igc.h
@@ -1,8 +1,8 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
+ * Copyright (c) 2001-2024, Intel Corporation
* Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
- * All rights reserved.
* Copyright (c) 2021 Rubicon Communications, LLC (Netgate)
*
* Redistribution and use in source and binary forms, with or without
@@ -163,6 +163,17 @@
#define IGC_TX_PTHRESH 8
#define IGC_TX_HTHRESH 1
+/*
+ * Interrupt rates (interrupts per second) and EITR helpers.
+ *
+ * IGC_INTS_TO_EITR() turns a rate into an EITR register value: the
+ * interval is IGC_EITR_DIVIDEND / rate, masked with IGC_QVECTOR_MASK and
+ * shifted left by IGC_EITR_SHIFT.  The argument is used as a divisor, so
+ * it must be non-zero; it is parenthesized so that expression arguments
+ * expand correctly, and it is evaluated exactly once.
+ */
+#define IGC_INTS_4K		4000
+#define IGC_INTS_20K		20000
+#define IGC_INTS_70K		70000
+#define IGC_INTS_DEFAULT	8000
+#define IGC_EITR_DIVIDEND	1000000
+#define IGC_EITR_SHIFT		2
+#define IGC_QVECTOR_MASK	0x7FFC
+#define IGC_INTS_TO_EITR(i) \
+	(((IGC_EITR_DIVIDEND / (i)) & IGC_QVECTOR_MASK) << IGC_EITR_SHIFT)
+
/*
* TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
* multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will
@@ -218,7 +229,12 @@
/* Interrupt resources */
void *tag;
struct resource *res;
- unsigned long tx_irq;
+
+ /* Soft stats */
+ unsigned long tx_irq;
+ unsigned long tx_packets;
+ unsigned long tx_bytes;
+
/* Saved csum offloading context information */
int csum_flags;
@@ -253,6 +269,9 @@
unsigned long rx_discarded;
unsigned long rx_packets;
unsigned long rx_bytes;
+
+ /* Next requested ITR latency */
+ u8 rx_nextlatency;
};
struct igc_tx_queue {
@@ -268,6 +287,7 @@
u32 me;
u32 msix;
u32 eims;
+ u32 eitr_setting;
struct rx_ring rxr;
u64 irqs;
struct if_irq que_irq;
@@ -315,6 +335,8 @@
u32 rx_mbuf_sz;
+ int enable_aim;
+
/* Management and WOL features */
u32 wol;
@@ -328,6 +350,7 @@
u16 link_duplex;
u32 smartspeed;
u32 dmac;
+ u32 pba;
int link_mask;
u64 que_mask;
Index: sys/dev/igc/if_igc.c
===================================================================
--- sys/dev/igc/if_igc.c
+++ sys/dev/igc/if_igc.c
@@ -1,8 +1,8 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
+ * Copyright (c) 2001-2024, Intel Corporation
* Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
- * All rights reserved.
* Copyright (c) 2021 Rubicon Communications, LLC (Netgate)
*
* Redistribution and use in source and binary forms, with or without
@@ -125,6 +125,8 @@
static int igc_get_rs(SYSCTL_HANDLER_ARGS);
static void igc_print_debug_info(struct igc_adapter *);
static int igc_is_valid_ether_addr(u8 *);
+static void igc_newitr(struct igc_adapter *, struct igc_rx_queue *,
+ struct tx_ring *, struct rx_ring *);
/* Management and WOL Support */
static void igc_get_hw_control(struct igc_adapter *);
static void igc_release_hw_control(struct igc_adapter *);
@@ -238,10 +240,19 @@
SYSCTL_INT(_hw_igc, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &igc_eee_setting, 0,
"Enable Energy Efficient Ethernet");
+/*
+ * AIM: Adaptive Interrupt Moderation
+ * which means that the interrupt rate is varied over time based on the
+ * traffic for that interrupt vector
+ *
+ * This is the driver-wide default that is copied into each adapter's
+ * enable_aim field.  As consumed by igc_newitr():
+ *   0      - AIM off; the rate comes from igc_max_interrupt_rate
+ *   1      - AIM on, but the lowest-latency (70K ints/s) target is
+ *            trimmed to the 20K ints/s target
+ *   other  - AIM on with all latency targets available (documented
+ *            value is 2)
+ */
+static int igc_enable_aim = 1;
+SYSCTL_INT(_hw_igc, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igc_enable_aim,
+ 0, "Enable adaptive interrupt moderation (1=normal, 2=lowlatency)");
+
/*
** Tuneable Interrupt rate
*/
-static int igc_max_interrupt_rate = 20000;
+static int igc_max_interrupt_rate = IGC_INTS_DEFAULT;
SYSCTL_INT(_hw_igc, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
&igc_max_interrupt_rate, 0, "Maximum interrupts per second");
@@ -444,6 +455,13 @@
OID_AUTO, "nvm", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
adapter, 0, igc_sysctl_nvm_info, "I", "NVM Information");
+ adapter->enable_aim = igc_enable_aim;
+ SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "enable_aim", CTLFLAG_RW,
+ &adapter->enable_aim, 0,
+ "Interrupt Moderation (1=normal, 2=lowlatency)");
+
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD,
@@ -816,6 +834,142 @@
igc_set_eee_i225(&adapter->hw, true, true, true);
}
+/*
+ * Latency targets used by the AIM state machine in igc_newitr().  The
+ * enumerators rely on implicit numbering (0..3); the current target is
+ * stored per rx ring in rx_nextlatency.
+ */
+enum itr_latency_target {
+	itr_latency_disabled,	/* 0: AIM off / not bootstrapped yet */
+	itr_latency_lowest,	/* 1: 70k ints/s */
+	itr_latency_low,	/* 2: 20k ints/s */
+	itr_latency_bulk	/* 3: 4k ints/s */
+};
+/*********************************************************************
+ *
+ *  Helper to calculate next EITR value for AIM
+ *
+ *  Called from the interrupt filters with the tx and rx rings
+ *  associated with the queue.  Uses the byte and packet counts
+ *  accumulated in the rings (the callers reset them afterwards) to
+ *  pick the next latency target, records it in rxr->rx_nextlatency,
+ *  and writes the EITR register of que->msix only when the computed
+ *  value differs from the cached que->eitr_setting.
+ *
+ *  Returns without side effects when both rings report zero bytes.
+ *
+ *********************************************************************/
+static void
+igc_newitr(struct igc_adapter *sc, struct igc_rx_queue *que,
+    struct tx_ring *txr, struct rx_ring *rxr)
+{
+	struct igc_hw *hw = &sc->hw;
+	u32 newitr;
+	/*
+	 * Start from zero: when only the rx ring carried traffic the tx
+	 * branch below is skipped and these feed straight into max().
+	 */
+	u32 bytes = 0;
+	u32 bytes_packets = 0;
+	u32 packets = 0;
+	u8 nextlatency;
+
+	/* Idle, do nothing */
+	if ((txr->tx_bytes == 0) && (rxr->rx_bytes == 0))
+		return;
+
+	newitr = 0;
+
+	if (sc->enable_aim) {
+		nextlatency = rxr->rx_nextlatency;
+
+		/*
+		 * Use half default (4K) ITR if sub-gig.  Compare with '<'
+		 * rather than '!=' so that links faster than 1000 still
+		 * go through AIM.
+		 */
+		if (sc->link_speed < 1000) {
+			newitr = IGC_INTS_4K;
+			goto igc_set_next_itr;
+		}
+		/* Want at least enough packet buffer for two frames to AIM */
+		if (sc->shared->isc_max_frame_size * 2 > (sc->pba << 10)) {
+			newitr = igc_max_interrupt_rate;
+			sc->enable_aim = 0;
+			goto igc_set_next_itr;
+		}
+
+		/* Get the largest values from the associated tx and rx ring */
+		if (txr->tx_bytes && txr->tx_packets) {
+			bytes = txr->tx_bytes;
+			bytes_packets = txr->tx_bytes / txr->tx_packets;
+			packets = txr->tx_packets;
+		}
+		if (rxr->rx_bytes && rxr->rx_packets) {
+			bytes = max(bytes, rxr->rx_bytes);
+			bytes_packets = max(bytes_packets,
+			    rxr->rx_bytes / rxr->rx_packets);
+			packets = max(packets, rxr->rx_packets);
+		}
+
+		/* Latency state machine */
+		switch (nextlatency) {
+		case itr_latency_disabled: /* Bootstrapping */
+			nextlatency = itr_latency_low;
+			break;
+		case itr_latency_lowest: /* 70k ints/s */
+			/* TSO and jumbo frames */
+			if (bytes_packets > 8000)
+				nextlatency = itr_latency_bulk;
+			else if ((packets < 5) && (bytes > 512))
+				nextlatency = itr_latency_low;
+			break;
+		case itr_latency_low: /* 20k ints/s */
+			if (bytes > 10000) {
+				/* Handle TSO */
+				if (bytes_packets > 8000)
+					nextlatency = itr_latency_bulk;
+				else if ((packets < 10) ||
+				    (bytes_packets > 1200))
+					nextlatency = itr_latency_bulk;
+				else if (packets > 35)
+					nextlatency = itr_latency_lowest;
+			} else if (bytes_packets > 2000) {
+				nextlatency = itr_latency_bulk;
+			} else if (packets < 3 && bytes < 512) {
+				nextlatency = itr_latency_lowest;
+			}
+			break;
+		case itr_latency_bulk: /* 4k ints/s */
+			if (bytes > 25000) {
+				if (packets > 35)
+					nextlatency = itr_latency_low;
+			} else if (bytes < 1500)
+				nextlatency = itr_latency_low;
+			break;
+		default:
+			/* Report the unexpected state before replacing it */
+			device_printf(sc->dev,
+			    "Unexpected newitr transition %d\n", nextlatency);
+			nextlatency = itr_latency_low;
+			break;
+		}
+
+		/* Trim itr_latency_lowest for default AIM setting */
+		if (sc->enable_aim == 1 && nextlatency == itr_latency_lowest)
+			nextlatency = itr_latency_low;
+
+		/* Request new latency */
+		rxr->rx_nextlatency = nextlatency;
+	} else {
+		/* We may have toggled to AIM disabled */
+		nextlatency = itr_latency_disabled;
+		rxr->rx_nextlatency = nextlatency;
+	}
+
+	/* ITR state machine */
+	switch (nextlatency) {
+	case itr_latency_lowest:
+		newitr = IGC_INTS_70K;
+		break;
+	case itr_latency_low:
+		newitr = IGC_INTS_20K;
+		break;
+	case itr_latency_bulk:
+		newitr = IGC_INTS_4K;
+		break;
+	case itr_latency_disabled:
+	default:
+		newitr = igc_max_interrupt_rate;
+		break;
+	}
+
+igc_set_next_itr:
+	/*
+	 * igc_max_interrupt_rate is a tunable and may be zero.  The
+	 * conversion divides by the rate, so map a zero rate to a zero
+	 * interval instead of dividing by zero.
+	 */
+	newitr = (newitr != 0) ? IGC_INTS_TO_EITR(newitr) : 0;
+
+	newitr |= IGC_EITR_CNT_IGNR;
+
+	/* Skip the register write when nothing changed */
+	if (newitr != que->eitr_setting) {
+		que->eitr_setting = newitr;
+		IGC_WRITE_REG(hw, IGC_EITR(que->msix), que->eitr_setting);
+	}
+}
+
/*********************************************************************
*
* Fast Legacy/MSI Combined Interrupt Service routine
@@ -825,10 +979,14 @@
igc_intr(void *arg)
{
struct igc_adapter *adapter = arg;
+ struct igc_hw *hw = &adapter->hw;
+ struct igc_rx_queue *que = &adapter->rx_queues[0];
+ struct tx_ring *txr = &adapter->tx_queues[0].txr;
+ struct rx_ring *rxr = &que->rxr;
if_ctx_t ctx = adapter->ctx;
u32 reg_icr;
- reg_icr = IGC_READ_REG(&adapter->hw, IGC_ICR);
+ reg_icr = IGC_READ_REG(hw, IGC_ICR);
/* Hot eject? */
if (reg_icr == 0xffffffff)
@@ -856,6 +1014,14 @@
if (reg_icr & IGC_ICR_RXO)
adapter->rx_overruns++;
+ igc_newitr(adapter, que, txr, rxr);
+
+ /* Reset state */
+ txr->tx_bytes = 0;
+ txr->tx_packets = 0;
+ rxr->rx_bytes = 0;
+ rxr->rx_packets = 0;
+
return (FILTER_SCHEDULE_THREAD);
}
@@ -888,9 +1054,20 @@
igc_msix_que(void *arg)
{
struct igc_rx_queue *que = arg;
+ struct igc_adapter *sc = que->adapter;
+ struct tx_ring *txr = &sc->tx_queues[que->msix].txr;
+ struct rx_ring *rxr = &que->rxr;
++que->irqs;
+ igc_newitr(sc, que, txr, rxr);
+
+ /* Reset state */
+ txr->tx_bytes = 0;
+ txr->tx_packets = 0;
+ rxr->rx_bytes = 0;
+ rxr->rx_packets = 0;
+
return (FILTER_SCHEDULE_THREAD);
}
@@ -1395,7 +1572,7 @@
/* Set the starting interrupt rate */
if (igc_max_interrupt_rate > 0)
- newitr = (4000000 / igc_max_interrupt_rate) & 0x7FFC;
+ newitr = IGC_INTS_TO_EITR(igc_max_interrupt_rate);
newitr |= IGC_EITR_CNT_IGNR;
@@ -1608,6 +1785,9 @@
/* Setup DMA Coalescing */
igc_init_dmac(adapter, pba);
+ /* Save the final PBA off if it needs to be used elsewhere i.e. AIM */
+ adapter->pba = pba;
+
IGC_WRITE_REG(hw, IGC_VET, ETHERTYPE_VLAN);
igc_get_phy_info(hw);
igc_check_for_link(hw);
@@ -2380,6 +2560,40 @@
return (sysctl_handle_int(oidp, &val, 0, req));
}
+/*
+ * Per queue holdoff interrupt rate handler.
+ *
+ * oid_arg2 selects how oid_arg1 is interpreted: non-zero means a
+ * struct igc_tx_queue (EITR indexed by its 'me'), zero means a
+ * struct igc_rx_queue (EITR indexed by its 'msix').  The masked EITR
+ * contents are converted to a rate and handed to sysctl_handle_int();
+ * a zero interval is reported as rate 0.
+ */
+static int
+igc_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
+{
+	struct igc_hw *hw;
+	u32 eitr, interval, rate;
+
+	if (oidp->oid_arg2) {
+		struct igc_tx_queue *txq = oidp->oid_arg1;
+
+		hw = &txq->adapter->hw;
+		eitr = IGC_READ_REG(hw, IGC_EITR(txq->me));
+	} else {
+		struct igc_rx_queue *rxq = oidp->oid_arg1;
+
+		hw = &rxq->adapter->hw;
+		eitr = IGC_READ_REG(hw, IGC_EITR(rxq->msix));
+	}
+
+	/* The conversion divides by the interval, so guard against zero */
+	interval = eitr & IGC_QVECTOR_MASK;
+	rate = (interval > 0) ? IGC_INTS_TO_EITR(interval) : 0;
+
+	/* No write handling here: just propagate the handler's status */
+	return (sysctl_handle_int(oidp, &rate, 0, req));
+}
+
/*
* Add sysctl variables, one per statistic, to the system.
*/
@@ -2436,6 +2650,10 @@
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
+ SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
+ CTLTYPE_UINT | CTLFLAG_RD, tx_que,
+ true, igc_sysctl_interrupt_rate_handler, "IU",
+ "Interrupt Rate");
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter,
IGC_TDH(txr->me), igc_sysctl_reg_handler, "IU",
@@ -2456,6 +2674,10 @@
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX Queue Name");
queue_list = SYSCTL_CHILDREN(queue_node);
+ SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
+ CTLTYPE_UINT | CTLFLAG_RD, rx_que,
+ false, igc_sysctl_interrupt_rate_handler, "IU",
+ "Interrupt Rate");
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, adapter,
IGC_RDH(rxr->me), igc_sysctl_reg_handler, "IU",
Index: sys/dev/igc/igc_txrx.c
===================================================================
--- sys/dev/igc/igc_txrx.c
+++ sys/dev/igc/igc_txrx.c
@@ -316,6 +316,10 @@
txd->read.cmd_type_len |= htole32(IGC_ADVTXD_DCMD_EOP | txd_flags);
pi->ipi_new_pidx = i;
+ /* Sent data accounting for AIM */
+ txr->tx_bytes += pi->ipi_len;
+ ++txr->tx_packets;
+
return (0);
}

File Metadata

Mime Type
text/plain
Expires
Wed, Feb 5, 7:20 PM (4 h, 19 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16479369
Default Alt Text
D47053.id144621.diff (11 KB)

Event Timeline