Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102701480
D36275.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
8 KB
Referenced Files
None
Subscribers
None
D36275.diff
View Options
diff --git a/share/man/man4/inet.4 b/share/man/man4/inet.4
--- a/share/man/man4/inet.4
+++ b/share/man/man4/inet.4
@@ -28,7 +28,7 @@
.\" From: @(#)inet.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd August 1, 2022
+.Dd September 8, 2022
.Dt INET 4
.Os
.Sh NAME
@@ -186,6 +186,8 @@
.It Va fragpackets
Integer: Current number of IPv4 fragment reassembly queue entries
for the VNET (read-only).
+.It Va fragttl
+Integer: time to live for IPv4 packet fragments in the per-VNET reassembly queue.
.It Va loopback_prefixlen
Integer: prefix length of the address space reserved for loopback purposes.
The default is 8, meaning that 127.0.0.0/8 is reserved for loopback,
diff --git a/sys/netinet/ip.h b/sys/netinet/ip.h
--- a/sys/netinet/ip.h
+++ b/sys/netinet/ip.h
@@ -210,7 +210,6 @@
*/
#define MAXTTL 255 /* maximum time to live (seconds) */
#define IPDEFTTL 64 /* default ttl, from RFC 1340 */
-#define IPFRAGTTL 60 /* time to live for frags, slowhz */
#define IPTTLDEC 1 /* subtracted when forwarding */
#define IP_MSS 576 /* default maximum segment size */
diff --git a/sys/netinet/ip_reass.c b/sys/netinet/ip_reass.c
--- a/sys/netinet/ip_reass.c
+++ b/sys/netinet/ip_reass.c
@@ -75,6 +75,10 @@
struct ipqbucket {
TAILQ_HEAD(ipqhead, ipq) head;
struct mtx lock;
+ struct callout timer;
+#ifdef VIMAGE
+ struct vnet *vnet;
+#endif
int count;
};
@@ -87,6 +91,7 @@
#define IPQ_TRYLOCK(i) mtx_trylock(&V_ipq[i].lock)
#define IPQ_UNLOCK(i) mtx_unlock(&V_ipq[i].lock)
#define IPQ_LOCK_ASSERT(i) mtx_assert(&V_ipq[i].lock, MA_OWNED)
+#define IPQ_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->lock, MA_OWNED)
VNET_DEFINE_STATIC(int, ipreass_maxbucketsize);
#define V_ipreass_maxbucketsize VNET(ipreass_maxbucketsize)
@@ -98,10 +103,13 @@
#endif
static int sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS);
static int sysctl_maxfragbucketsize(SYSCTL_HANDLER_ARGS);
+static int sysctl_fragttl(SYSCTL_HANDLER_ARGS);
static void ipreass_zone_change(void *);
static void ipreass_drain_tomax(void);
static void ipq_free(struct ipqbucket *, struct ipq *);
static struct ipq * ipq_reuse(int);
+static void ipreass_callout(void *);
+static void ipreass_reschedule(struct ipqbucket *);
static inline void
ipq_timeout(struct ipqbucket *bucket, struct ipq *fp)
@@ -117,6 +125,7 @@
IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
ipq_free(bucket, fp);
+ ipreass_reschedule(bucket);
}
/*
@@ -167,9 +176,11 @@
sysctl_maxfragbucketsize, "I",
"Maximum number of IPv4 fragment reassembly queue entries per bucket");
-static u_int ipfragttl = IPFRAGTTL / 2;
-SYSCTL_UINT(_net_inet_ip, OID_AUTO, fragttl, CTLFLAG_RD, &ipfragttl,
- IPFRAGTTL / 2, "IP fragment life time on reassembly queue");
+VNET_DEFINE_STATIC(u_int, ipfragttl) = 30;
+#define V_ipfragttl VNET(ipfragttl)
+SYSCTL_PROC(_net_inet_ip, OID_AUTO, fragttl, CTLTYPE_INT | CTLFLAG_RW |
+ CTLFLAG_MPSAFE | CTLFLAG_VNET, NULL, 0, sysctl_fragttl, "IU",
+ "IP fragment life time on reassembly queue (seconds)");
/*
* Take incoming datagram fragment and try to reassemble it into
@@ -311,7 +322,7 @@
V_ipq[hash].count++;
fp->ipq_nfrags = 1;
atomic_add_int(&nfrags, 1);
- fp->ipq_ttl = IPFRAGTTL;
+ fp->ipq_expire = time_uptime + V_ipfragttl;
fp->ipq_p = ip->ip_p;
fp->ipq_id = ip->ip_id;
fp->ipq_src = ip->ip_src;
@@ -322,6 +333,12 @@
else
fp->ipq_maxoff = ntohs(ip->ip_off) + ntohs(ip->ip_len);
m->m_nextpkt = NULL;
+ if (fp == TAILQ_LAST(head, ipqhead))
+ callout_reset_sbt(&V_ipq[hash].timer,
+ SBT_1S * V_ipfragttl, SBT_1S, ipreass_callout,
+ &V_ipq[hash], 0);
+ else
+ MPASS(callout_active(&V_ipq[hash].timer));
goto done;
} else {
/*
@@ -509,6 +526,7 @@
m->m_pkthdr.rcvif = srcifp;
}
IPSTAT_INC(ips_reassembled);
+ ipreass_reschedule(&V_ipq[hash]);
IPQ_UNLOCK(hash);
#ifdef RSS
@@ -560,44 +578,48 @@
}
/*
- * If a timer expires on a reassembly queue, discard it.
+ * Timer expired on a bucket.
+ * There should be at least one ipq to be timed out.
*/
-static struct callout ipreass_callout;
static void
-ipreass_slowtimo(void *arg __unused)
+ipreass_callout(void *arg)
{
- VNET_ITERATOR_DECL(vnet_iter);
- struct ipq *fp, *tmp;
+ struct ipqbucket *bucket = arg;
+ struct ipq *fp;
- if (atomic_load_int(&nfrags) == 0)
- return;
+ IPQ_BUCKET_LOCK_ASSERT(bucket);
+ MPASS(atomic_load_int(&nfrags) > 0);
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
- for (int i = 0; i < IPREASS_NHASH; i++) {
- if (TAILQ_EMPTY(&V_ipq[i].head))
- continue;
- IPQ_LOCK(i);
- TAILQ_FOREACH_SAFE(fp, &V_ipq[i].head, ipq_list, tmp)
- if (--fp->ipq_ttl == 0)
- ipq_timeout(&V_ipq[i], fp);
- IPQ_UNLOCK(i);
- }
- CURVNET_RESTORE();
- }
- VNET_LIST_RUNLOCK_NOSLEEP();
+ CURVNET_SET(bucket->vnet);
+ fp = TAILQ_LAST(&bucket->head, ipqhead);
+ KASSERT(fp != NULL && fp->ipq_expire <= time_uptime,
+ ("%s: stray callout on bucket %p", __func__, bucket));
- callout_reset_sbt(&ipreass_callout, SBT_1MS * 500, SBT_1MS * 10,
- ipreass_slowtimo, NULL, 0);
+ while (fp != NULL && fp->ipq_expire <= time_uptime) {
+ ipq_timeout(bucket, fp);
+ fp = TAILQ_LAST(&bucket->head, ipqhead);
+ }
+ ipreass_reschedule(bucket);
+ CURVNET_RESTORE();
}
static void
-ipreass_timer_init(void *arg __unused)
+ipreass_reschedule(struct ipqbucket *bucket)
{
+ struct ipq *fp;
- callout_init(&ipreass_callout, 1);
- callout_reset_sbt(&ipreass_callout, SBT_1MS * 500, SBT_1MS * 10,
- ipreass_slowtimo, NULL, 0);
+ IPQ_BUCKET_LOCK_ASSERT(bucket);
+
+ if ((fp = TAILQ_LAST(&bucket->head, ipqhead)) != NULL) {
+ time_t t;
+
+ /* Protect against time_uptime tick. */
+ t = fp->ipq_expire - time_uptime;
+ t = (t > 0) ? t : 1;
+ callout_reset_sbt(&bucket->timer, SBT_1S * t, SBT_1S,
+ ipreass_callout, bucket, 0);
+ } else
+ callout_stop(&bucket->timer);
}
static void
@@ -614,7 +636,6 @@
IPQ_UNLOCK(i);
}
}
-SYSINIT(ipreass, SI_SUB_VNET_DONE, SI_ORDER_ANY, ipreass_timer_init, NULL);
/*
* Drain off all datagram fragments.
@@ -644,7 +665,11 @@
TAILQ_INIT(&V_ipq[i].head);
mtx_init(&V_ipq[i].lock, "IP reassembly", NULL,
MTX_DEF | MTX_DUPOK);
+ callout_init_mtx(&V_ipq[i].timer, &V_ipq[i].lock, 0);
V_ipq[i].count = 0;
+#ifdef VIMAGE
+ V_ipq[i].vnet = curvnet;
+#endif
}
V_ipq_hashseed = arc4random();
V_maxfragsperpacket = 16;
@@ -745,6 +770,7 @@
while (V_ipq[i].count > V_ipreass_maxbucketsize &&
(fp = TAILQ_LAST(&V_ipq[i].head, ipqhead)) != NULL)
ipq_timeout(&V_ipq[i], fp);
+ ipreass_reschedule(&V_ipq[i]);
IPQ_UNLOCK(i);
}
@@ -759,8 +785,10 @@
for (int i = 0; i < IPREASS_NHASH; i++) {
IPQ_LOCK(i);
fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
- if (fp != NULL)
+ if (fp != NULL) {
ipq_timeout(&V_ipq[i], fp);
+ ipreass_reschedule(&V_ipq[i]);
+ }
IPQ_UNLOCK(i);
}
}
@@ -854,6 +882,7 @@
}
TAILQ_REMOVE(&V_ipq[bucket].head, fp, ipq_list);
V_ipq[bucket].count--;
+ ipreass_reschedule(&V_ipq[bucket]);
if (bucket != start)
IPQ_UNLOCK(bucket);
break;
@@ -902,3 +931,24 @@
ipreass_drain_tomax();
return (0);
}
+
+/*
+ * Get or set the IP fragment time to live.
+ */
+static int
+sysctl_fragttl(SYSCTL_HANDLER_ARGS)
+{
+ u_int ttl;
+ int error;
+
+ ttl = V_ipfragttl;
+ error = sysctl_handle_int(oidp, &ttl, 0, req);
+ if (error || !req->newptr)
+ return (error);
+
+ if (ttl < 1 || ttl > MAXTTL)
+ return (EINVAL);
+
+ atomic_store_int(&V_ipfragttl, ttl);
+ return (0);
+}
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -56,18 +56,18 @@
/*
* Ip reassembly queue structure. Each fragment
* being reassembled is attached to one of these structures.
- * They are timed out after ipq_ttl drops to 0, and may also
- * be reclaimed if memory becomes tight.
+ * They are timed out after net.inet.ip.fragttl seconds, and may also be
+ * reclaimed if memory becomes tight.
*/
struct ipq {
TAILQ_ENTRY(ipq) ipq_list; /* to other reass headers */
- u_char ipq_ttl; /* time for reass q to live */
+ time_t ipq_expire; /* time_uptime when ipq expires */
+ u_char ipq_nfrags; /* # frags in this packet */
u_char ipq_p; /* protocol of this fragment */
u_short ipq_id; /* sequence id for reassembly */
int ipq_maxoff; /* total length of packet */
struct mbuf *ipq_frags; /* to ip headers of fragments */
struct in_addr ipq_src,ipq_dst;
- u_char ipq_nfrags; /* # frags in this packet */
struct label *ipq_label; /* MAC label */
};
#endif /* _KERNEL */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Nov 17, 1:35 AM (20 h, 52 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14668797
Default Alt Text
D36275.diff (8 KB)
Attached To
Mode
D36275: ip_reass: retire ipreass_slowtimo() in favor of per-slot callout
Attached
Detach File
Event Timeline
Log In to Comment