D29729: SMR for TCP hostcache
D29729.id87323.diff (14 KB)
Index: sys/netinet/tcp_hostcache.c
===================================================================
--- sys/netinet/tcp_hostcache.c
+++ sys/netinet/tcp_hostcache.c
@@ -2,6 +2,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2002 Andre Oppermann, Internet Business Solutions AG
+ * Copyright (c) 2021 Gleb Smirnoff <glebius@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -45,18 +46,18 @@
 * more lightweight and only carries information related to packet forwarding.
*
* tcp_hostcache is designed for multiple concurrent access in SMP
- * environments and high contention. All bucket rows have their own lock and
- * thus multiple lookups and modifies can be done at the same time as long as
- * they are in different bucket rows. If a request for insertion of a new
- * record can't be satisfied, it simply returns an empty structure. Nobody
- * and nothing outside of tcp_hostcache.c will ever point directly to any
- * entry in the tcp_hostcache. All communication is done in an
- * object-oriented way and only functions of tcp_hostcache will manipulate
- * hostcache entries. Otherwise, we are unable to achieve good behaviour in
- * concurrent access situations. Since tcp_hostcache is only caching
- * information, there are no fatal consequences if we either can't satisfy
- * any particular request or have to drop/overwrite an existing entry because
- * of bucket limit memory constrains.
+ * environments and high contention.  It is a straight hash.  Each bucket
+ * row is protected by its own lock for modification.  Readers are protected
+ * by SMR.  This puts certain restrictions on writers, e.g. a writer shall
+ * only insert a fully populated entry into a row, and a writer can't reuse
+ * the least used entry if a bucket is full.  Value updates for an entry
+ * shall be atomic.
+ *
+ * The TCP stack(s) communicate with tcp_hostcache via the KBI functions
+ * tcp_hc_*() and the hc_metrics_lite structure.
+ *
+ * Since tcp_hostcache is only caching information, there are no fatal
+ * consequences if we either can't allocate a new entry or have to drop
+ * an existing entry, or if we return somewhat stale information.
*/
/*
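The new comment describes the core SMR contract: readers run lock-free inside an SMR read section, writers serialize on a per-bucket lock and may only publish fully initialized entries, and freed memory is not reused while a reader may still hold a pointer to it. Below is a minimal sketch of that pattern using the smr(9) and atomic(9) APIs; everything named demo_* is hypothetical and not part of this patch.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smr.h>

/*
 * Hypothetical SMR-protected singly linked list.  Writers serialize
 * on their own lock (not shown); readers take no lock at all.
 */
struct demo_item {
        struct demo_item        *next;
        uint32_t                 value;
};

static smr_t             demo_smr;      /* created elsewhere, e.g. via
                                           smr_create() or
                                           uma_zone_get_smr() */
static struct demo_item *demo_head;

static uint32_t
demo_read_first(void)
{
        struct demo_item *it;
        uint32_t v;

        smr_enter(demo_smr);            /* open the read section */
        it = demo_head;
        v = (it != NULL) ? atomic_load_32(&it->value) : 0;
        smr_exit(demo_smr);             /* 'it' must not be used after */
        return (v);
}

static void
demo_writer_free(struct demo_item *it)
{
        /* Called after unlinking 'it' under the writer lock. */
        smr_synchronize(demo_smr);      /* wait for all active readers */
        /* ... now it is safe to free or reuse 'it' ... */
}

The hostcache itself avoids the blocking smr_synchronize() call by freeing through an SMR-enabled UMA zone, as the later hunks show.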
@@ -79,6 +80,7 @@
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
+#include <sys/smr.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -103,7 +105,6 @@
struct hc_metrics {
/* housekeeping */
TAILQ_ENTRY(hc_metrics) rmx_q;
- struct hc_head *rmx_head; /* head of bucket tail queue */
struct in_addr ip4; /* IP address */
struct in6_addr ip6; /* IP6 address */
uint32_t ip6_zoneid; /* IPv6 scope zone id */
@@ -126,6 +127,7 @@
struct tcp_hostcache {
struct hc_head *hashbase;
uma_zone_t zone;
+ smr_t smr;
u_int hashsize;
u_int hashmask;
u_int hashsalt;
@@ -149,8 +151,8 @@
VNET_DEFINE_STATIC(struct callout, tcp_hc_callout);
#define V_tcp_hc_callout VNET(tcp_hc_callout)
-static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *, bool);
-static struct hc_metrics *tcp_hc_insert(struct in_conninfo *);
+static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *, bool,
+ struct hc_head **);
static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS);
static int sysctl_tcp_hc_histo(SYSCTL_HANDLER_ARGS);
static int sysctl_tcp_hc_purgenow(SYSCTL_HANDLER_ARGS);
@@ -281,8 +283,9 @@
*/
V_tcp_hostcache.zone =
uma_zcreate("hostcache", sizeof(struct hc_metrics),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
uma_zone_set_max(V_tcp_hostcache.zone, V_tcp_hostcache.cache_limit);
+ V_tcp_hostcache.smr = uma_zone_get_smr(V_tcp_hostcache.zone);
/*
* Set up periodic cache cleanup.
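This hunk is the wiring that makes the lockless readers safe: UMA_ZONE_SMR makes uma_zfree_smr() defer item reuse until in-flight SMR readers drain, and uma_zone_get_smr() hands back the zone's SMR context for smr_enter()/smr_exit(). A reduced sketch of the same setup follows; the entry_* names are hypothetical.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/smr.h>
#include <vm/uma.h>

struct entry {
        uint32_t e_val;
};

static uma_zone_t       entry_zone;
static smr_t            entry_smr;

static void
entry_zone_init(void)
{
        /* SMR-enabled zone: frees are deferred past live readers. */
        entry_zone = uma_zcreate("demo_entry", sizeof(struct entry),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
        entry_smr = uma_zone_get_smr(entry_zone);
}

static struct entry *
entry_alloc(void)
{
        /* M_NOWAIT: a cache may simply fail to grow under pressure. */
        return (uma_zalloc_smr(entry_zone, M_NOWAIT));
}

static void
entry_free(struct entry *e)
{
        /* Safe even if readers still hold pointers inside smr_enter(). */
        uma_zfree_smr(entry_zone, e);
}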
@@ -315,11 +318,11 @@
/*
* Internal function: look up an entry in the hostcache or return NULL.
*
- * If an entry has been returned, the caller becomes responsible for
- * unlocking the bucket row after he is done reading/modifying the entry.
+ * If looked up for update, returns with the bucket row locked and sets
+ * *hcp to the row.  If looked up for read, returns in an SMR section.
*/
static struct hc_metrics *
-tcp_hc_lookup(struct in_conninfo *inc, bool update)
+tcp_hc_lookup(struct in_conninfo *inc, bool update, struct hc_head **hcp)
{
int hash;
struct hc_head *hc_head;
@@ -337,12 +340,12 @@
hc_head = &V_tcp_hostcache.hashbase[hash];
- /*
- * Acquire lock for this bucket row; we release the lock if we don't
- * find an entry, otherwise the caller has to unlock after he is
- * done.
- */
- THC_LOCK(hc_head);
+ if (update) {
+ KASSERT(hcp != NULL, ("%s: NULL hcp", __func__));
+ *hcp = hc_head;
+ THC_LOCK(hc_head);
+ } else
+ smr_enter(V_tcp_hostcache.smr);
/*
* Iterate through entries in bucket row looking for a match.
@@ -360,10 +363,6 @@
}
}
- /*
- * We were unsuccessful and didn't find anything.
- */
- THC_UNLOCK(hc_head);
return (NULL);
found:
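The branch in the hunks above is the dual-mode design in miniature: an update caller takes the classic bucket lock and gets the row back through *hcp, while a read caller merely enters the SMR section; either way the function returns with that protection still held, even when nothing was found. A condensed sketch of the shape, where demo_hash() and demo_match() are hypothetical stand-ins for the real hash and comparison logic:

/* Condensed sketch of tcp_hc_lookup() above; demo_* are stand-ins. */
static struct hc_metrics *
demo_lookup(struct in_conninfo *inc, bool update, struct hc_head **hcp)
{
        struct hc_head *row;
        struct hc_metrics *e;

        row = &V_tcp_hostcache.hashbase[demo_hash(inc)];
        if (update) {
                *hcp = row;             /* caller will THC_UNLOCK() */
                THC_LOCK(row);
        } else
                smr_enter(V_tcp_hostcache.smr); /* caller will smr_exit() */
        TAILQ_FOREACH(e, &row->hch_bucket, rmx_q)
                if (demo_match(e, inc))
                        return (e);
        return (NULL);          /* still locked / still in SMR section */
}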
@@ -373,109 +372,8 @@
else
hc_entry->rmx_hits++;
#endif
- hc_entry->rmx_expire = V_tcp_hostcache.expire;
-
- return (hc_entry);
-}
-
-/*
- * Internal function: insert an entry into the hostcache or return NULL if
- * unable to allocate a new one.
- *
- * If an entry has been returned, the caller becomes responsible for
- * unlocking the bucket row after he is done reading/modifying the entry.
- */
-static struct hc_metrics *
-tcp_hc_insert(struct in_conninfo *inc)
-{
- int hash;
- struct hc_head *hc_head;
- struct hc_metrics *hc_entry;
-
- KASSERT(inc != NULL, ("%s: NULL in_conninfo", __func__));
-
- /*
- * Hash the foreign ip address.
- */
- if (inc->inc_flags & INC_ISIPV6)
- hash = HOSTCACHE_HASH6(&inc->inc6_faddr);
- else
- hash = HOSTCACHE_HASH(&inc->inc_faddr);
-
- hc_head = &V_tcp_hostcache.hashbase[hash];
-
- /*
- * Acquire lock for this bucket row; we release the lock if we don't
- * find an entry, otherwise the caller has to unlock after he is
- * done.
- */
- THC_LOCK(hc_head);
-
- /*
- * If the bucket limit is reached, reuse the least-used element.
- */
- if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit ||
- atomic_load_int(&V_tcp_hostcache.cache_count) >= V_tcp_hostcache.cache_limit) {
- hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
- /*
- * At first we were dropping the last element, just to
- * reacquire it in the next two lines again, which isn't very
- * efficient. Instead just reuse the least used element.
- * We may drop something that is still "in-use" but we can be
- * "lossy".
- * Just give up if this bucket row is empty and we don't have
- * anything to replace.
- */
- if (hc_entry == NULL) {
- THC_UNLOCK(hc_head);
- return (NULL);
- }
- TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
- KASSERT(V_tcp_hostcache.hashbase[hash].hch_length > 0 &&
- V_tcp_hostcache.hashbase[hash].hch_length <=
- V_tcp_hostcache.bucket_limit,
- ("tcp_hostcache: bucket length range violated at %u: %u",
- hash, V_tcp_hostcache.hashbase[hash].hch_length));
- V_tcp_hostcache.hashbase[hash].hch_length--;
- atomic_subtract_int(&V_tcp_hostcache.cache_count, 1);
- TCPSTAT_INC(tcps_hc_bucketoverflow);
-#if 0
- uma_zfree(V_tcp_hostcache.zone, hc_entry);
-#endif
- } else {
- /*
- * Allocate a new entry, or balk if not possible.
- */
- hc_entry = uma_zalloc(V_tcp_hostcache.zone, M_NOWAIT);
- if (hc_entry == NULL) {
- THC_UNLOCK(hc_head);
- return (NULL);
- }
- }
-
- /*
- * Initialize basic information of hostcache entry.
- */
- bzero(hc_entry, sizeof(*hc_entry));
- if (inc->inc_flags & INC_ISIPV6) {
- hc_entry->ip6 = inc->inc6_faddr;
- hc_entry->ip6_zoneid = inc->inc6_zoneid;
- } else
- hc_entry->ip4 = inc->inc_faddr;
- hc_entry->rmx_head = hc_head;
- hc_entry->rmx_expire = V_tcp_hostcache.expire;
-
- /*
- * Put it upfront.
- */
- TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
- V_tcp_hostcache.hashbase[hash].hch_length++;
- KASSERT(V_tcp_hostcache.hashbase[hash].hch_length <
- V_tcp_hostcache.bucket_limit,
- ("tcp_hostcache: bucket length too high at %u: %u",
- hash, V_tcp_hostcache.hashbase[hash].hch_length));
- atomic_add_int(&V_tcp_hostcache.cache_count, 1);
- TCPSTAT_INC(tcps_hc_added);
+ if (hc_entry->rmx_expire != V_tcp_hostcache.expire)
+ atomic_store_int(&hc_entry->rmx_expire, V_tcp_hostcache.expire);
return (hc_entry);
}
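Note also the reworked expire handling at the end of the lookup: the store is skipped when rmx_expire already holds the current value, so the hot read path does not dirty the entry's cache line on every hit. The same idiom in isolation, as a hypothetical helper:

/* Refresh the TTL only when it changed, to avoid dirtying the
 * entry's cache line on every read-mostly lookup. */
static inline void
demo_touch_expire(struct hc_metrics *hc_entry)
{
        if (hc_entry->rmx_expire != V_tcp_hostcache.expire)
                atomic_store_int(&hc_entry->rmx_expire,
                    V_tcp_hostcache.expire);
}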
@@ -498,12 +396,13 @@
/*
* Find the right bucket.
*/
- hc_entry = tcp_hc_lookup(inc, false);
+ hc_entry = tcp_hc_lookup(inc, false, NULL);
/*
* If we don't have an existing object.
*/
if (hc_entry == NULL) {
+ smr_exit(V_tcp_hostcache.smr);
bzero(hc_metrics_lite, sizeof(*hc_metrics_lite));
return;
}
@@ -516,10 +415,7 @@
hc_metrics_lite->rmx_sendpipe = hc_entry->rmx_sendpipe;
hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe;
- /*
- * Unlock bucket row.
- */
- THC_UNLOCK(hc_entry->rmx_head);
+ smr_exit(V_tcp_hostcache.smr);
}
/*
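tcp_hc_get() above illustrates the read-side calling contract: tcp_hc_lookup(inc, false, NULL) returns while still inside the SMR section whether or not it found an entry, so the caller copies fields out and then calls smr_exit() on every path. A condensed sketch of that pattern, modeled on the code above; demo_hc_read_mtu() is hypothetical:

static uint32_t
demo_hc_read_mtu(struct in_conninfo *inc)
{
        struct hc_metrics *hc_entry;
        uint32_t mtu;

        hc_entry = tcp_hc_lookup(inc, false, NULL);    /* enters SMR */
        if (hc_entry == NULL) {
                smr_exit(V_tcp_hostcache.smr);  /* exit on NULL path, too */
                return (0);
        }
        mtu = hc_entry->rmx_mtu;        /* copy out while inside SMR */
        smr_exit(V_tcp_hostcache.smr);  /* pointer is dead after this */
        return (mtu);
}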
@@ -536,13 +432,15 @@
if (!V_tcp_use_hostcache)
return (0);
- hc_entry = tcp_hc_lookup(inc, false);
+ hc_entry = tcp_hc_lookup(inc, false, NULL);
if (hc_entry == NULL) {
+ smr_exit(V_tcp_hostcache.smr);
return (0);
}
mtu = hc_entry->rmx_mtu;
- THC_UNLOCK(hc_entry->rmx_head);
+ smr_exit(V_tcp_hostcache.smr);
+
return (mtu);
}
@@ -565,75 +463,142 @@
void
tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml)
{
+ struct hc_head *hc_head;
struct hc_metrics *hc_entry;
+ uint32_t v;
+ bool new;
if (!V_tcp_use_hostcache)
return;
- hc_entry = tcp_hc_lookup(inc, true);
+ hc_entry = tcp_hc_lookup(inc, true, &hc_head);
if (hc_entry == NULL) {
- hc_entry = tcp_hc_insert(inc);
- if (hc_entry == NULL)
+ /*
+ * Try to allocate a new entry. If the bucket limit
+ * is reached, delete the least-used element.
+ */
+ if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit ||
+ atomic_load_int(&V_tcp_hostcache.cache_count) >=
+ V_tcp_hostcache.cache_limit) {
+ hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
+ /*
+ * Give up if this bucket row is empty and we don't
+ * have anything to replace.
+ */
+ if (hc_entry == NULL) {
+ THC_UNLOCK(hc_head);
+ return;
+ }
+ TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
+ KASSERT(hc_head->hch_length > 0 &&
+ hc_head->hch_length <= V_tcp_hostcache.bucket_limit,
+ ("tcp_hostcache: bucket length violated at %p",
+ hc_head));
+ hc_head->hch_length--;
+ atomic_subtract_int(&V_tcp_hostcache.cache_count, 1);
+ TCPSTAT_INC(tcps_hc_bucketoverflow);
+ uma_zfree_smr(V_tcp_hostcache.zone, hc_entry);
+ }
+
+ /*
+ * Allocate a new entry, or balk if not possible.
+ */
+ hc_entry = uma_zalloc_smr(V_tcp_hostcache.zone, M_NOWAIT);
+ if (hc_entry == NULL) {
+ THC_UNLOCK(hc_head);
return;
- }
+ }
+
+ /*
+ * Initialize basic information of hostcache entry.
+ */
+ bzero(hc_entry, sizeof(*hc_entry));
+ if (inc->inc_flags & INC_ISIPV6) {
+ hc_entry->ip6 = inc->inc6_faddr;
+ hc_entry->ip6_zoneid = inc->inc6_zoneid;
+ } else
+ hc_entry->ip4 = inc->inc_faddr;
+ hc_entry->rmx_expire = V_tcp_hostcache.expire;
+ new = true;
+ } else
+ new = false;
+ /*
+ * Fill in data. Use atomics, since an existing entry is
+ * accessible by readers in SMR section.
+ */
if (hcml->rmx_mtu != 0) {
- hc_entry->rmx_mtu = hcml->rmx_mtu;
+ atomic_store_32(&hc_entry->rmx_mtu, hcml->rmx_mtu);
}
if (hcml->rmx_rtt != 0) {
if (hc_entry->rmx_rtt == 0)
- hc_entry->rmx_rtt = hcml->rmx_rtt;
+ v = hcml->rmx_rtt;
else
- hc_entry->rmx_rtt = ((uint64_t)hc_entry->rmx_rtt +
+ v = ((uint64_t)hc_entry->rmx_rtt +
(uint64_t)hcml->rmx_rtt) / 2;
+ atomic_store_32(&hc_entry->rmx_rtt, v);
TCPSTAT_INC(tcps_cachedrtt);
}
if (hcml->rmx_rttvar != 0) {
- if (hc_entry->rmx_rttvar == 0)
- hc_entry->rmx_rttvar = hcml->rmx_rttvar;
+ if (hc_entry->rmx_rttvar == 0)
+ v = hcml->rmx_rttvar;
else
- hc_entry->rmx_rttvar = ((uint64_t)hc_entry->rmx_rttvar +
+ v = ((uint64_t)hc_entry->rmx_rttvar +
(uint64_t)hcml->rmx_rttvar) / 2;
+ atomic_store_32(&hc_entry->rmx_rttvar, v);
TCPSTAT_INC(tcps_cachedrttvar);
}
if (hcml->rmx_ssthresh != 0) {
if (hc_entry->rmx_ssthresh == 0)
- hc_entry->rmx_ssthresh = hcml->rmx_ssthresh;
+ v = hcml->rmx_ssthresh;
else
- hc_entry->rmx_ssthresh =
- (hc_entry->rmx_ssthresh + hcml->rmx_ssthresh) / 2;
+ v = (hc_entry->rmx_ssthresh + hcml->rmx_ssthresh) / 2;
+ atomic_store_32(&hc_entry->rmx_ssthresh, v);
TCPSTAT_INC(tcps_cachedssthresh);
}
if (hcml->rmx_cwnd != 0) {
if (hc_entry->rmx_cwnd == 0)
- hc_entry->rmx_cwnd = hcml->rmx_cwnd;
+ v = hcml->rmx_cwnd;
else
- hc_entry->rmx_cwnd = ((uint64_t)hc_entry->rmx_cwnd +
+ v = ((uint64_t)hc_entry->rmx_cwnd +
(uint64_t)hcml->rmx_cwnd) / 2;
+ atomic_store_32(&hc_entry->rmx_cwnd, v);
/* TCPSTAT_INC(tcps_cachedcwnd); */
}
if (hcml->rmx_sendpipe != 0) {
if (hc_entry->rmx_sendpipe == 0)
- hc_entry->rmx_sendpipe = hcml->rmx_sendpipe;
+ v = hcml->rmx_sendpipe;
else
- hc_entry->rmx_sendpipe =
- ((uint64_t)hc_entry->rmx_sendpipe +
+ v = ((uint64_t)hc_entry->rmx_sendpipe +
(uint64_t)hcml->rmx_sendpipe) /2;
+ atomic_store_32(&hc_entry->rmx_sendpipe, v);
/* TCPSTAT_INC(tcps_cachedsendpipe); */
}
if (hcml->rmx_recvpipe != 0) {
if (hc_entry->rmx_recvpipe == 0)
- hc_entry->rmx_recvpipe = hcml->rmx_recvpipe;
+ v = hcml->rmx_recvpipe;
else
- hc_entry->rmx_recvpipe =
- ((uint64_t)hc_entry->rmx_recvpipe +
+ v = ((uint64_t)hc_entry->rmx_recvpipe +
(uint64_t)hcml->rmx_recvpipe) /2;
+ atomic_store_32(&hc_entry->rmx_recvpipe, v);
/* TCPSTAT_INC(tcps_cachedrecvpipe); */
}
- TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
- TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
- THC_UNLOCK(hc_entry->rmx_head);
+ /*
+ * Put it upfront.
+ */
+ if (new) {
+ TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
+ hc_head->hch_length++;
+ KASSERT(hc_head->hch_length < V_tcp_hostcache.bucket_limit,
+ ("tcp_hostcache: bucket length too high at %p", hc_head));
+ atomic_add_int(&V_tcp_hostcache.cache_count, 1);
+ TCPSTAT_INC(tcps_hc_added);
+ } else if (TAILQ_FIRST(&hc_head->hch_bucket) != hc_entry) {
+ TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
+ TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
+ }
+ THC_UNLOCK(hc_head);
}
/*
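Each metric update in tcp_hc_update() above is computed into a local variable and then published with a single atomic_store_32(), so a concurrent SMR reader observes either the old or the new value, never a torn intermediate. A minimal sketch of one such update; demo_update_rtt() is hypothetical:

/* Caller holds the bucket row lock; readers are lockless under SMR. */
static void
demo_update_rtt(struct hc_metrics *hc_entry, uint32_t sample)
{
        uint32_t v;

        if (hc_entry->rmx_rtt == 0)
                v = sample;
        else
                v = ((uint64_t)hc_entry->rmx_rtt + (uint64_t)sample) / 2;
        atomic_store_32(&hc_entry->rmx_rtt, v); /* old or new, never torn */
}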
@@ -786,11 +751,12 @@
TAILQ_REMOVE(
&V_tcp_hostcache.hashbase[i].hch_bucket,
hc_entry, rmx_q);
- uma_zfree(V_tcp_hostcache.zone, hc_entry);
+ uma_zfree_smr(V_tcp_hostcache.zone, hc_entry);
V_tcp_hostcache.hashbase[i].hch_length--;
atomic_subtract_int(&V_tcp_hostcache.cache_count, 1);
} else
- hc_entry->rmx_expire -= V_tcp_hostcache.prune;
+ atomic_subtract_int(&hc_entry->rmx_expire,
+ V_tcp_hostcache.prune);
}
THC_UNLOCK(&V_tcp_hostcache.hashbase[i]);
}
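The purge path shows the complete writer-side teardown: unlink the entry under the bucket row lock, then pass it to uma_zfree_smr(), which defers reuse of the memory until every reader that could still hold the pointer has left its SMR section. A hypothetical wrapper condensing that pattern:

/* Hypothetical wrapper around the unlink-then-free pattern above. */
static void
demo_hc_expire(struct hc_head *hc_head, struct hc_metrics *hc_entry)
{
        THC_LOCK(hc_head);
        TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
        hc_head->hch_length--;
        THC_UNLOCK(hc_head);
        atomic_subtract_int(&V_tcp_hostcache.cache_count, 1);
        /* Memory is recycled only after all SMR readers drain. */
        uma_zfree_smr(V_tcp_hostcache.zone, hc_entry);
}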