Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F102715716
D26254.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
32 KB
Referenced Files
None
Subscribers
None
D26254.diff
View Options
Index: head/sys/conf/files
===================================================================
--- head/sys/conf/files
+++ head/sys/conf/files
@@ -4571,6 +4571,7 @@
compile-with "${LINUXKPI_C}"
# OpenFabrics Enterprise Distribution (Infiniband)
+net/if_infiniband.c optional ofed
ofed/drivers/infiniband/core/ib_addr.c optional ofed \
compile-with "${OFED_C}"
ofed/drivers/infiniband/core/ib_agent.c optional ofed \
Index: head/sys/modules/Makefile
===================================================================
--- head/sys/modules/Makefile
+++ head/sys/modules/Makefile
@@ -154,6 +154,7 @@
${_if_gif} \
${_if_gre} \
${_if_me} \
+ if_infiniband \
if_lagg \
${_if_ndis} \
${_if_stf} \
Index: head/sys/modules/if_infiniband/Makefile
===================================================================
--- head/sys/modules/if_infiniband/Makefile
+++ head/sys/modules/if_infiniband/Makefile
@@ -0,0 +1,10 @@
+# $FreeBSD$
+
+.PATH: ${SRCTOP}/sys/net
+
+KMOD= if_infiniband
+SRCS= if_infiniband.c \
+ opt_inet.h \
+ opt_inet6.h
+
+.include <bsd.kmod.mk>
Index: head/sys/net/if_infiniband.c
===================================================================
--- head/sys/net/if_infiniband.c
+++ head/sys/net/if_infiniband.c
@@ -0,0 +1,538 @@
+/*-
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/eventhandler.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/devctl.h>
+#include <sys/module.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+#include <net/ethernet.h>
+#include <net/infiniband.h>
+#include <net/bpf.h>
+#include <net/if_llatbl.h>
+#include <net/netisr.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/if_media.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip6.h>
+
+#include <netinet6/in6_var.h>
+#include <netinet6/nd6.h>
+
+#include <security/mac/mac_framework.h>
+
+#ifdef INET
+/*
+ * Derive the link-layer multicast address used for an IPv4 group.
+ *
+ * "addr" is the group address in network byte order.  The scope nibble
+ * (low four bits of byte 5) and bytes 8..9 are copied from the interface
+ * broadcast address "broadcast".  All 20 bytes of "buf" are written.
+ */
+static inline void
+infiniband_ipv4_multicast_map(uint32_t addr,
+    const uint8_t *broadcast, uint8_t *buf)
+{
+	const uint8_t scope = broadcast[5] & 0xF;
+	const uint32_t group = ntohl(addr);
+
+	/* Fixed prefix: one zero byte followed by four 0xff bytes. */
+	buf[0] = 0;
+	memset(&buf[1], 0xff, 4);
+
+	buf[5] = 0x10 | scope;
+	/* NOTE(review): 0x40, 0x1b looks like the RFC 4391 IPv4 signature. */
+	buf[6] = 0x40;
+	buf[7] = 0x1b;
+	buf[8] = broadcast[8];
+	buf[9] = broadcast[9];
+
+	/* Bytes 10..15 are zero; the group address fills the last four. */
+	memset(&buf[10], 0, 6);
+	buf[16] = (uint8_t)(group >> 24);
+	buf[17] = (uint8_t)(group >> 16);
+	buf[18] = (uint8_t)(group >> 8);
+	buf[19] = (uint8_t)group;
+}
+#endif
+
+#ifdef INET6
+/*
+ * Derive the link-layer multicast address used for an IPv6 group.
+ *
+ * The scope nibble (low four bits of byte 5) and bytes 8..9 are copied
+ * from the interface broadcast address "broadcast"; the last ten bytes
+ * of the IPv6 address "addr" become bytes 10..19 of "buf".
+ */
+static inline void
+infiniband_ipv6_multicast_map(const struct in6_addr *addr,
+    const uint8_t *broadcast, uint8_t *buf)
+{
+	const uint8_t scope = broadcast[5] & 0xF;
+
+	/* Fixed prefix: one zero byte followed by four 0xff bytes. */
+	buf[0] = 0;
+	memset(&buf[1], 0xff, 4);
+
+	buf[5] = 0x10 | scope;
+	/* NOTE(review): 0x60, 0x1b looks like the RFC 4391 IPv6 signature. */
+	buf[6] = 0x60;
+	buf[7] = 0x1b;
+	buf[8] = broadcast[8];
+	buf[9] = broadcast[9];
+
+	memcpy(&buf[10], &addr->s6_addr[6], 10);
+}
+#endif
+
+/*
+ * Tap a packet that begins with a struct infiniband_header.
+ *
+ * BPF is given a synthesised Ethernet header followed by the payload:
+ * the ethertype is the Infiniband protocol field, the destination is
+ * bytes 4..9 of the Infiniband hardware address and the source is all
+ * zeroes.  The Infiniband header is hidden from the mbuf for the
+ * duration of the tap and restored before returning.  Packets whose
+ * first mbuf is shorter than the header are silently skipped.
+ */
+void
+infiniband_bpf_mtap(struct ifnet *ifp, struct mbuf *mb)
+{
+	const size_t hdrlen = sizeof(struct infiniband_header);
+	struct infiniband_header *ibh;
+	struct ether_header eh;
+
+	if (mb->m_len < hdrlen)
+		return;
+
+	ibh = mtod(mb, struct infiniband_header *);
+
+	/* Build the pseudo Ethernet header handed to BPF. */
+	memcpy(eh.ether_dhost, ibh->ib_hwaddr + 4, ETHER_ADDR_LEN);
+	memset(eh.ether_shost, 0, ETHER_ADDR_LEN);
+	eh.ether_type = ibh->ib_protocol;
+
+	/* Temporarily step over the Infiniband header. */
+	mb->m_data += hdrlen;
+	mb->m_len -= hdrlen;
+	mb->m_pkthdr.len -= hdrlen;
+
+	bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
+
+	/* Put the mbuf back exactly as it was. */
+	mb->m_data -= hdrlen;
+	mb->m_len += hdrlen;
+	mb->m_pkthdr.len += hdrlen;
+}
+
+/*
+ * Infiniband output routine.
+ *
+ * Picks the link-layer destination for "m" according to the address
+ * family of "dst", prepends a struct infiniband_header and passes the
+ * packet to ifp->if_transmit().  AF_LINK packets are transmitted
+ * unchanged, without a header being added here.
+ *
+ * Returns the result of if_transmit() on success.  If the ARP/ND
+ * resolver reports EWOULDBLOCK, 0 is returned.  On every error path
+ * the mbuf has either been freed here or consumed by the resolver.
+ */
+static int
+infiniband_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+    struct route *ro)
+{
+	uint8_t edst[INFINIBAND_ADDR_LEN];
+#if defined(INET) || defined(INET6)
+	/*
+	 * NOTE(review): lle is never assigned in this function, so the
+	 * LLE_VALID branches below are currently unreachable.
+	 */
+	struct llentry *lle = NULL;
+#endif
+	struct infiniband_header *ibh;
+	int error = 0;
+	uint16_t type;
+	bool is_gw;
+
+	NET_EPOCH_ASSERT();
+
+	is_gw = ((ro != NULL) && (ro->ro_flags & RT_HAS_GW) != 0);
+
+#ifdef MAC
+	error = mac_ifnet_check_transmit(ifp, m);
+	if (error)
+		goto bad;
+#endif
+
+	M_PROFILE(m);
+	if (ifp->if_flags & IFF_MONITOR) {
+		error = ENETDOWN;
+		goto bad;
+	}
+	if (!((ifp->if_flags & IFF_UP) &&
+	    (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
+		error = ENETDOWN;
+		goto bad;
+	}
+
+	switch (dst->sa_family) {
+	case AF_LINK:
+		/* Caller supplied the complete frame; send it as-is. */
+		goto output;
+#ifdef INET
+	case AF_INET:
+		if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+			memcpy(edst, lle->ll_addr, sizeof(edst));
+		} else if (m->m_flags & M_MCAST) {
+			infiniband_ipv4_multicast_map(
+			    ((const struct sockaddr_in *)dst)->sin_addr.s_addr,
+			    ifp->if_broadcastaddr, edst);
+		} else {
+			error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
+			if (error) {
+				if (error == EWOULDBLOCK)
+					error = 0;
+				m = NULL; /* mbuf is consumed by resolver */
+				goto bad;
+			}
+		}
+		type = htons(ETHERTYPE_IP);
+		break;
+	case AF_ARP: {
+		struct arphdr *ah;
+
+		if (m->m_len < sizeof(*ah)) {
+			error = EINVAL;
+			goto bad;
+		}
+
+		ah = mtod(m, struct arphdr *);
+
+		if (m->m_len < arphdr_len(ah)) {
+			error = EINVAL;
+			goto bad;
+		}
+		ah->ar_hrd = htons(ARPHRD_INFINIBAND);
+
+		switch (ntohs(ah->ar_op)) {
+		case ARPOP_REVREQUEST:
+		case ARPOP_REVREPLY:
+			type = htons(ETHERTYPE_REVARP);
+			break;
+		case ARPOP_REQUEST:
+		case ARPOP_REPLY:
+		default:
+			type = htons(ETHERTYPE_ARP);
+			break;
+		}
+
+		if (m->m_flags & M_BCAST) {
+			memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
+		} else {
+			/* The target hardware address must be a full IB address. */
+			if (ah->ar_hln != INFINIBAND_ADDR_LEN) {
+				error = EINVAL;
+				goto bad;
+			}
+			memcpy(edst, ar_tha(ah), INFINIBAND_ADDR_LEN);
+		}
+		break;
+	}
+#endif
+#ifdef INET6
+	case AF_INET6: {
+		const struct ip6_hdr *ip6;
+
+		/* ip6 is only dereferenced after the length check below. */
+		ip6 = mtod(m, const struct ip6_hdr *);
+		if (m->m_len < sizeof(*ip6)) {
+			error = EINVAL;
+			goto bad;
+		} else if (lle != NULL && (lle->la_flags & LLE_VALID)) {
+			memcpy(edst, lle->ll_addr, sizeof(edst));
+		} else if (m->m_flags & M_MCAST) {
+			infiniband_ipv6_multicast_map(
+			    &((const struct sockaddr_in6 *)dst)->sin6_addr,
+			    ifp->if_broadcastaddr, edst);
+		} else if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
+			/* Non-multicast ICMPv6 is sent to the broadcast address. */
+			memcpy(edst, ifp->if_broadcastaddr, INFINIBAND_ADDR_LEN);
+		} else {
+			error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
+			if (error) {
+				if (error == EWOULDBLOCK)
+					error = 0;
+				m = NULL; /* mbuf is consumed by resolver */
+				goto bad;
+			}
+		}
+		type = htons(ETHERTYPE_IPV6);
+		break;
+	}
+#endif
+	default:
+		error = EAFNOSUPPORT;
+		goto bad;
+	}
+
+	/*
+	 * Add local net header. If no space in first mbuf,
+	 * allocate another.
+	 */
+	M_PREPEND(m, INFINIBAND_HDR_LEN, M_NOWAIT);
+	if (m == NULL) {
+		error = ENOBUFS;
+		goto bad;
+	}
+	ibh = mtod(m, struct infiniband_header *);
+
+	/*
+	 * M_PREPEND() does not clear the new space, so every header
+	 * field is written explicitly, including the reserved one.
+	 */
+	memcpy(ibh->ib_hwaddr, edst, sizeof(edst));
+	ibh->ib_protocol = type;
+	ibh->ib_reserved = 0;
+
+	/*
+	 * Queue message on interface, update output statistics if
+	 * successful, and start output if interface not yet active.
+	 */
+output:
+	return (ifp->if_transmit(ifp, m));
+bad:
+	if (m != NULL)
+		m_freem(m);
+	return (error);
+}
+
+/*
+ * Process a received Infiniband packet.
+ *
+ * "m" is expected to start with a struct infiniband_header.  The
+ * packet is classified as broadcast or multicast from the destination
+ * hardware address, offered to BPF, stripped of its header and then
+ * dispatched to the netisr selected by the protocol field.  The mbuf
+ * is always consumed, either by the upper layer or by being freed.
+ */
+static void
+infiniband_input(struct ifnet *ifp, struct mbuf *m)
+{
+	struct infiniband_header *ibh;
+	struct epoch_tracker et;
+	int isr;
+
+	CURVNET_SET_QUIET(ifp->if_vnet);
+
+	if ((ifp->if_flags & IFF_UP) == 0) {
+		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+		m_freem(m);
+		goto done;
+	}
+
+	/*
+	 * The header fields are read directly below, so the whole header
+	 * must be contiguous in the first mbuf.  m_pullup() frees the
+	 * chain itself when it fails.
+	 */
+	if (m->m_len < INFINIBAND_HDR_LEN) {
+		m = m_pullup(m, INFINIBAND_HDR_LEN);
+		if (m == NULL) {
+			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+			goto done;
+		}
+	}
+
+	ibh = mtod(m, struct infiniband_header *);
+
+	/*
+	 * Reset layer specific mbuf flags to avoid confusing upper
+	 * layers:
+	 */
+	m->m_flags &= ~M_VLANTAG;
+	m_clrprotoflags(m);
+
+	if (INFINIBAND_IS_MULTICAST(ibh->ib_hwaddr)) {
+		if (memcmp(ibh->ib_hwaddr, ifp->if_broadcastaddr,
+		    ifp->if_addrlen) == 0)
+			m->m_flags |= M_BCAST;
+		else
+			m->m_flags |= M_MCAST;
+		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
+	}
+
+	/* Let BPF have it before we strip the header. */
+	INFINIBAND_BPF_MTAP(ifp, m);
+
+	/* Allow monitor mode to claim this frame, after stats are updated. */
+	if (ifp->if_flags & IFF_MONITOR) {
+		m_freem(m);
+		goto done;
+	}
+
+	/* Direct packet to correct FIB based on interface config. */
+	M_SETFIB(m, ifp->if_fib);
+
+	/*
+	 * Dispatch frame to upper layer.
+	 */
+	switch (ibh->ib_protocol) {
+#ifdef INET
+	case htons(ETHERTYPE_IP):
+		isr = NETISR_IP;
+		break;
+
+	case htons(ETHERTYPE_ARP):
+		if (ifp->if_flags & IFF_NOARP) {
+			/* Discard packet if ARP is disabled on interface */
+			m_freem(m);
+			goto done;
+		}
+		isr = NETISR_ARP;
+		break;
+#endif
+#ifdef INET6
+	case htons(ETHERTYPE_IPV6):
+		isr = NETISR_IPV6;
+		break;
+#endif
+	default:
+		/* Unknown protocol: count it as an input error and drop. */
+		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+		m_freem(m);
+		goto done;
+	}
+
+	/* Strip off the Infiniband header. */
+	m_adj(m, INFINIBAND_HDR_LEN);
+
+#ifdef MAC
+	/*
+	 * Tag the mbuf with an appropriate MAC label before any other
+	 * consumers can get to it.
+	 */
+	mac_ifnet_create_mbuf(ifp, m);
+#endif
+	/* Hand the packet to the selected netisr inside the network epoch. */
+	NET_EPOCH_ENTER(et);
+	netisr_dispatch(isr, m);
+	NET_EPOCH_EXIT(et);
+done:
+	CURVNET_RESTORE();
+}
+
+/*
+ * Translate a multicast address "sa" into a link-layer address.
+ *
+ * AF_LINK addresses are only validated and *llsa is set to NULL.  For
+ * AF_INET and AF_INET6 the sockaddr_dl passed in through *llsa is
+ * initialised and filled with the mapped Infiniband multicast address.
+ * Returns 0 on success, EADDRNOTAVAIL for a non-multicast address and
+ * EAFNOSUPPORT for any other address family.
+ */
+static int
+infiniband_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
+    struct sockaddr *sa)
+{
+	struct sockaddr_dl *sdl;
+#ifdef INET
+	struct sockaddr_in *sin;
+#endif
+#ifdef INET6
+	struct sockaddr_in6 *sin6;
+#endif
+	uint8_t *lladdr;
+
+	switch (sa->sa_family) {
+	case AF_LINK:
+		/* Already a link-layer address; it only has to be multicast. */
+		sdl = (struct sockaddr_dl *)sa;
+		lladdr = LLADDR(sdl);
+		if (!INFINIBAND_IS_MULTICAST(lladdr))
+			return (EADDRNOTAVAIL);
+		*llsa = NULL;
+		return (0);
+
+#ifdef INET
+	case AF_INET:
+		sin = (struct sockaddr_in *)sa;
+		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+			return (EADDRNOTAVAIL);
+
+		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
+		sdl->sdl_alen = INFINIBAND_ADDR_LEN;
+		lladdr = LLADDR(sdl);
+		infiniband_ipv4_multicast_map(sin->sin_addr.s_addr,
+		    ifp->if_broadcastaddr, lladdr);
+		*llsa = (struct sockaddr *)sdl;
+		return (0);
+#endif
+#ifdef INET6
+	case AF_INET6:
+		sin6 = (struct sockaddr_in6 *)sa;
+		/*
+		 * The unspecified address would mean "all IPv6 multicast",
+		 * which has no meaning in infiniband; anything that is not
+		 * a multicast address is rejected as well.
+		 */
+		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
+		    !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+			return (EADDRNOTAVAIL);
+
+		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
+		sdl->sdl_alen = INFINIBAND_ADDR_LEN;
+		lladdr = LLADDR(sdl);
+		infiniband_ipv6_multicast_map(&sin6->sin6_addr,
+		    ifp->if_broadcastaddr, lladdr);
+		*llsa = (struct sockaddr *)sdl;
+		return (0);
+#endif
+	default:
+		return (EAFNOSUPPORT);
+	}
+}
+
+/*
+ * Perform common duties while attaching an Infiniband interface.
+ *
+ * "lla" is the link-layer address to install, or NULL to keep whatever
+ * if_attach() left in the interface's sockaddr_dl.  "llb" is the
+ * broadcast address to use, or NULL to leave if_broadcastaddr as is.
+ * A baudrate of zero is replaced by a 10 Gbit/s default.
+ */
+void
+infiniband_ifattach(struct ifnet *ifp, const uint8_t *lla, const uint8_t *llb)
+{
+	struct sockaddr_dl *sdl;
+	struct ifaddr *ifa;
+	int i;
+
+	/* Link-layer parameters are filled in before if_attach() runs. */
+	ifp->if_addrlen = INFINIBAND_ADDR_LEN;
+	ifp->if_hdrlen = INFINIBAND_HDR_LEN;
+	ifp->if_mtu = INFINIBAND_MTU;
+	if_attach(ifp);
+
+	/* Install the generic Infiniband handlers. */
+	ifp->if_output = infiniband_output;
+	ifp->if_input = infiniband_input;
+	ifp->if_resolvemulti = infiniband_resolvemulti;
+
+	if (ifp->if_baudrate == 0)
+		ifp->if_baudrate = IF_Gbps(10);	/* default value */
+	if (llb != NULL)
+		ifp->if_broadcastaddr = llb;
+
+	/* Mark the interface's link-level sockaddr as Infiniband. */
+	ifa = ifp->if_addr;
+	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
+	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+	sdl->sdl_type = IFT_INFINIBAND;
+	sdl->sdl_alen = ifp->if_addrlen;
+
+	if (lla == NULL) {
+		/* No address supplied: report the one already stored. */
+		lla = LLADDR(sdl);
+	} else {
+		memcpy(LLADDR(sdl), lla, ifp->if_addrlen);
+
+		if (ifp->if_hw_addr != NULL)
+			memcpy(ifp->if_hw_addr, lla, ifp->if_addrlen);
+	}
+
+	/* Attach ethernet compatible network device */
+	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
+
+	/* Announce Infiniband MAC address if non-zero. */
+	i = 0;
+	while (i < ifp->if_addrlen && lla[i] == 0)
+		i++;
+	if (i < ifp->if_addrlen)
+		if_printf(ifp, "Infiniband address: %20D\n", lla, ":");
+
+	/* All necessary bits are set up; announce the interface now. */
+	EVENTHANDLER_INVOKE(infiniband_ifattach_event, ifp);
+
+	if (IS_DEFAULT_VNET(curvnet))
+		devctl_notify("INFINIBAND", ifp->if_xname, "IFATTACH", NULL);
+}
+
+/*
+ * Perform common duties while detaching an Infiniband interface:
+ * the BPF tap is detached first, then the interface itself.
+ * The ifnet is not freed here.
+ */
+void
+infiniband_ifdetach(struct ifnet *ifp)
+{
+ bpfdetach(ifp);
+ if_detach(ifp);
+}
+
+/*
+ * Module event handler.  Loading and unloading need no work and always
+ * succeed; every other event type is rejected with EOPNOTSUPP.
+ */
+static int
+infiniband_modevent(module_t mod, int type, void *data)
+{
+
+	if (type == MOD_LOAD || type == MOD_UNLOAD)
+		return (0);
+	return (EOPNOTSUPP);
+}
+
+/* Kernel module glue: registers "if_infiniband" at version 1. */
+static moduledata_t infiniband_mod = {
+	.name = "if_infiniband",
+	.evhand = infiniband_modevent,
+};
+
+DECLARE_MODULE(if_infiniband, infiniband_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
+MODULE_VERSION(if_infiniband, 1);
Index: head/sys/net/infiniband.h
===================================================================
--- head/sys/net/infiniband.h
+++ head/sys/net/infiniband.h
@@ -0,0 +1,80 @@
+/*-
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __INFINIBAND_H__
+#define __INFINIBAND_H__
+
+#include <sys/cdefs.h>
+#include <sys/stdint.h>
+
+/* All sizes below are in bytes. */
+#define	INFINIBAND_ADDR_LEN	20	/* link-layer address */
+#define	INFINIBAND_MTU		1500	/* default interface MTU */
+#define	INFINIBAND_ENC_LEN	4	/* protocol plus reserved field */
+#define	INFINIBAND_HDR_LEN	(INFINIBAND_ADDR_LEN + INFINIBAND_ENC_LEN)
+
+/* True when byte 4 of the link-layer address "addr" is 0xff. */
+#define	INFINIBAND_IS_MULTICAST(addr)	((addr)[4] == 0xff)
+
+/*
+ * Tap a packet carrying a struct infiniband_header, but only when a
+ * BPF listener is attached to the interface.
+ */
+#define	INFINIBAND_BPF_MTAP(_ifp, _m)					\
+do {									\
+	if (bpf_peers_present((_ifp)->if_bpf)) {			\
+		M_ASSERTVALID(_m);					\
+		infiniband_bpf_mtap((_ifp), (_m));			\
+	}								\
+} while (0)
+
+/*
+ * Header placed in front of every packet passed between the generic
+ * Infiniband layer and the driver; INFINIBAND_HDR_LEN bytes long.
+ */
+struct infiniband_header {
+	uint8_t		ib_hwaddr[INFINIBAND_ADDR_LEN];	/* link-layer address */
+	uint16_t	ib_protocol;			/* big endian */
+	uint16_t	ib_reserved;			/* zero */
+} __packed;
+
+/* A bare Infiniband link-layer address. */
+struct infiniband_address {
+	uint8_t		octet[INFINIBAND_ADDR_LEN];
+} __packed;
+
+#ifdef _KERNEL
+
+#include <sys/_eventhandler.h>
+
+struct ifnet;
+struct mbuf;
+
+/* Attach and detach helpers shared by Infiniband network drivers. */
+void	infiniband_ifattach(struct ifnet *, const uint8_t *hwaddr,
+	    const uint8_t *bcaddr);
+void	infiniband_ifdetach(struct ifnet *);
+
+/* BPF tap for packets that start with a struct infiniband_header. */
+void	infiniband_bpf_mtap(struct ifnet *, struct mbuf *);
+
+/* Event raised by infiniband_ifattach() for each attached interface. */
+typedef void (*infiniband_ifattach_event_handler_t)(void *, struct ifnet *);
+
+EVENTHANDLER_DECLARE(infiniband_ifattach_event,
+    infiniband_ifattach_event_handler_t);
+
+#endif
+
+#endif /* __INFINIBAND_H__ */
Index: head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
+++ head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -438,16 +438,7 @@
extern struct workqueue_struct *ipoib_workqueue;
-#define IPOIB_MTAP_PROTO(_ifp, _m, _proto) \
-do { \
- if (bpf_peers_present((_ifp)->if_bpf)) { \
- M_ASSERTVALID(_m); \
- ipoib_mtap_proto((_ifp), (_m), (_proto)); \
- } \
-} while (0)
-
/* functions */
-void ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto);
void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
@@ -463,8 +454,6 @@
int ipoib_add_pkey_attr(struct ipoib_dev_priv *priv);
int ipoib_add_umcast_attr(struct ipoib_dev_priv *priv);
-void ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto);
-
void ipoib_send(struct ipoib_dev_priv *priv, struct mbuf *mb,
struct ipoib_ah *address, u32 qpn);
void ipoib_reap_ah(struct work_struct *work);
@@ -540,7 +529,7 @@
void ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req);
void ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length);
-struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int size);
+struct mbuf *ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req, int align, int size);
void ipoib_set_ethtool_ops(struct ifnet *dev);
Index: head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -153,7 +153,7 @@
ipoib_cm_alloc_rx_mb(struct ipoib_dev_priv *priv, struct ipoib_cm_rx_buf *rx_req)
{
return ipoib_alloc_map_mb(priv, (struct ipoib_rx_buf *)rx_req,
- priv->cm.max_cm_mtu);
+ sizeof(struct ipoib_pseudoheader), priv->cm.max_cm_mtu);
}
static void ipoib_cm_free_rx_ring(struct ipoib_dev_priv *priv,
@@ -484,10 +484,7 @@
struct mbuf *mb, *newmb;
struct ipoib_cm_rx *p;
int has_srq;
- u_short proto;
- CURVNET_SET_QUIET(dev->if_vnet);
-
ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
wr_id, wc->status);
@@ -561,16 +558,24 @@
ipoib_dma_mb(priv, mb, wc->byte_len);
- if_inc_counter(dev, IFCOUNTER_IPACKETS, 1);
- if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len);
-
mb->m_pkthdr.rcvif = dev;
- proto = *mtod(mb, uint16_t *);
- m_adj(mb, IPOIB_ENCAP_LEN);
- IPOIB_MTAP_PROTO(dev, mb, proto);
- ipoib_demux(dev, mb, ntohs(proto));
+ M_PREPEND(mb, sizeof(struct ipoib_pseudoheader), M_NOWAIT);
+ if (likely(mb != NULL)) {
+ struct ipoib_header *ibh;
+ if_inc_counter(dev, IFCOUNTER_IPACKETS, 1);
+ if_inc_counter(dev, IFCOUNTER_IBYTES, mb->m_pkthdr.len);
+
+ /* fixup destination infiniband address */
+ ibh = mtod(mb, struct ipoib_header *);
+ memset(ibh->hwaddr, 0, 4);
+ memcpy(ibh->hwaddr + 4, priv->local_gid.raw, sizeof(union ib_gid));
+
+ dev->if_input(dev, mb);
+ } else {
+ if_inc_counter(dev, IFCOUNTER_IERRORS, 1);
+ }
repost:
if (has_srq) {
if (unlikely(ipoib_cm_post_receive_srq(priv, wr_id)))
@@ -587,7 +592,6 @@
}
}
done:
- CURVNET_RESTORE();
return;
}
Index: head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -112,17 +112,19 @@
struct mbuf *
ipoib_alloc_map_mb(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req,
- int size)
+ int align, int size)
{
struct mbuf *mb, *m;
int i, j;
rx_req->mb = NULL;
- mb = m_getm2(NULL, size, M_NOWAIT, MT_DATA, M_PKTHDR);
+ mb = m_getm2(NULL, align + size, M_NOWAIT, MT_DATA, M_PKTHDR);
if (mb == NULL)
return (NULL);
for (i = 0, m = mb; m != NULL; m = m->m_next, i++) {
- m->m_len = M_SIZE(m);
+ m->m_len = M_SIZE(m) - align;
+ m->m_data += align;
+ align = 0;
mb->m_pkthdr.len += m->m_len;
rx_req->mapping[i] = ib_dma_map_single(priv->ca,
mtod(m, void *), m->m_len, DMA_FROM_DEVICE);
@@ -174,7 +176,7 @@
{
return ipoib_alloc_map_mb(priv, &priv->rx_ring[id],
- priv->max_ib_mtu + IB_GRH_BYTES);
+ 0, priv->max_ib_mtu + IB_GRH_BYTES);
}
static int ipoib_ib_post_receives(struct ipoib_dev_priv *priv)
Index: head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -40,21 +40,16 @@
#include "ipoib.h"
#include <sys/eventhandler.h>
-static int ipoib_resolvemulti(struct ifnet *, struct sockaddr **,
- struct sockaddr *);
-
-
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
-#include <linux/if_arp.h> /* For ARPHRD_xxx */
#include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
+#include <net/infiniband.h>
+
#include <rdma/ib_cache.h>
MODULE_AUTHOR("Roland Dreier");
@@ -98,19 +93,8 @@
const union ib_gid *gid, const struct sockaddr *addr,
void *client_data);
static void ipoib_start(struct ifnet *dev);
-static int ipoib_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro);
static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
-static void ipoib_input(struct ifnet *ifp, struct mbuf *m);
-#define IPOIB_MTAP(_ifp, _m) \
-do { \
- if (bpf_peers_present((_ifp)->if_bpf)) { \
- M_ASSERTVALID(_m); \
- ipoib_mtap_mb((_ifp), (_m)); \
- } \
-} while (0)
-
static struct unrhdr *ipoib_unrhdr;
static void
@@ -136,37 +120,6 @@
}
SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL);
-/*
- * This is for clients that have an ipoib_header in the mbuf.
- */
-static void
-ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb)
-{
- struct ipoib_header *ih;
- struct ether_header eh;
-
- ih = mtod(mb, struct ipoib_header *);
- eh.ether_type = ih->proto;
- bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN);
- bzero(&eh.ether_shost, ETHER_ADDR_LEN);
- mb->m_data += sizeof(struct ipoib_header);
- mb->m_len -= sizeof(struct ipoib_header);
- bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
- mb->m_data -= sizeof(struct ipoib_header);
- mb->m_len += sizeof(struct ipoib_header);
-}
-
-void
-ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto)
-{
- struct ether_header eh;
-
- eh.ether_type = proto;
- bzero(&eh.ether_shost, ETHER_ADDR_LEN);
- bzero(&eh.ether_dhost, ETHER_ADDR_LEN);
- bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
-}
-
static struct ib_client ipoib_client = {
.name = "ipoib",
.add = ipoib_add_one,
@@ -787,7 +740,7 @@
IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
if (mb == NULL)
break;
- IPOIB_MTAP(dev, mb);
+ INFINIBAND_BPF_MTAP(dev, mb);
ipoib_send_one(priv, mb);
}
}
@@ -875,8 +828,7 @@
dev = priv->dev;
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
priv->gone = 1;
- bpfdetach(dev);
- if_detach(dev);
+ infiniband_ifdetach(dev);
if_free(dev);
free_unr(ipoib_unrhdr, priv->unit);
} else
@@ -935,7 +887,6 @@
ipoib_intf_alloc(const char *name)
{
struct ipoib_dev_priv *priv;
- struct sockaddr_dl *sdl;
struct ifnet *dev;
priv = ipoib_priv_alloc();
@@ -953,24 +904,17 @@
}
if_initname(dev, name, priv->unit);
dev->if_flags = IFF_BROADCAST | IFF_MULTICAST;
- dev->if_addrlen = INFINIBAND_ALEN;
- dev->if_hdrlen = IPOIB_HEADER_LEN;
- if_attach(dev);
+
+ infiniband_ifattach(dev, NULL, priv->broadcastaddr);
+
dev->if_init = ipoib_init;
dev->if_ioctl = ipoib_ioctl;
dev->if_start = ipoib_start;
- dev->if_output = ipoib_output;
- dev->if_input = ipoib_input;
- dev->if_resolvemulti = ipoib_resolvemulti;
- dev->if_baudrate = IF_Gbps(10);
- dev->if_broadcastaddr = priv->broadcastaddr;
+
dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;
- sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr;
- sdl->sdl_type = IFT_INFINIBAND;
- sdl->sdl_alen = dev->if_addrlen;
+
priv->dev = dev;
if_link_state_change(dev, LINK_STATE_DOWN);
- bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN);
return dev->if_softc;
}
@@ -1165,7 +1109,6 @@
struct ifaddr *ifa;
int retval = 0;
- CURVNET_SET(dev->if_vnet);
NET_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
if (ifa->ifa_addr == NULL ||
@@ -1179,7 +1122,6 @@
}
}
NET_EPOCH_EXIT(et);
- CURVNET_RESTORE();
return (retval);
}
@@ -1475,286 +1417,6 @@
ib_sa_unregister_client(&ipoib_sa_client);
destroy_workqueue(ipoib_workqueue);
}
-
-/*
- * Infiniband output routine.
- */
-static int
-ipoib_output(struct ifnet *ifp, struct mbuf *m,
- const struct sockaddr *dst, struct route *ro)
-{
- u_char edst[INFINIBAND_ALEN];
-#if defined(INET) || defined(INET6)
- struct llentry *lle = NULL;
-#endif
- struct ipoib_header *eh;
- int error = 0, is_gw = 0;
- short type;
-
- NET_EPOCH_ASSERT();
-
- if (ro != NULL)
- is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m);
- if (error)
- goto bad;
-#endif
-
- M_PROFILE(m);
- if (ifp->if_flags & IFF_MONITOR) {
- error = ENETDOWN;
- goto bad;
- }
- if (!((ifp->if_flags & IFF_UP) &&
- (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
- error = ENETDOWN;
- goto bad;
- }
-
- switch (dst->sa_family) {
-#ifdef INET
- case AF_INET:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, lle->ll_addr, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst);
- else
- error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
- if (error)
- return (error == EWOULDBLOCK ? 0 : error);
- type = htons(ETHERTYPE_IP);
- break;
- case AF_ARP:
- {
- struct arphdr *ah;
- ah = mtod(m, struct arphdr *);
- ah->ar_hrd = htons(ARPHRD_INFINIBAND);
-
- switch(ntohs(ah->ar_op)) {
- case ARPOP_REVREQUEST:
- case ARPOP_REVREPLY:
- type = htons(ETHERTYPE_REVARP);
- break;
- case ARPOP_REQUEST:
- case ARPOP_REPLY:
- default:
- type = htons(ETHERTYPE_ARP);
- break;
- }
-
- if (m->m_flags & M_BCAST)
- bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN);
- else
- bcopy(ar_tha(ah), edst, INFINIBAND_ALEN);
-
- }
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- if (lle != NULL && (lle->la_flags & LLE_VALID))
- memcpy(edst, lle->ll_addr, sizeof(edst));
- else if (m->m_flags & M_MCAST)
- ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst);
- else
- error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
- if (error)
- return error;
- type = htons(ETHERTYPE_IPV6);
- break;
-#endif
-
- default:
- if_printf(ifp, "can't handle af%d\n", dst->sa_family);
- error = EAFNOSUPPORT;
- goto bad;
- }
-
- /*
- * Add local net header. If no space in first mbuf,
- * allocate another.
- */
- M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT);
- if (m == NULL) {
- error = ENOBUFS;
- goto bad;
- }
- eh = mtod(m, struct ipoib_header *);
- (void)memcpy(&eh->proto, &type, sizeof(eh->proto));
- (void)memcpy(&eh->hwaddr, edst, sizeof (edst));
-
- /*
- * Queue message on interface, update output statistics if
- * successful, and start output if interface not yet active.
- */
- return ((ifp->if_transmit)(ifp, m));
-bad:
- if (m != NULL)
- m_freem(m);
- return (error);
-}
-
-/*
- * Upper layer processing for a received Infiniband packet.
- */
-void
-ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto)
-{
- struct epoch_tracker et;
- int isr;
-
-#ifdef MAC
- /*
- * Tag the mbuf with an appropriate MAC label before any other
- * consumers can get to it.
- */
- mac_ifnet_create_mbuf(ifp, m);
-#endif
- /* Allow monitor mode to claim this frame, after stats are updated. */
- if (ifp->if_flags & IFF_MONITOR) {
- if_printf(ifp, "discard frame at IFF_MONITOR\n");
- m_freem(m);
- return;
- }
- /* Direct packet to correct FIB based on interface config */
- M_SETFIB(m, ifp->if_fib);
- /*
- * Dispatch frame to upper layer.
- */
- switch (proto) {
-#ifdef INET
- case ETHERTYPE_IP:
- isr = NETISR_IP;
- break;
-
- case ETHERTYPE_ARP:
- if (ifp->if_flags & IFF_NOARP) {
- /* Discard packet if ARP is disabled on interface */
- m_freem(m);
- return;
- }
- isr = NETISR_ARP;
- break;
-#endif
-#ifdef INET6
- case ETHERTYPE_IPV6:
- isr = NETISR_IPV6;
- break;
-#endif
- default:
- goto discard;
- }
- NET_EPOCH_ENTER(et);
- netisr_dispatch(isr, m);
- NET_EPOCH_EXIT(et);
- return;
-
-discard:
- m_freem(m);
-}
-
-/*
- * Process a received Infiniband packet.
- */
-static void
-ipoib_input(struct ifnet *ifp, struct mbuf *m)
-{
- struct ipoib_header *eh;
-
- if ((ifp->if_flags & IFF_UP) == 0) {
- m_freem(m);
- return;
- }
- CURVNET_SET_QUIET(ifp->if_vnet);
-
- /* Let BPF have it before we strip the header. */
- IPOIB_MTAP(ifp, m);
- eh = mtod(m, struct ipoib_header *);
- /*
- * Reset layer specific mbuf flags to avoid confusing upper layers.
- * Strip off Infiniband header.
- */
- m->m_flags &= ~M_VLANTAG;
- m_clrprotoflags(m);
- m_adj(m, IPOIB_HEADER_LEN);
-
- if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
- if (memcmp(eh->hwaddr, ifp->if_broadcastaddr,
- ifp->if_addrlen) == 0)
- m->m_flags |= M_BCAST;
- else
- m->m_flags |= M_MCAST;
- if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
- }
-
- ipoib_demux(ifp, m, ntohs(eh->proto));
- CURVNET_RESTORE();
-}
-
-static int
-ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
- struct sockaddr *sa)
-{
- struct sockaddr_dl *sdl;
-#ifdef INET
- struct sockaddr_in *sin;
-#endif
-#ifdef INET6
- struct sockaddr_in6 *sin6;
-#endif
- u_char *e_addr;
-
- switch(sa->sa_family) {
- case AF_LINK:
- /*
- * No mapping needed. Just check that it's a valid MC address.
- */
- sdl = (struct sockaddr_dl *)sa;
- e_addr = LLADDR(sdl);
- if (!IPOIB_IS_MULTICAST(e_addr))
- return EADDRNOTAVAIL;
- *llsa = NULL;
- return 0;
-
-#ifdef INET
- case AF_INET:
- sin = (struct sockaddr_in *)sa;
- if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
- return EADDRNOTAVAIL;
- sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
- sdl->sdl_alen = INFINIBAND_ALEN;
- e_addr = LLADDR(sdl);
- ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr,
- e_addr);
- *llsa = (struct sockaddr *)sdl;
- return 0;
-#endif
-#ifdef INET6
- case AF_INET6:
- sin6 = (struct sockaddr_in6 *)sa;
- /*
- * An IP6 address of 0 means listen to all
- * of the multicast address used for IP6.
- * This has no meaning in ipoib.
- */
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
- return EADDRNOTAVAIL;
- if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
- return EADDRNOTAVAIL;
- sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
- sdl->sdl_alen = INFINIBAND_ALEN;
- e_addr = LLADDR(sdl);
- ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
- *llsa = (struct sockaddr *)sdl;
- return 0;
-#endif
-
- default:
- return EAFNOSUPPORT;
- }
-}
-
module_init_order(ipoib_init_module, SI_ORDER_FIFTH);
module_exit_order(ipoib_cleanup_module, SI_ORDER_FIFTH);
@@ -1771,4 +1433,5 @@
DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
+MODULE_DEPEND(ipoib, if_infiniband, 1, 1, 1);
MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Nov 17, 6:43 AM (21 h, 46 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14671956
Default Alt Text
D26254.diff (32 KB)
Attached To
Mode
D26254: Add support for IPoIB lagg devices in FreeBSD
Attached
Detach File
Event Timeline
Log In to Comment