git: 051e7d78b039 - main - Kernel-side infrastructure to implement nvlist-based set/get ifcaps
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 24 May 2022 20:59:43 UTC
The branch main has been updated by kib:
URL: https://cgit.FreeBSD.org/src/commit/?id=051e7d78b03944d5910d4f7ad2f1fd6f2cfac382
commit 051e7d78b03944d5910d4f7ad2f1fd6f2cfac382
Author: Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2021-10-17 15:00:34 +0000
Commit: Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2022-05-24 20:59:32 +0000
Kernel-side infrastructure to implement nvlist-based set/get ifcaps
Reviewed by: hselasky, jhb, kp (previous version)
Sponsored by: NVIDIA Networking
MFC after: 3 weeks
Differential revision: https://reviews.freebsd.org/D32551
---
sys/net/if.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
sys/net/if.h | 59 +++++++++++++++++-
sys/sys/sockio.h | 3 +
3 files changed, 236 insertions(+), 4 deletions(-)
diff --git a/sys/net/if.c b/sys/net/if.c
index bc0240035ea3..c50cc2d291e2 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -58,6 +58,7 @@
#include <sys/lock.h>
#include <sys/refcount.h>
#include <sys/module.h>
+#include <sys/nv.h>
#include <sys/rwlock.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
@@ -2391,6 +2392,88 @@ ifr_data_get_ptr(void *ifrp)
return (ifrup->ifr.ifr_ifru.ifru_data);
}
+struct ifcap_nv_bit_name {
+ int cap_bit;
+ const char *cap_name;
+};
+#define CAPNV(x) {.cap_bit = IFCAP_##x, \
+ .cap_name = __CONCAT(IFCAP_, __CONCAT(x, _NAME)) }
+const struct ifcap_nv_bit_name ifcap_nv_bit_names[] = {
+ CAPNV(RXCSUM),
+ CAPNV(TXCSUM),
+ CAPNV(NETCONS),
+ CAPNV(VLAN_MTU),
+ CAPNV(VLAN_HWTAGGING),
+ CAPNV(JUMBO_MTU),
+ CAPNV(POLLING),
+ CAPNV(VLAN_HWCSUM),
+ CAPNV(TSO4),
+ CAPNV(TSO6),
+ CAPNV(LRO),
+ CAPNV(WOL_UCAST),
+ CAPNV(WOL_MCAST),
+ CAPNV(WOL_MAGIC),
+ CAPNV(TOE4),
+ CAPNV(TOE6),
+ CAPNV(VLAN_HWFILTER),
+ CAPNV(VLAN_HWTSO),
+ CAPNV(LINKSTATE),
+ CAPNV(NETMAP),
+ CAPNV(RXCSUM_IPV6),
+ CAPNV(TXCSUM_IPV6),
+ CAPNV(HWSTATS),
+ CAPNV(TXRTLMT),
+ CAPNV(HWRXTSTMP),
+ CAPNV(MEXTPG),
+ CAPNV(TXTLS4),
+ CAPNV(TXTLS6),
+ CAPNV(VXLAN_HWCSUM),
+ CAPNV(VXLAN_HWTSO),
+ CAPNV(TXTLS_RTLMT),
+ {0, NULL}
+};
+#define CAP2NV(x) {.cap_bit = IFCAP2_##x, \
+ .cap_name = __CONCAT(IFCAP2_, __CONCAT(x, _NAME)) }
+const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = {
+ CAP2NV(RXTLS4),
+ CAP2NV(RXTLS6),
+ {0, NULL}
+};
+#undef CAPNV
+#undef CAP2NV
+
+int
+if_capnv_to_capint(const nvlist_t *nv, int *old_cap,
+ const struct ifcap_nv_bit_name *nn, bool all)
+{
+ int i, res;
+
+ res = 0;
+ for (i = 0; nn[i].cap_name != NULL; i++) {
+ if (nvlist_exists_bool(nv, nn[i].cap_name)) {
+ if (all || nvlist_get_bool(nv, nn[i].cap_name))
+ res |= nn[i].cap_bit;
+ } else {
+ res |= *old_cap & nn[i].cap_bit;
+ }
+ }
+ return (res);
+}
+
+void
+if_capint_to_capnv(nvlist_t *nv, const struct ifcap_nv_bit_name *nn,
+ int ifr_cap, int ifr_req)
+{
+ int i;
+
+ for (i = 0; nn[i].cap_name != NULL; i++) {
+ if ((nn[i].cap_bit & ifr_cap) != 0) {
+ nvlist_add_bool(nv, nn[i].cap_name,
+ (nn[i].cap_bit & ifr_req) != 0);
+ }
+ }
+}
+
/*
* Hardware specific interface ioctls.
*/
@@ -2401,12 +2484,15 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
int error = 0, do_ifup = 0;
int new_flags, temp_flags;
size_t namelen, onamelen;
- size_t descrlen;
+ size_t descrlen, nvbuflen;
char *descrbuf, *odescrbuf;
char new_name[IFNAMSIZ];
char old_name[IFNAMSIZ], strbuf[IFNAMSIZ + 8];
struct ifaddr *ifa;
struct sockaddr_dl *sdl;
+ void *buf;
+ nvlist_t *nvcap;
+ struct siocsifcapnv_driver_data drv_ioctl_data;
ifr = (struct ifreq *)data;
switch (cmd) {
@@ -2425,6 +2511,47 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
ifr->ifr_curcap = ifp->if_capenable;
break;
+ case SIOCGIFCAPNV:
+ if ((ifp->if_capabilities & IFCAP_NV) == 0) {
+ error = EINVAL;
+ break;
+ }
+ buf = NULL;
+ nvcap = nvlist_create(0);
+ for (;;) {
+ if_capint_to_capnv(nvcap, ifcap_nv_bit_names,
+ ifp->if_capabilities, ifp->if_capenable);
+ if_capint_to_capnv(nvcap, ifcap2_nv_bit_names,
+ ifp->if_capabilities2, ifp->if_capenable2);
+ error = (*ifp->if_ioctl)(ifp, SIOCGIFCAPNV,
+ __DECONST(caddr_t, nvcap));
+ if (error != 0) {
+ if_printf(ifp,
+ "SIOCGIFCAPNV driver mistake: nvlist error %d\n",
+ error);
+ break;
+ }
+ buf = nvlist_pack(nvcap, &nvbuflen);
+ if (buf == NULL) {
+ error = nvlist_error(nvcap);
+ if (error == 0)
+ error = EDOOFUS;
+ break;
+ }
+ if (nvbuflen > ifr->ifr_cap_nv.buf_length) {
+ ifr->ifr_cap_nv.length = nvbuflen;
+ ifr->ifr_cap_nv.buffer = NULL;
+ error = EFBIG;
+ break;
+ }
+ ifr->ifr_cap_nv.length = nvbuflen;
+ error = copyout(buf, ifr->ifr_cap_nv.buffer, nvbuflen);
+ break;
+ }
+ free(buf, M_NVLIST);
+ nvlist_destroy(nvcap);
+ break;
+
case SIOCGIFDATA:
{
struct if_data ifd;
@@ -2563,7 +2690,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
case SIOCSIFCAP:
error = priv_check(td, PRIV_NET_SETIFCAP);
- if (error)
+ if (error != 0)
return (error);
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
@@ -2574,6 +2701,53 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
getmicrotime(&ifp->if_lastchange);
break;
+ case SIOCSIFCAPNV:
+ error = priv_check(td, PRIV_NET_SETIFCAP);
+ if (error != 0)
+ return (error);
+ if (ifp->if_ioctl == NULL)
+ return (EOPNOTSUPP);
+ if ((ifp->if_capabilities & IFCAP_NV) == 0)
+ return (EINVAL);
+ if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
+ return (EINVAL);
+ nvcap = NULL;
+ buf = malloc(ifr->ifr_cap_nv.length, M_TEMP, M_WAITOK);
+ for (;;) {
+ error = copyin(ifr->ifr_cap_nv.buffer, buf,
+ ifr->ifr_cap_nv.length);
+ if (error != 0)
+ break;
+ nvcap = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0);
+ if (nvcap == NULL) {
+ error = EINVAL;
+ break;
+ }
+ drv_ioctl_data.reqcap = if_capnv_to_capint(nvcap,
+ &ifp->if_capenable, ifcap_nv_bit_names, false);
+ if ((drv_ioctl_data.reqcap &
+ ~ifp->if_capabilities) != 0) {
+ error = EINVAL;
+ break;
+ }
+ drv_ioctl_data.reqcap2 = if_capnv_to_capint(nvcap,
+ &ifp->if_capenable2, ifcap2_nv_bit_names, false);
+ if ((drv_ioctl_data.reqcap2 &
+ ~ifp->if_capabilities2) != 0) {
+ error = EINVAL;
+ break;
+ }
+ drv_ioctl_data.nvcap = nvcap;
+ error = (*ifp->if_ioctl)(ifp, SIOCSIFCAPNV,
+ (caddr_t)&drv_ioctl_data);
+ break;
+ }
+ nvlist_destroy(nvcap);
+ free(buf, M_TEMP);
+ if (error == 0)
+ getmicrotime(&ifp->if_lastchange);
+ break;
+
#ifdef MAC
case SIOCSIFMAC:
error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
diff --git a/sys/net/if.h b/sys/net/if.h
index 782e792cf87c..4bf29193e7ce 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -236,7 +236,7 @@ struct if_data {
#define IFCAP_TOE4 0x04000 /* interface can offload TCP */
#define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */
#define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */
-/* available 0x20000 */
+#define IFCAP_NV 0x20000 /* can do SIOCGIFCAPNV/SIOCSIFCAPNV */
#define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */
#define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */
#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */
@@ -260,7 +260,40 @@ struct if_data {
#define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6)
#define IFCAP_TXTLS (IFCAP_TXTLS4 | IFCAP_TXTLS6)
-#define IFCAP_CANTCHANGE (IFCAP_NETMAP)
+#define IFCAP_CANTCHANGE (IFCAP_NETMAP | IFCAP_NV)
+#define IFCAP_ALLCAPS 0xffffffff
+
+#define IFCAP_RXCSUM_NAME "RXCSUM"
+#define IFCAP_TXCSUM_NAME "TXCSUM"
+#define IFCAP_NETCONS_NAME "NETCONS"
+#define IFCAP_VLAN_MTU_NAME "VLAN_MTU"
+#define IFCAP_VLAN_HWTAGGING_NAME "VLAN_HWTAGGING"
+#define IFCAP_JUMBO_MTU_NAME "JUMBO_MTU"
+#define IFCAP_POLLING_NAME "POLLING"
+#define IFCAP_VLAN_HWCSUM_NAME "VLAN_HWCSUM"
+#define IFCAP_TSO4_NAME "TSO4"
+#define IFCAP_TSO6_NAME "TSO6"
+#define IFCAP_LRO_NAME "LRO"
+#define IFCAP_WOL_UCAST_NAME "WOL_UCAST"
+#define IFCAP_WOL_MCAST_NAME "WOL_MCAST"
+#define IFCAP_WOL_MAGIC_NAME "WOL_MAGIC"
+#define IFCAP_TOE4_NAME "TOE4"
+#define IFCAP_TOE6_NAME "TOE6"
+#define IFCAP_VLAN_HWFILTER_NAME "VLAN_HWFILTER"
+#define IFCAP_VLAN_HWTSO_NAME "VLAN_HWTSO"
+#define IFCAP_LINKSTATE_NAME "LINKSTATE"
+#define IFCAP_NETMAP_NAME "NETMAP"
+#define IFCAP_RXCSUM_IPV6_NAME "RXCSUM_IPV6"
+#define IFCAP_TXCSUM_IPV6_NAME "TXCSUM_IPV6"
+#define IFCAP_HWSTATS_NAME "HWSTATS"
+#define IFCAP_TXRTLMT_NAME "TXRTLMT"
+#define IFCAP_HWRXTSTMP_NAME "HWRXTSTMP"
+#define IFCAP_MEXTPG_NAME "MEXTPG"
+#define IFCAP_TXTLS4_NAME "TXTLS4"
+#define IFCAP_TXTLS6_NAME "TXTLS6"
+#define IFCAP_VXLAN_HWCSUM_NAME "VXLAN_HWCSUM"
+#define IFCAP_VXLAN_HWTSO_NAME "VXLAN_HWTSO"
+#define IFCAP_TXTLS_RTLMT_NAME "TXTLS_RTLMT"
#define IFQ_MAXLEN 50
#define IFNET_SLOWHZ 1 /* granularity is 1 second */
@@ -387,6 +420,15 @@ struct ifreq_buffer {
void *buffer;
};
+struct ifreq_nv_req {
+ u_int buf_length; /* Total size of buffer,
+ u_int for ABI struct ifreq */
+ u_int length; /* Length of the filled part */
+ void *buffer; /* Buffer itself, containing packed nv */
+};
+
+#define IFR_CAP_NV_MAXBUFSIZE (2 * 1024 * 1024)
+
/*
* Interface request structure used for socket
* ioctl's. All interface ioctl's must have parameter
@@ -411,6 +453,7 @@ struct ifreq {
int ifru_cap[2];
u_int ifru_fib;
u_char ifru_vlan_pcp;
+ struct ifreq_nv_req ifru_nv;
} ifr_ifru;
#define ifr_addr ifr_ifru.ifru_addr /* address */
#define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */
@@ -434,6 +477,7 @@ struct ifreq {
#define ifr_fib ifr_ifru.ifru_fib /* interface fib */
#define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */
#define ifr_lan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */
+#define ifr_cap_nv ifr_ifru.ifru_nv /* nv-based cap interface */
};
#define _SIZEOF_ADDR_IFREQ(ifr) \
@@ -605,6 +649,17 @@ MALLOC_DECLARE(M_IFMADDR);
extern struct sx ifnet_detach_sxlock;
+struct nvlist;
+struct ifcap_nv_bit_name;
+int if_capnv_to_capint(const struct nvlist *nv, int *old_cap,
+ const struct ifcap_nv_bit_name *nn, bool all);
+void if_capint_to_capnv(struct nvlist *nv,
+ const struct ifcap_nv_bit_name *nn, int ifr_cap, int ifr_req);
+struct siocsifcapnv_driver_data {
+ int reqcap;
+ int reqcap2;
+ struct nvlist *nvcap;
+};
#endif
#ifndef _KERNEL
diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h
index 93b8af28e171..b9ed4a439995 100644
--- a/sys/sys/sockio.h
+++ b/sys/sys/sockio.h
@@ -147,4 +147,7 @@
#define SIOCGIFDOWNREASON _IOWR('i', 154, struct ifdownreason)
+#define SIOCSIFCAPNV _IOW('i', 155, struct ifreq) /* set IF features */
+#define SIOCGIFCAPNV _IOWR('i', 156, struct ifreq) /* get IF features */
+
#endif /* !_SYS_SOCKIO_H_ */