Does FreeBSD have sendmmsg or recvmmsg system calls?
Konstantin Belousov
kostikbel at gmail.com
Sun Jan 24 05:06:52 UTC 2016
Overall, the patch is starting to take a committable shape; I only have a
few small notes about it.
On Fri, Jan 22, 2016 at 11:15:18AM +0200, Boris Astardzhiev wrote:
> be>None of the above. Plain recvmsg() returns ssize_t and its len arg has
> be>type size_t. That is excessively typedefed and excessively large with
> be>64-bit ssize_t, but it is silly for the multiple-message variant to use
> be>smaller types.
> be>
> be>Otherwise, all the integer types should be int.
>
> It seems logical. I'll convert the ret easily to ssize_t and the vector
> length
> to size_t. Now it differs from the Linux prototype but I guess it's okay.
Let's try. I do think that the change is for the better.
>
> be>The errno method (and not checking ret at all) is best if for syscalls
> that
> be>return -1 for a non-error. It is not needed here.
>
> Fixing it.
>
> kb> I do not see any sense in making the functions with signature or
> semantic
> kb> different from Linux version. Right now, the goal of including the
> patch
> kb> is compatibility.
>
> Regarding recvmmsg() -
> I tried to implement MSG_WAITFORONE and the timeout stuff using
> pselect(2) due to the timespec structure. I could have used ppoll and
> I'm not sure which of these two is more appropriate or maybe there's
> another approach? Now it has timeout just as in the Linux prototype.
> Comments are welcomed.
You turned to ppoll() and pselect() because of the struct timespec type of
the timeout argument, am I right?
>
> See patch.
> diff --git a/lib/libc/include/namespace.h b/lib/libc/include/namespace.h
> index 739d7b1..c95829e 100644
> --- a/lib/libc/include/namespace.h
> +++ b/lib/libc/include/namespace.h
> @@ -208,6 +208,7 @@
> #define readv _readv
> #define recvfrom _recvfrom
> #define recvmsg _recvmsg
> +#define recvmmsg _recvmmsg
> #define select _select
> #define sem_close _sem_close
> #define sem_destroy _sem_destroy
> @@ -220,6 +221,7 @@
> #define sem_unlink _sem_unlink
> #define sem_wait _sem_wait
> #define sendmsg _sendmsg
> +#define sendmmsg _sendmmsg
> #define sendto _sendto
> #define setsockopt _setsockopt
> /*#define sigaction _sigaction*/
> diff --git a/lib/libc/include/un-namespace.h b/lib/libc/include/un-namespace.h
> index f31fa7a..0233348 100644
> --- a/lib/libc/include/un-namespace.h
> +++ b/lib/libc/include/un-namespace.h
> @@ -189,6 +189,7 @@
> #undef readv
> #undef recvfrom
> #undef recvmsg
> +#undef recvmmsg
> #undef select
> #undef sem_close
> #undef sem_destroy
> @@ -201,6 +202,7 @@
> #undef sem_unlink
> #undef sem_wait
> #undef sendmsg
> +#undef sendmmsg
> #undef sendto
> #undef setsockopt
> #undef sigaction
> diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc
> index e4fe1b2..5f8b699 100644
> --- a/lib/libc/sys/Makefile.inc
> +++ b/lib/libc/sys/Makefile.inc
> @@ -28,6 +28,8 @@ SRCS+= futimens.c utimensat.c
> NOASM+= futimens.o utimensat.o
> PSEUDO+= _futimens.o _utimensat.o
>
> +SRCS+= recvmmsg.c sendmmsg.c
> +
BTW, I just noticed: I think these functions should live in libc/gen.
> INTERPOSED = \
> accept \
> accept4 \
> diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
> index 7b3257c..dc2ed0e 100644
> --- a/lib/libc/sys/Symbol.map
> +++ b/lib/libc/sys/Symbol.map
> @@ -399,6 +399,8 @@ FBSD_1.4 {
> utimensat;
> numa_setaffinity;
> numa_getaffinity;
> + sendmmsg;
> + recvmmsg;
> };
>
> FBSDprivate_1.0 {
> diff --git a/lib/libc/sys/recv.2 b/lib/libc/sys/recv.2
> index 326e7ff..fd2b2a1 100644
> --- a/lib/libc/sys/recv.2
> +++ b/lib/libc/sys/recv.2
> @@ -34,8 +34,9 @@
> .Sh NAME
> .Nm recv ,
> .Nm recvfrom ,
> -.Nm recvmsg
> -.Nd receive a message from a socket
> +.Nm recvmsg ,
> +.Nm recvmmsg
> +.Nd receive message(s) from a socket
> .Sh LIBRARY
> .Lb libc
> .Sh SYNOPSIS
> @@ -47,11 +48,15 @@
> .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr * restrict from" "socklen_t * restrict fromlen"
> .Ft ssize_t
> .Fn recvmsg "int s" "struct msghdr *msg" "int flags"
> +.Ft ssize_t
> +.Fn recvmmsg "int s" "struct mmsghdr *msgvec" "size_t vlen" "int flags" "const struct timespec *timeout"
> .Sh DESCRIPTION
> The
> .Fn recvfrom
> and
> .Fn recvmsg
> +and
> +.Fn recvmmsg
> system calls
> are used to receive messages from a socket,
> and may be used to receive data on a socket whether or not
> @@ -84,8 +89,30 @@ null pointer passed as its
> .Fa from
> argument.
> .Pp
> -All three routines return the length of the message on successful
> -completion.
> +The
> +.Fn recvmmsg
> +function is used to receive multiple
> +messages at a call.
> +Their number
> +is supplied by
> +.Fa vlen .
> +The messages are placed in the
> +.Fa msgvec
> +vector after reception.
> +The size of each received message is placed in the
> +.Fa msg_len
> +field of each element of the vector.
> +If
> +.Fa timeout
> +is NULL the call will normally block. Otherwise it will wait for data
> +for the specified amount of time. If the timeout expires and there is
> +no data received a value of 0 is returned. pselect(2) is used for the
> +implementation of the timeout mechanism.
Put each sentence on a new line.
> +.Pp
> +The first three routines return the length of the message on successful
> +completion whereas
> +.Fn recvmmsg
> +returns the number of received messages.
> If a message is too long to fit in the supplied buffer,
> excess bytes may be discarded depending on the type of socket
> the message is received from (see
> @@ -100,7 +127,9 @@ in which case the value
> .Va errno
> is set to
> .Er EAGAIN .
> -The receive calls normally return any data available,
> +The receive calls except
> +.Fn recvmmsg
> +normally return any data available,
> up to the requested amount,
> rather than waiting for receipt of the full amount requested;
> this behavior is affected by the socket-level options
> @@ -127,6 +156,9 @@ one or more of the values:
> .It Dv MSG_WAITALL Ta wait for full request or error
> .It Dv MSG_DONTWAIT Ta do not block
> .It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec
> +.It Dv MSG_WAITFORONE Ta do not block after receiving the first message
> +(relevant only for
> +.Fn recvmmsg )
> .El
> .Pp
> The
> @@ -158,6 +190,11 @@ is set to
> This flag is not available in strict
> .Tn ANSI
> or C99 compilation mode.
> +The
> +.Dv MSG_WAITFORONE
> +flag sets MSG_DONTWAIT after the first message has been received. This flag
> +is only relevant for
> +.Fn recvmmsg .
> .Pp
> The
> .Fn recvmsg
> @@ -290,9 +327,34 @@ control data were discarded due to lack of space in the buffer
> for ancillary data.
> .Dv MSG_OOB
> is returned to indicate that expedited or out-of-band data were received.
> +.Pp
> +The
> +.Fn recvmmsg
> +system call uses the
> +.Fa mmsghdr
> +structure. Its form is as follows, as defined in
> +.In sys/socket.h :
> +.Bd -literal
> +struct mmsghdr {
> + struct msghdr msg_hdr; /* message header */
> + unsigned int msg_len; /* message length */
> +};
> +.Ed
> +.Pp
> +For
> +.Fa msg_hdr
> +see above. On data reception the
> +.Fa msg_len
> +field is updated to the length of the received message. On
> +data transmission it is updated to the number of
> +characters sent.
> .Sh RETURN VALUES
> -These calls return the number of bytes received, or -1
> -if an error occurred.
> +These calls except
> +.Fn recvmmsg
> +return the number of bytes received.
> +.Fn recvmmsg
> +returns the number of messages received.
> +A value of -1 is returned if an error occurred.
> .Sh ERRORS
> The calls fail if:
> .Bl -tag -width Er
> diff --git a/lib/libc/sys/recvmmsg.c b/lib/libc/sys/recvmmsg.c
> new file mode 100644
> index 0000000..19a937b
> --- /dev/null
> +++ b/lib/libc/sys/recvmmsg.c
> @@ -0,0 +1,96 @@
> +/*
> + * Copyright (c) 2016 Boris Astardzhiev, Smartcom-Bulgaria AD
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice(s), this list of conditions and the following disclaimer as
> + * the first lines of this file unmodified other than the possible
> + * addition of one or more copyright notices.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice(s), this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
> + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
> + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
> + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <errno.h>
> +#include <sys/types.h>
> +#include <sys/syscall.h>
> +#include <sys/socket.h>
> +#include <sys/select.h>
> +#include <pthread.h>
> +#include "libc_private.h"
> +
> +#define CMTR(s, timeout) \
> + do { \
> + fd_set fds; \
> + int res; \
> + \
> + FD_ZERO(&fds); \
> + FD_SET((s), &fds); \
> + res = __sys_pselect((s)+1, &fds, NULL, NULL, (timeout), NULL);\
Why do you need the direct syscall (__sys_pselect) there? Cancellation before
any data was received is fine, since cancellation would not result in data loss.
> + if (res == -1 || res == 0) \
> + return (res); \
> + if (!FD_ISSET((s), &fds)) \
> + return (-1); \
> + } while (0);
> +
> +ssize_t
> +recvmmsg(int s, struct mmsghdr *msgvec, size_t vlen, int flags,
> + const struct timespec *timeout)
> +{
> + size_t i, rcvd;
> + ssize_t ret;
> +
> + if (timeout != NULL)
> + CMTR(s, timeout);
The CMTR macro is only used once. I do not see why you should not inline it
and get rid of the staircase of backslashes.
> +
> + ret = __sys_recvmsg(s, &msgvec[0].msg_hdr, flags);
> + if (ret == -1)
> + return (ret);
> +
> + /* Check initially for the presence of MSG_WAITFORONE.
> + * Turn on MSG_DONTWAIT if set. */
> + if (flags & MSG_WAITFORONE) {
> + flags |= MSG_DONTWAIT;
> + /* The kernel doesn't need to know about this flag. */
> + flags &= ~MSG_WAITFORONE;
> + }
> +
> + rcvd = 1;
> + for (i = rcvd; i < vlen; i++) {
Fold the counter into the for statement: i = rcvd = 1; ...; i++, rcvd++ ?
> + ret = __sys_recvmsg(s, &msgvec[i].msg_hdr, flags);
> + if (ret == -1) {
> + if (rcvd != 0) {
> + /* We've received messages. Let caller know. */
> + return (rcvd);
> + }
> + return (ret);
> + }
> +
> + /* Save received bytes */
> + msgvec[i].msg_len = ret;
> + rcvd++;
> + }
> +
> + return (rcvd);
> +}
> +
> +#undef CMTR
> diff --git a/lib/libc/sys/send.2 b/lib/libc/sys/send.2
> index 8fa2c64..33fa58d 100644
> --- a/lib/libc/sys/send.2
> +++ b/lib/libc/sys/send.2
> @@ -34,8 +34,9 @@
> .Sh NAME
> .Nm send ,
> .Nm sendto ,
> -.Nm sendmsg
> -.Nd send a message from a socket
> +.Nm sendmsg ,
> +.Nm sendmmsg
> +.Nd send message(s) from a socket
> .Sh LIBRARY
> .Lb libc
> .Sh SYNOPSIS
> @@ -47,6 +48,8 @@
> .Fn sendto "int s" "const void *msg" "size_t len" "int flags" "const struct sockaddr *to" "socklen_t tolen"
> .Ft ssize_t
> .Fn sendmsg "int s" "const struct msghdr *msg" "int flags"
> +.Ft ssize_t
> +.Fn sendmmsg "int s" "struct mmsghdr *msgvec" "size_t vlen" "int flags"
> .Sh DESCRIPTION
> The
> .Fn send
> @@ -55,8 +58,10 @@ and
> .Fn sendto
> and
> .Fn sendmsg
> +and
> +.Fn sendmmsg
> system calls
> -are used to transmit a message to another socket.
> +are used to transmit one or multiple messages (with the latter call) to another socket.
> The
> .Fn send
> function
> @@ -66,6 +71,8 @@ state, while
> .Fn sendto
> and
> .Fn sendmsg
> +and
> +.Fn sendmmsg
> may be used at any time.
> .Pp
> The address of the target is given by
> @@ -81,6 +88,18 @@ underlying protocol, the error
> is returned, and
> the message is not transmitted.
> .Pp
> +The
> +.Fn sendmmsg
> +function sends multiple messages at a call.
> +They are given by the
> +.Fa msgvec
> +vector along with
> +.Fa vlen
> +specifying its size. The number of
> +characters sent per each message is placed in the
> +.Fa msg_len
> +field of each element of the vector after transmission.
> +.Pp
> No indication of failure to deliver is implicit in a
> .Fn send .
> Locally detected errors are indicated by a return value of -1.
> @@ -138,10 +157,16 @@ See
> .Xr recv 2
> for a description of the
> .Fa msghdr
> +structure and the
> +.Fa mmsghdr
> structure.
> .Sh RETURN VALUES
> -The call returns the number of characters sent, or -1
> -if an error occurred.
> +All calls except
> +.Fn sendmmsg
> +return the number of characters sent. The
> +.Fn sendmmsg
> +call returns the number of messages sent.
> +If an error occurred a value of -1 is returned.
> .Sh ERRORS
> The
> .Fn send
> @@ -149,6 +174,8 @@ function and
> .Fn sendto
> and
> .Fn sendmsg
> +and
> +.Fn sendmmsg
> system calls
> fail if:
> .Bl -tag -width Er
> diff --git a/lib/libc/sys/sendmmsg.c b/lib/libc/sys/sendmmsg.c
> new file mode 100644
> index 0000000..cef35a3
> --- /dev/null
> +++ b/lib/libc/sys/sendmmsg.c
> @@ -0,0 +1,63 @@
> +/*
> + * Copyright (c) 2016 Boris Astardzhiev, Smartcom-Bulgaria AD
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice(s), this list of conditions and the following disclaimer as
> + * the first lines of this file unmodified other than the possible
> + * addition of one or more copyright notices.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice(s), this list of conditions and the following disclaimer in
> + * the documentation and/or other materials provided with the
> + * distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
> + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
> + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
> + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <errno.h>
> +#include <sys/types.h>
> +#include <sys/syscall.h>
> +#include <sys/socket.h>
> +#include <pthread.h>
> +#include "libc_private.h"
> +
> +ssize_t
> +sendmmsg(int s, struct mmsghdr *msgvec, size_t vlen, int flags)
> +{
> + size_t i, sent;
> + ssize_t ret;
> +
> + sent = 0;
> + for (i = 0; i < vlen; i++) {
Fold the counter into the for statement: sent = i = 0; ...; i++, sent++
> + ret = __sys_sendmsg(s, &msgvec[i].msg_hdr, flags);
> + if (ret == -1) {
> + if (sent != 0) {
> + /* We have sent messages. Let caller know. */
> + return (sent);
> + }
> + return (ret);
> + }
> +
> + /* Save sent bytes */
> + msgvec[i].msg_len = ret;
> + sent++;
> + }
> +
> + return (sent);
> +}
> diff --git a/sys/sys/socket.h b/sys/sys/socket.h
> index 18e2de1..d95f29e 100644
> --- a/sys/sys/socket.h
> +++ b/sys/sys/socket.h
> @@ -435,6 +435,11 @@ struct msghdr {
> #ifdef _KERNEL
> #define MSG_SOCALLBCK 0x10000 /* for use by socket callbacks - soreceive (TCP) */
> #endif
> +#ifndef _KERNEL
> +#ifdef __BSD_VISIBLE
> +#define MSG_WAITFORONE 0x100000 /* used in recvmmsg() */
Move the define to the previous __BSD_VISIBLE block, which ends with
MSG_CMSG_CLOEXEC. Also, it seems that the next unused bit is 0x80000.
Replace the comment with 'for recvmmsg()'; MSG_COMPAT is something
private.
> +#endif /* __BSD_VISIBLE */
> +#endif /* !_KERNEL */
>
> /*
> * Header for ancillary data objects in msg_control buffer.
> @@ -595,6 +600,18 @@ struct sf_hdtr {
> #endif /* _KERNEL */
> #endif /* __BSD_VISIBLE */
>
> +#ifndef _KERNEL
> +#ifdef __BSD_VISIBLE
> +/*
> + * Send/recvmmsg specific structure(s)
> + */
> +struct mmsghdr {
> + struct msghdr msg_hdr; /* message header */
> + unsigned int msg_len; /* message length */
Still int for msg_len?
> +};
> +#endif /* __BSD_VISIBLE */
> +#endif /* !_KERNEL */
> +
> #ifndef _KERNEL
>
> #include <sys/cdefs.h>
> @@ -615,11 +632,19 @@ int listen(int, int);
> ssize_t recv(int, void *, size_t, int);
> ssize_t recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict);
> ssize_t recvmsg(int, struct msghdr *, int);
> +#if __BSD_VISIBLE
> +struct timespec;
> +ssize_t recvmmsg(int, struct mmsghdr *, size_t, int,
> + const struct timespec *);
It probably makes sense to mark pointers with __restrict.
> +#endif
> ssize_t send(int, const void *, size_t, int);
> ssize_t sendto(int, const void *,
> size_t, int, const struct sockaddr *, socklen_t);
> ssize_t sendmsg(int, const struct msghdr *, int);
> #if __BSD_VISIBLE
> +ssize_t sendmmsg(int, struct mmsghdr *, size_t, int);
> +#endif
> +#if __BSD_VISIBLE
> int sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int);
> int setfib(int);
> #endif
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/freebsd-threads/attachments/20160124/66563e16/attachment.sig>
More information about the freebsd-threads
mailing list