Does FreeBSD have sendmmsg or recvmmsg system calls?

Konstantin Belousov kostikbel at gmail.com
Sun Jan 24 05:06:52 UTC 2016


Overall, the patch starts taking the committable shape, I only have small
notes about it.

On Fri, Jan 22, 2016 at 11:15:18AM +0200, Boris Astardzhiev wrote:
> be>None of the above.  Plain recvmsg() returns ssize_t and its len arg has
> be>type size_t.  That is excessively typedefed and excessively large with
> be>64-bit ssize_t, but it is silly for the multiple-message variant to use
> be>smaller types.
> be>
> be>Otherwise, all the integer types should be int.
> 
> It seems logical. I'll convert the ret easily to ssize_t and the vector
> length
> to size_t. Now it differs from the Linux prototype but I guess it's okay.
Lets try.  I do think that the change is for good.

> 
> be>The errno method (and not checking ret at all) is best if for syscalls
> that
> be>return -1 for a non-error.  It is not needed here.
> 
> Fixing it.
> 
> kb> I do not see any sense in making the functions with signature or
> semantic
> kb> different from Linux version.  Right now, the goal of including the
> patch
> kb> is compatibility.
> 
> Regarding recvmmsg() -
> I tried to implement MSG_WAITFORONE and the timeout stuff using
> pselect(2) due to the timespec structure. I could have used ppoll and
> I'm not sure which of these two is more appropriate or maybe there's
> another approach? Now it has timeout just as in the Linux prototype.
> Comments are welcomed.
You defer to ppoll() and pselect() due to the struct timespec type of
the argument, am I right ?

>
> See patch.

> diff --git a/lib/libc/include/namespace.h b/lib/libc/include/namespace.h
> index 739d7b1..c95829e 100644
> --- a/lib/libc/include/namespace.h
> +++ b/lib/libc/include/namespace.h
> @@ -208,6 +208,7 @@
>  #define		readv				_readv
>  #define		recvfrom			_recvfrom
>  #define		recvmsg				_recvmsg
> +#define		recvmmsg			_recvmmsg
>  #define		select				_select
>  #define		sem_close			_sem_close
>  #define		sem_destroy			_sem_destroy
> @@ -220,6 +221,7 @@
>  #define		sem_unlink			_sem_unlink
>  #define		sem_wait			_sem_wait
>  #define		sendmsg				_sendmsg
> +#define		sendmmsg			_sendmmsg
>  #define		sendto				_sendto
>  #define		setsockopt			_setsockopt
>  /*#define		sigaction			_sigaction*/
> diff --git a/lib/libc/include/un-namespace.h b/lib/libc/include/un-namespace.h
> index f31fa7a..0233348 100644
> --- a/lib/libc/include/un-namespace.h
> +++ b/lib/libc/include/un-namespace.h
> @@ -189,6 +189,7 @@
>  #undef		readv
>  #undef		recvfrom
>  #undef		recvmsg
> +#undef		recvmmsg
>  #undef		select
>  #undef		sem_close
>  #undef		sem_destroy
> @@ -201,6 +202,7 @@
>  #undef		sem_unlink
>  #undef		sem_wait
>  #undef		sendmsg
> +#undef		sendmmsg
>  #undef		sendto
>  #undef		setsockopt
>  #undef		sigaction
> diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc
> index e4fe1b2..5f8b699 100644
> --- a/lib/libc/sys/Makefile.inc
> +++ b/lib/libc/sys/Makefile.inc
> @@ -28,6 +28,8 @@ SRCS+= futimens.c utimensat.c
>  NOASM+= futimens.o utimensat.o
>  PSEUDO+= _futimens.o _utimensat.o
>  
> +SRCS+= recvmmsg.c sendmmsg.c
> +
BTW, just noted, I think the functions should live in libc/gen.

>  INTERPOSED = \
>  	accept \
>  	accept4 \
> diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
> index 7b3257c..dc2ed0e 100644
> --- a/lib/libc/sys/Symbol.map
> +++ b/lib/libc/sys/Symbol.map
> @@ -399,6 +399,8 @@ FBSD_1.4 {
>  	utimensat;
>  	numa_setaffinity;
>  	numa_getaffinity;
> +	sendmmsg;
> +	recvmmsg;
>  };
>  
>  FBSDprivate_1.0 {
> diff --git a/lib/libc/sys/recv.2 b/lib/libc/sys/recv.2
> index 326e7ff..fd2b2a1 100644
> --- a/lib/libc/sys/recv.2
> +++ b/lib/libc/sys/recv.2
> @@ -34,8 +34,9 @@
>  .Sh NAME
>  .Nm recv ,
>  .Nm recvfrom ,
> -.Nm recvmsg
> -.Nd receive a message from a socket
> +.Nm recvmsg ,
> +.Nm recvmmsg
> +.Nd receive message(s) from a socket
>  .Sh LIBRARY
>  .Lb libc
>  .Sh SYNOPSIS
> @@ -47,11 +48,15 @@
>  .Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr * restrict from" "socklen_t * restrict fromlen"
>  .Ft ssize_t
>  .Fn recvmsg "int s" "struct msghdr *msg" "int flags"
> +.Ft ssize_t
> +.Fn recvmmsg "int s" "struct mmsghdr *msgvec" "size_t vlen" "int flags" "const struct timespec *timeout"
>  .Sh DESCRIPTION
>  The
>  .Fn recvfrom
>  and
>  .Fn recvmsg
> +and
> +.Fn recvmmsg
>  system calls
>  are used to receive messages from a socket,
>  and may be used to receive data on a socket whether or not
> @@ -84,8 +89,30 @@ null pointer passed as its
>  .Fa from
>  argument.
>  .Pp
> -All three routines return the length of the message on successful
> -completion.
> +The
> +.Fn recvmmsg
> +function is used to receive multiple
> +messages at a call.
> +Their number
> +is supplied by
> +.Fa vlen .
> +The messages are placed in the
> +.Fa msgvec
> +vector after reception.
> +The size of each received message is placed in the
> +.Fa msg_len
> +field of each element of the vector.
> +If
> +.Fa timeout
> +is NULL the call will normally block. Otherwise it will wait for data
> +for the specified amount of time. If the timeout expires and there is
> +no data received a value of 0 is returned. pselect(2) is used for the
> +implementation of the timeout mechanism.
Put each sentence on new line.

> +.Pp
> +The first three routines return the length of the message on successful
> +completion whereas
> +.Fn recvmmsg
> +returns the number of received messages.
>  If a message is too long to fit in the supplied buffer,
>  excess bytes may be discarded depending on the type of socket
>  the message is received from (see
> @@ -100,7 +127,9 @@ in which case the value
>  .Va errno
>  is set to
>  .Er EAGAIN .
> -The receive calls normally return any data available,
> +The receive calls except
> +.Fn recvmmsg
> +normally return any data available,
>  up to the requested amount,
>  rather than waiting for receipt of the full amount requested;
>  this behavior is affected by the socket-level options
> @@ -127,6 +156,9 @@ one or more of the values:
>  .It Dv MSG_WAITALL Ta wait for full request or error
>  .It Dv MSG_DONTWAIT Ta do not block
>  .It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec
> +.It Dv MSG_WAITFORONE Ta do not block after receiving the first message
> +(relevant only for
> +.Fn recvmmsg )
>  .El
>  .Pp
>  The
> @@ -158,6 +190,11 @@ is set to
>  This flag is not available in strict
>  .Tn ANSI
>  or C99 compilation mode.
> +The
> +.Dv MSG_WAITFORONE
> +flag sets MSG_DONTWAIT after the first message has been received. This flag
> +is only relevant for
> +.Fn recvmmsg .
>  .Pp
>  The
>  .Fn recvmsg
> @@ -290,9 +327,34 @@ control data were discarded due to lack of space in the buffer
>  for ancillary data.
>  .Dv MSG_OOB
>  is returned to indicate that expedited or out-of-band data were received.
> +.Pp
> +The
> +.Fn recvmmsg
> +system call uses the
> +.Fa mmsghdr
> +structure. Its form is as follows, as defined in
> +.In sys/socket.h :
> +.Bd -literal
> +struct mmsghdr {
> +	struct msghdr	 msg_hdr;	/* message header */
> +	unsigned int	 msg_len;	/* message length */
> +};
> +.Ed
> +.Pp
> +For
> +.Fa msg_hdr
> +see above. On data reception the
> +.Fa msg_len
> +field is updated to the length of the received message. On
> +data transmission it is updated to the number of
> +characters sent.
>  .Sh RETURN VALUES
> -These calls return the number of bytes received, or -1
> -if an error occurred.
> +These calls except
> +.Fn recvmmsg
> +return the number of bytes received.
> +.Fn recvmmsg
> +returns the number of messages received.
> +A value of -1 is returned if an error occurred.
>  .Sh ERRORS
>  The calls fail if:
>  .Bl -tag -width Er
> diff --git a/lib/libc/sys/recvmmsg.c b/lib/libc/sys/recvmmsg.c
> new file mode 100644
> index 0000000..19a937b
> --- /dev/null
> +++ b/lib/libc/sys/recvmmsg.c
> @@ -0,0 +1,96 @@
> +/*
> + * Copyright (c) 2016 Boris Astardzhiev, Smartcom-Bulgaria AD
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice(s), this list of conditions and the following disclaimer as
> + *    the first lines of this file unmodified other than the possible
> + *    addition of one or more copyright notices.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice(s), this list of conditions and the following disclaimer in
> + *    the documentation and/or other materials provided with the
> + *    distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
> + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
> + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
> + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <errno.h>
> +#include <sys/types.h>
> +#include <sys/syscall.h>
> +#include <sys/socket.h>
> +#include <sys/select.h>
> +#include <pthread.h>
> +#include "libc_private.h"
> +
> +#define CMTR(s, timeout)						\
> +	do {								\
> +		fd_set fds;						\
> +		int res;						\
> +									\
> +		FD_ZERO(&fds);						\
> +		FD_SET((s), &fds);					\
> +		res = __sys_pselect((s)+1, &fds, NULL, NULL, (timeout), NULL);\
Why do you need the syscall there ?  Cancellation before any data was
received is fine, since cancellation would not result in data loss.

> +		if (res == -1 || res == 0)				\
> +			return (res);					\
> +		if (!FD_ISSET((s), &fds))				\
> +			return (-1);					\
> +	} while (0);
> +
> +ssize_t
> +recvmmsg(int s, struct mmsghdr *msgvec, size_t vlen, int flags,
> +    const struct timespec *timeout)
> +{
> +	size_t i, rcvd;
> +	ssize_t ret;
> +
> +	if (timeout != NULL)
> +		CMTR(s, timeout);
The CMTR define is only used once.  I do not see why not inline it, and
get rid of the staircase of backslashes.

> +
> +	ret = __sys_recvmsg(s, &msgvec[0].msg_hdr, flags);
> +	if (ret == -1)
> +		return (ret);
> +
> +	/* Check initially for the presence of MSG_WAITFORONE.
> +	 * Turn on MSG_DONTWAIT if set. */
> +	if (flags & MSG_WAITFORONE) {
> +		flags |= MSG_DONTWAIT;
> +		/* The kernel doesn't need to know about this flag. */
> +		flags &= ~MSG_WAITFORONE;
> +	}
> +
> +	rcvd = 1;
> +	for (i = rcvd; i < vlen; i++) {
i = rcvd = 1; ... i++, rcvd++ ?

> +		ret = __sys_recvmsg(s, &msgvec[i].msg_hdr, flags);
> +		if (ret == -1) {
> +			if (rcvd != 0) {
> +				/* We've received messages. Let caller know. */
> +				return (rcvd);
> +			}
> +			return (ret);
> +		}
> +
> +		/* Save received bytes */
> +		msgvec[i].msg_len = ret;
> +		rcvd++;
> +	}
> +
> +	return (rcvd);
> +}
> +
> +#undef CMTR
> diff --git a/lib/libc/sys/send.2 b/lib/libc/sys/send.2
> index 8fa2c64..33fa58d 100644
> --- a/lib/libc/sys/send.2
> +++ b/lib/libc/sys/send.2
> @@ -34,8 +34,9 @@
>  .Sh NAME
>  .Nm send ,
>  .Nm sendto ,
> -.Nm sendmsg
> -.Nd send a message from a socket
> +.Nm sendmsg ,
> +.Nm sendmmsg
> +.Nd send message(s) from a socket
>  .Sh LIBRARY
>  .Lb libc
>  .Sh SYNOPSIS
> @@ -47,6 +48,8 @@
>  .Fn sendto "int s" "const void *msg" "size_t len" "int flags" "const struct sockaddr *to" "socklen_t tolen"
>  .Ft ssize_t
>  .Fn sendmsg "int s" "const struct msghdr *msg" "int flags"
> +.Ft ssize_t
> +.Fn sendmmsg "int s" "struct mmsghdr *msgvec" "size_t vlen" "int flags"
>  .Sh DESCRIPTION
>  The
>  .Fn send
> @@ -55,8 +58,10 @@ and
>  .Fn sendto
>  and
>  .Fn sendmsg
> +and
> +.Fn sendmmsg
>  system calls
> -are used to transmit a message to another socket.
> +are used to transmit one or multiple messages (with the latter call) to another socket.
>  The
>  .Fn send
>  function
> @@ -66,6 +71,8 @@ state, while
>  .Fn sendto
>  and
>  .Fn sendmsg
> +and
> +.Fn sendmmsg
>  may be used at any time.
>  .Pp
>  The address of the target is given by
> @@ -81,6 +88,18 @@ underlying protocol, the error
>  is returned, and
>  the message is not transmitted.
>  .Pp
> +The
> +.Fn sendmmsg
> +function sends multiple messages at a call.
> +They are given by the
> +.Fa msgvec
> +vector along with
> +.Fa vlen
> +specifying its size. The number of
> +characters sent per each message is placed in the
> +.Fa msg_len
> +field of each element of the vector after transmission.
> +.Pp
>  No indication of failure to deliver is implicit in a
>  .Fn send .
>  Locally detected errors are indicated by a return value of -1.
> @@ -138,10 +157,16 @@ See
>  .Xr recv 2
>  for a description of the
>  .Fa msghdr
> +structure and the
> +.Fa mmsghdr
>  structure.
>  .Sh RETURN VALUES
> -The call returns the number of characters sent, or -1
> -if an error occurred.
> +All calls except
> +.Fn sendmmsg
> +return the number of characters sent. The
> +.Fn sendmmsg
> +call returns the number of messages sent.
> +If an error occurred a value of -1 is returned.
>  .Sh ERRORS
>  The
>  .Fn send
> @@ -149,6 +174,8 @@ function and
>  .Fn sendto
>  and
>  .Fn sendmsg
> +and
> +.Fn sendmmsg
>  system calls
>  fail if:
>  .Bl -tag -width Er
> diff --git a/lib/libc/sys/sendmmsg.c b/lib/libc/sys/sendmmsg.c
> new file mode 100644
> index 0000000..cef35a3
> --- /dev/null
> +++ b/lib/libc/sys/sendmmsg.c
> @@ -0,0 +1,63 @@
> +/*
> + * Copyright (c) 2016 Boris Astardzhiev, Smartcom-Bulgaria AD
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice(s), this list of conditions and the following disclaimer as
> + *    the first lines of this file unmodified other than the possible
> + *    addition of one or more copyright notices.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice(s), this list of conditions and the following disclaimer in
> + *    the documentation and/or other materials provided with the
> + *    distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
> + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
> + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
> + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
> + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <errno.h>
> +#include <sys/types.h>
> +#include <sys/syscall.h>
> +#include <sys/socket.h>
> +#include <pthread.h>
> +#include "libc_private.h"
> +
> +ssize_t
> +sendmmsg(int s, struct mmsghdr *msgvec, size_t vlen, int flags)
> +{
> +	size_t i, sent;
> +	ssize_t ret;
> +
> +	sent = 0;
> +	for (i = 0; i < vlen; i++) {
sent = i = 0; ... i++, sent++
> +		ret = __sys_sendmsg(s, &msgvec[i].msg_hdr, flags);
> +		if (ret == -1) {
> +			if (sent != 0) {
> +				/* We have sent messages. Let caller know. */
> +				return (sent);
> +			}
> +			return (ret);
> +		}
> +
> +		/* Save sent bytes */
> +		msgvec[i].msg_len = ret;
> +		sent++;
> +	}
> +
> +	return (sent);
> +}
> diff --git a/sys/sys/socket.h b/sys/sys/socket.h
> index 18e2de1..d95f29e 100644
> --- a/sys/sys/socket.h
> +++ b/sys/sys/socket.h
> @@ -435,6 +435,11 @@ struct msghdr {
>  #ifdef _KERNEL
>  #define	MSG_SOCALLBCK   0x10000		/* for use by socket callbacks - soreceive (TCP) */
>  #endif
> +#ifndef _KERNEL
> +#ifdef __BSD_VISIBLE
> +#define MSG_WAITFORONE	0x100000	/* used in recvmmsg() */
Move the define to the previous __BSD_VISIBLE block, which ends with
CMSG_CLOEXEC.  Also, it seems that the next unused bit is 0x80000.
Replace the comment by 'for recvmmsg()', the MSG_COMPAT is something
private.

> +#endif /* __BSD_VISIBLE */
> +#endif /* !_KERNEL */
>  
>  /*
>   * Header for ancillary data objects in msg_control buffer.
> @@ -595,6 +600,18 @@ struct sf_hdtr {
>  #endif /* _KERNEL */
>  #endif /* __BSD_VISIBLE */
>  
> +#ifndef _KERNEL
> +#ifdef __BSD_VISIBLE
> +/*
> + * Send/recvmmsg specific structure(s)
> + */
> +struct mmsghdr {
> +	struct msghdr	msg_hdr;		/* message header */
> +	unsigned int	msg_len;		/* message length */
Still int for msg_len ?

> +};
> +#endif /* __BSD_VISIBLE */
> +#endif /* !_KERNEL */
> +
>  #ifndef	_KERNEL
>  
>  #include <sys/cdefs.h>
> @@ -615,11 +632,19 @@ int	listen(int, int);
>  ssize_t	recv(int, void *, size_t, int);
>  ssize_t	recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict);
>  ssize_t	recvmsg(int, struct msghdr *, int);
> +#if __BSD_VISIBLE
> +struct timespec;
> +ssize_t	recvmmsg(int, struct mmsghdr *, size_t, int,
> +    const struct timespec *);
It probably makes sense to mark pointers with __restrict.

> +#endif
>  ssize_t	send(int, const void *, size_t, int);
>  ssize_t	sendto(int, const void *,
>  	    size_t, int, const struct sockaddr *, socklen_t);
>  ssize_t	sendmsg(int, const struct msghdr *, int);
>  #if __BSD_VISIBLE
> +ssize_t	sendmmsg(int, struct mmsghdr *, size_t, int);
> +#endif
> +#if __BSD_VISIBLE
>  int	sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int);
>  int	setfib(int);
>  #endif

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/freebsd-threads/attachments/20160124/66563e16/attachment.sig>


More information about the freebsd-threads mailing list