git: 1124fc364734 - main - sysutils/slurm-wlm: Fix slurmd and slurmstepd crash due to missing sockaddr length handling in bind()/connect()

From: Vladimir Druzenko <vvd_at_FreeBSD.org>
Date: Sat, 06 Sep 2025 11:17:40 UTC
The branch main has been updated by vvd:

URL: https://cgit.FreeBSD.org/ports/commit/?id=1124fc3647342e2d3fab3a4bb16d52d7bd3d76a0

commit 1124fc3647342e2d3fab3a4bb16d52d7bd3d76a0
Author:     Generic Rikka <rikka.goering@outlook.de>
AuthorDate: 2025-09-06 11:08:58 +0000
Commit:     Vladimir Druzenko <vvd@FreeBSD.org>
CommitDate: 2025-09-06 11:16:18 +0000

    sysutils/slurm-wlm: Fix slurmd and slurmstepd crash due to missing sockaddr length handling in bind()/connect()
    
    After some time the daemons lose connection. Submitting tasks via srun
    fails, and slurmd eventually crashes with a segmentation fault.
    
    The root cause appears to be that several bind() and connect() calls do
    not set the sockaddr length fields (sun_len, sin_len, sin6_len) correctly
    on FreeBSD. Without them, sockets are set up improperly, which leads to
    runtime errors.
    
    Upstream bug report: https://support.schedmd.com/show_bug.cgi?id=23658
    
    PR:             288983
    Co-authored-by: Vladimir Druzenko <vvd@FreeBSD.org>
    MFH:            2025Q3
---
 sysutils/slurm-wlm/Makefile                        |   2 +-
 sysutils/slurm-wlm/files/patch-src_common_conmgr.c |  40 +++++++-
 sysutils/slurm-wlm/files/patch-src_common_net.c    |  68 +++++++++++++
 .../slurm-wlm/files/patch-src_common_sack__api.c   |  28 ++++++
 .../patch-src_common_slurm__protocol__socket.c     | 107 ++++++++++++++++-----
 .../slurm-wlm/files/patch-src_common_stepd__api.c  |  36 +++++++
 .../slurm-wlm/files/patch-src_common_xsystemd.c    |  25 +++++
 .../files/patch-src_plugins_auth_slurm_sack.c      |  17 ++++
 .../files/patch-src_slurmd_slurmstepd_req.c        |  15 +++
 9 files changed, 310 insertions(+), 28 deletions(-)

diff --git a/sysutils/slurm-wlm/Makefile b/sysutils/slurm-wlm/Makefile
index 3059899a191c..e79c304832ae 100644
--- a/sysutils/slurm-wlm/Makefile
+++ b/sysutils/slurm-wlm/Makefile
@@ -1,6 +1,6 @@
 PORTNAME=	slurm
 DISTVERSION=	23.11.7
-PORTREVISION=	8
+PORTREVISION=	9
 CATEGORIES=	sysutils
 MASTER_SITES=	https://download.schedmd.com/slurm/
 PKGNAMESUFFIX=	-wlm
diff --git a/sysutils/slurm-wlm/files/patch-src_common_conmgr.c b/sysutils/slurm-wlm/files/patch-src_common_conmgr.c
index 4a536f2854cc..4c2b9a256d57 100644
--- a/sysutils/slurm-wlm/files/patch-src_common_conmgr.c
+++ b/sysutils/slurm-wlm/files/patch-src_common_conmgr.c
@@ -1,4 +1,4 @@
---- src/common/conmgr.c.orig	2023-11-21 22:33:29 UTC
+--- src/common/conmgr.c.orig	2024-05-21 17:19:51 UTC
 +++ src/common/conmgr.c
 @@ -47,6 +47,8 @@
  #include <sys/stat.h>
@@ -9,3 +9,41 @@
  #include <time.h>
  #include <unistd.h>
  
+@@ -2616,8 +2618,15 @@ static int _create_socket(void *x, void *arg)
+ 
+ 		/* set value of socket path */
+ 		strlcpy(addr.sun_path, unixsock, sizeof(addr.sun_path));
+-		if ((rc = bind(fd, (const struct sockaddr *) &addr,
+-			       sizeof(addr))))
++		/* Bind UNIX socket: compute correct length */
++		{
++			socklen_t len = SUN_LEN(&addr);
++		#if defined(__FreeBSD__)
++			addr.sun_len = (uint8_t)len;
++		#endif
++			rc = bind(fd, (const struct sockaddr *)&addr, len);
++		}
++		if (rc)
+ 			fatal("%s: [%s] Unable to bind UNIX socket: %m",
+ 			      __func__, hostport);
+ 
+@@ -2675,6 +2684,19 @@ static int _create_socket(void *x, void *arg)
+ 			       &one, sizeof(one)))
+ 			fatal("%s: [%s] setsockopt(SO_REUSEADDR) failed: %m",
+ 			      __func__, addrinfo_to_string(addr));
++
++		/* For INET/INET6, set *_len on FreeBSD; ai_addrlen is already correct */
++		#if defined(__FreeBSD__)
++			if (addr->ai_addr) {
++				if (addr->ai_addr->sa_family == AF_INET) {
++					((struct sockaddr_in *)addr->ai_addr)->sin_len =
++					    (uint8_t)sizeof(struct sockaddr_in);
++				} else if (addr->ai_addr->sa_family == AF_INET6) {
++					((struct sockaddr_in6 *)addr->ai_addr)->sin6_len =
++					    (uint8_t)sizeof(struct sockaddr_in6);
++				}
++			}
++		#endif
+ 
+ 		if (bind(fd, addr->ai_addr, addr->ai_addrlen) != 0)
+ 			fatal("%s: [%s] Unable to bind socket: %m",
diff --git a/sysutils/slurm-wlm/files/patch-src_common_net.c b/sysutils/slurm-wlm/files/patch-src_common_net.c
new file mode 100644
index 000000000000..8bfdea648018
--- /dev/null
+++ b/sysutils/slurm-wlm/files/patch-src_common_net.c
@@ -0,0 +1,68 @@
+--- src/common/net.c.orig	2024-05-21 17:19:51 UTC
++++ src/common/net.c
+@@ -75,6 +75,29 @@
+ #include "src/common/xstring.h"
+ 
+ /*
++ * Compute correct namelen + sa_len for AF_INET/AF_INET6
++ */
++#if defined(__FreeBSD__)
++static inline socklen_t _bsd_sockaddr_len_fix(struct sockaddr *sa)
++{
++	if (!sa) return 0;
++	switch (sa->sa_family) {
++	case AF_INET: {
++		struct sockaddr_in *in = (struct sockaddr_in *)sa;
++		in->sin_len = (uint8_t)sizeof(*in);
++		return sizeof(*in);
++	}
++	case AF_INET6: {
++		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)sa;
++		in6->sin6_len = (uint8_t)sizeof(*in6);
++		return sizeof(*in6);
++	}
++	default: return sizeof(*sa);
++	}
++}
++#endif
++
++/*
+  * Define slurm-specific aliases for use by plugins, see slurm_xlator.h
+  * for details.
+  */
+@@ -96,12 +119,18 @@ int net_stream_listen(int *fd, uint16_t *port)
+ 	/* bind ephemeral port */
+ 	slurm_setup_addr(&sin, 0);
+ 
++#if defined(__FreeBSD__)
++	socklen_t blen = _bsd_sockaddr_len_fix((struct sockaddr *)&sin);
++#else
++	socklen_t blen = sizeof(sin);
++#endif
+ 	if ((*fd = socket(sin.ss_family, SOCK_STREAM, IPPROTO_TCP)) < 0)
+ 		return -1;
+ 
+ 	if (setsockopt(*fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)) < 0)
+ 		goto cleanup;
+-	if (bind(*fd, (struct sockaddr *) &sin, len) < 0)
++	/* Single bind(): pass platform-correct namelen */
++	if (bind(*fd, (struct sockaddr *) &sin, blen) < 0)
+ 		goto cleanup;
+ 	if (getsockname(*fd, (struct sockaddr *) &sin, &len) < 0)
+ 		goto cleanup;
+@@ -219,7 +248,15 @@ static bool _is_port_ok(int s, uint16_t port, bool loc
+ 		return false;
+ 	}
+ 
++/*
++ * compute correct socket length and pass it to bind() as blen on FreeBSD
++ */
++#if defined(__FreeBSD__)
++	socklen_t blen = _bsd_sockaddr_len_fix((struct sockaddr *)&addr);
++	if (bind(s, (struct sockaddr *) &addr, blen) < 0) {
++#else
+ 	if (bind(s, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
++#endif
+ 		log_flag(NET, "%s: bind() failed on port:%d fd:%d: %m",
+ 			 __func__, port, s);
+ 		return false;
diff --git a/sysutils/slurm-wlm/files/patch-src_common_sack__api.c b/sysutils/slurm-wlm/files/patch-src_common_sack__api.c
new file mode 100644
index 000000000000..fe7520fde339
--- /dev/null
+++ b/sysutils/slurm-wlm/files/patch-src_common_sack__api.c
@@ -0,0 +1,28 @@
+--- src/common/sack_api.c.orig	2024-05-21 17:19:51 UTC
++++ src/common/sack_api.c
+@@ -38,6 +38,8 @@
+ #include <sys/socket.h>
+ #include <sys/types.h>
+ #include <sys/un.h>
++#include <string.h>
++#include <stddef.h>
+ #include <unistd.h>
+ 
+ #include "src/common/fd.h"
+@@ -63,7 +65,15 @@ static int _sack_try_connection(struct sockaddr_un *ad
+ static int _sack_try_connection(struct sockaddr_un *addr)
+ {
+ 	int fd;
+-	size_t len = strlen(addr->sun_path) + 1 + sizeof(addr->sun_family);
++	socklen_t len;
++
++/* FreeBSD requires sun_len and a correct namelen */
++#if defined(__FreeBSD__)
++	len = (socklen_t)SUN_LEN(addr);
++	addr->sun_len = (uint8_t)len;
++#else
++	len = (socklen_t)(strlen(addr->sun_path) + 1 + sizeof(addr->sun_family));
++#endif
+ 
+ 	if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
+ 		debug3("%s: socket() failed: %m", __func__);
diff --git a/sysutils/slurm-wlm/files/patch-src_common_slurm__protocol__socket.c b/sysutils/slurm-wlm/files/patch-src_common_slurm__protocol__socket.c
index d5ae5ef84c32..b4197166e32d 100644
--- a/sysutils/slurm-wlm/files/patch-src_common_slurm__protocol__socket.c
+++ b/sysutils/slurm-wlm/files/patch-src_common_slurm__protocol__socket.c
@@ -1,39 +1,66 @@
 --- src/common/slurm_protocol_socket.c.orig	2024-05-21 17:19:51 UTC
 +++ src/common/slurm_protocol_socket.c
-@@ -491,6 +491,27 @@ extern int slurm_init_msg_engine(slurm_addr_t *addr, b
- extern int slurm_init_msg_engine(slurm_addr_t *addr, bool quiet)
- {
- 	int rc;
+@@ -53,7 +53,9 @@
+ #include <sys/socket.h>
+ #include <sys/time.h>
+ #include <sys/types.h>
++#include <sys/un.h>
+ #include <unistd.h>
++#include <fcntl.h>
+ 
+ #include "slurm/slurm_errno.h"
+ #include "src/common/read_config.h"
+@@ -74,6 +76,28 @@
+ #define RANDOM_USER_PORT ((uint16_t) ((lrand48() % \
+ 		(MAX_USER_PORT - MIN_USER_PORT + 1)) + MIN_USER_PORT))
+ 
++#if defined(__FreeBSD__)
++/* Normalize sockaddr length on FreeBSD and return the proper namelen */
++static inline socklen_t _bsd_sockaddr_len_fix(struct sockaddr *sa)
++{
++	if (!sa) return 0;
++	switch (sa->sa_family) {
++	case AF_INET: {
++		struct sockaddr_in *in = (struct sockaddr_in *)sa;
++		in->sin_len = (uint8_t)sizeof(*in);
++		return sizeof(*in);
++	}
++	case AF_INET6: {
++		struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)sa;
++		in6->sin6_len = (uint8_t)sizeof(*in6);
++		return sizeof(*in6);
++	}
++	default:
++		return sizeof(*sa);
++	}
++}
++#endif
 +
+ /* Static functions */
+ static int _slurm_connect(int __fd, struct sockaddr const * __addr,
+ 			  socklen_t __len);
+@@ -115,8 +139,13 @@ static void _sock_bind_wild(int sockfd)
+ 
+ 	slurm_setup_addr(&sin, RANDOM_USER_PORT);
+ 
++	socklen_t blen = _bsd_sockaddr_len_fix((struct sockaddr *)&sin);
+ 	for (retry=0; retry < PORT_RETRIES ; retry++) {
 +	#if defined(__FreeBSD__)
-+		/*
-+		 * FreeBSD requires the sa_len field to be set correctly in
-+		 * struct sockaddr_in / sockaddr_in6 before calling bind().
-+		 * If it is unset, bind() may fail with EINVAL.
-+		 *
-+		 * This adjustment ensures the correct length is set based
-+		 * on the address family before bind() is called.
-+		 */
-+		if (addr->ss_family == AF_INET) {
-+			((struct sockaddr_in *)addr)->sin_len = sizeof(struct sockaddr_in);
-+		} else if (addr->ss_family == AF_INET6) {
-+			((struct sockaddr_in6 *)addr)->sin6_len = sizeof(struct sockaddr_in6);
-+		}
-+
-+		/* Track the correct length for bind() */
-+		socklen_t bind_len = (addr->ss_family == AF_INET6)
-+			? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
++		rc = bind(sockfd, (struct sockaddr *) &sin, blen);
++	#else
+ 		rc = bind(sockfd, (struct sockaddr *) &sin, sizeof(sin));
 +	#endif
-+
- 	int fd;
- 	int log_lvl = LOG_LEVEL_ERROR;
- 	const int one = 1;
-@@ -511,7 +532,11 @@ extern int slurm_init_msg_engine(slurm_addr_t *addr, b
+ 		if (rc >= 0)
+ 			break;
+ 		slurm_set_port(&sin, RANDOM_USER_PORT);
+@@ -511,7 +540,13 @@ extern int slurm_init_msg_engine(slurm_addr_t *addr, b
  		goto error;
  	}
  
 -	rc = bind(fd, (struct sockaddr const *) addr, sizeof(*addr));
 +	#if defined(__FreeBSD__)
++		/* FreeBSD: normalize sa_len and pass the exact namelen to bind() */
++		socklen_t bind_len = _bsd_sockaddr_len_fix((struct sockaddr *)addr);
 +		rc = bind(fd, (struct sockaddr const *) addr, bind_len);
 +	#else
 +		rc = bind(fd, (struct sockaddr const *) addr, sizeof(*addr));
@@ -41,3 +68,31 @@
  	if (rc < 0) {
  		format_print(log_lvl, "Error binding slurm stream socket: %m");
  		goto error;
+@@ -666,7 +701,27 @@ static int _slurm_connect (int __fd, struct sockaddr c
+ 	if (fcntl(__fd, F_SETFL, flags | O_NONBLOCK) < 0)
+ 		error("%s: fcntl(F_SETFL) error: %m", __func__);
+ 
++/* FreeBSD requires correct sa_len/namelen; recompute here */
++#if defined(__FreeBSD__)
++	{
++		struct sockaddr *sa = (struct sockaddr *)__addr;
++		socklen_t namelen;
++		switch (sa->sa_family) {
++		case AF_UNIX: {
++			struct sockaddr_un *un = (struct sockaddr_un *)sa;
++			namelen = (socklen_t)SUN_LEN(un);
++			un->sun_len = (uint8_t)namelen;
++			break;
++		}
++		default:
++			namelen = _bsd_sockaddr_len_fix(sa);
++			break;
++		}
++		rc = connect(__fd, sa, namelen);
++	}
++#else
+ 	rc = connect(__fd , __addr , __len);
++#endif
+ 	if ((rc < 0) && (errno != EINPROGRESS))
+ 		return errno;
+ 	if (rc == 0)
diff --git a/sysutils/slurm-wlm/files/patch-src_common_stepd__api.c b/sysutils/slurm-wlm/files/patch-src_common_stepd__api.c
new file mode 100644
index 000000000000..c01bbebffe33
--- /dev/null
+++ b/sysutils/slurm-wlm/files/patch-src_common_stepd__api.c
@@ -0,0 +1,36 @@
+--- src/common/stepd_api.c.orig	2024-05-21 17:19:51 UTC
++++ src/common/stepd_api.c
+@@ -48,6 +48,7 @@
+ #include <signal.h>
+ #include <stdlib.h>
+ #include <string.h>
++#include <stddef.h>
+ #include <sys/socket.h>
+ #include <sys/stat.h>
+ #include <sys/time.h>
+@@ -146,7 +147,7 @@ _step_connect(const char *directory, const char *noden
+ 	      slurm_step_id_t *step_id)
+ {
+ 	int fd;
+-	int len;
++	socklen_t len;
+ 	struct sockaddr_un addr;
+ 	char *name = NULL, *pos = NULL;
+ 	uint32_t stepid = step_id->step_id;
+@@ -177,7 +178,15 @@ _step_connect(const char *directory, const char *noden
+ 	memset(&addr, 0, sizeof(addr));
+ 	addr.sun_family = AF_UNIX;
+ 	strlcpy(addr.sun_path, name, sizeof(addr.sun_path));
+-	len = strlen(addr.sun_path) + 1 + sizeof(addr.sun_family);
++
++#if defined(__FreeBSD__)
++	/* Compute exact namelen from the populated path */
++	len = (socklen_t)SUN_LEN(&addr);
++	/* FreeBSD requires sun_len to match the exact length */
++	addr.sun_len = (uint8_t)len;
++#else
++	len = (socklen_t)(strlen(addr.sun_path) + 1 + sizeof(addr.sun_family));
++#endif
+ 
+ 	if (connect(fd, (struct sockaddr *) &addr, len) < 0) {
+ 		/* Can indicate race condition at step termination */
diff --git a/sysutils/slurm-wlm/files/patch-src_common_xsystemd.c b/sysutils/slurm-wlm/files/patch-src_common_xsystemd.c
new file mode 100644
index 000000000000..91e7dc9b41ac
--- /dev/null
+++ b/sysutils/slurm-wlm/files/patch-src_common_xsystemd.c
@@ -0,0 +1,25 @@
+--- src/common/xsystemd.c.orig	2024-05-21 17:19:51 UTC
++++ src/common/xsystemd.c
+@@ -36,6 +36,9 @@
+ 
+ #include <sys/socket.h>
+ #include <sys/un.h>
++#include <string.h>
++#include <stdlib.h>
++#include <stddef.h>
+ 
+ #include "src/common/log.h"
+ #include "src/common/slurm_protocol_defs.h"
+@@ -57,7 +60,12 @@ extern void xsystemd_change_mainpid(pid_t pid)
+ 	}
+ 
+ 	strlcpy(addr.sun_path, notify_socket, sizeof(addr.sun_path));
++#if defined(__FreeBSD__)
++	len = (socklen_t)SUN_LEN(&addr);
++	addr.sun_len = (uint8_t)len;
++#else
+ 	len = strlen(addr.sun_path) + 1 + sizeof(addr.sun_family);
++#endif
+ 
+ 	if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) {
+ 		error("%s: socket() failed: %m", __func__);
diff --git a/sysutils/slurm-wlm/files/patch-src_plugins_auth_slurm_sack.c b/sysutils/slurm-wlm/files/patch-src_plugins_auth_slurm_sack.c
new file mode 100644
index 000000000000..f0ee2bac99e6
--- /dev/null
+++ b/sysutils/slurm-wlm/files/patch-src_plugins_auth_slurm_sack.c
@@ -0,0 +1,17 @@
+--- src/plugins/auth/slurm/sack.c.orig	2024-05-21 17:19:51 UTC
++++ src/plugins/auth/slurm/sack.c
+@@ -278,8 +278,12 @@ extern void init_sack_conmgr(void)
+ 
+ 	/* set value of socket path */
+ 	mask = umask(0);
+-	if ((rc = bind(fd, (const struct sockaddr *) &addr,
+-		       sizeof(addr))))
++/* AF_UNIX length handling */
++	socklen_t len = SUN_LEN(&addr);
++#if defined(__FreeBSD__)
++	addr.sun_len = (uint8_t)len;
++#endif
++	if ((rc = bind(fd, (const struct sockaddr *)&addr, len)))
+ 		fatal("%s: [%s] Unable to bind UNIX socket: %m",
+ 		      __func__, addr.sun_path);
+ 	umask(mask);
diff --git a/sysutils/slurm-wlm/files/patch-src_slurmd_slurmstepd_req.c b/sysutils/slurm-wlm/files/patch-src_slurmd_slurmstepd_req.c
new file mode 100644
index 000000000000..0e5b06d1dacf
--- /dev/null
+++ b/sysutils/slurm-wlm/files/patch-src_slurmd_slurmstepd_req.c
@@ -0,0 +1,15 @@
+--- src/slurmd/slurmstepd/req.c.orig	2024-05-21 17:19:51 UTC
++++ src/slurmd/slurmstepd/req.c
+@@ -176,7 +176,11 @@ _create_socket(const char *name)
+ 	memset(&addr, 0, sizeof(addr));
+ 	addr.sun_family = AF_UNIX;
+ 	strlcpy(addr.sun_path, name, sizeof(addr.sun_path));
+-	len = strlen(addr.sun_path)+1 + sizeof(addr.sun_family);
++/* AF_UNIX length handling */
++	len = SUN_LEN(&addr);
++#if defined(__FreeBSD__)
++	addr.sun_len = (uint8_t)len;
++#endif
+ 
+ 	/* bind the name to the descriptor */
+ 	if (bind(fd, (struct sockaddr *) &addr, len) < 0) {