svn commit: r220525 - stable/8/sbin/hastd
Mikolaj Golub
trociny at FreeBSD.org
Sun Apr 10 15:48:16 UTC 2011
Author: trociny
Date: Sun Apr 10 15:48:16 2011
New Revision: 220525
URL: http://svn.freebsd.org/changeset/base/220525
Log:
MFC r220005, r220006, r220007, r220266, r220270, r220271, r220272,
r220273, r220274:
r220005 (pjd):
Use role2str() when setting process title.
r220006 (pjd):
Use timeout from configuration file not only when sending and receiving,
but also when establishing connection.
r220007 (pjd):
Add mapsize to the header just before sending the packet.
Before it could change later and we were sending invalid mapsize.
Some time ago I added an optimization: when nodes are connected for the
first time and there have been no writes to them yet, the initial full
synchronization is skipped. This bug prevented it from working.
r220266 (pjd):
Handle the problem described in r220264 by using GEOM GATE queue of unlimited
length. This should fix deadlocks reported by HAST users.
r220270 (pjd):
Allow disabling sends or receives on a socket using shutdown(2) by
interpreting a NULL 'data' argument passed to proto_common_send() or
proto_common_recv() as a request to do so.
r220271 (pjd):
Declare directions for sockets between primary and secondary.
In HAST we use two sockets - one for only sending the data and one for only
receiving the data.
r220272 (pjd):
When we are operating on blocking socket and get EAGAIN on send(2) or recv(2)
this means that request timed out. Translate the meaningless EAGAIN to
ETIMEDOUT to give administrator a hint that he might need to increase timeout
in configuration file.
r220273 (pjd):
Handle ENOBUFS on send(2) by retrying for a while and logging the problem.
r220274 (pjd):
Increase default timeout from 5 seconds to 20 seconds. 5 seconds is definitely
too short under heavy load and I was experiencing those timeouts in my recent
tests.
Approved by: pjd (mentor)
Modified:
stable/8/sbin/hastd/hast.conf.5
stable/8/sbin/hastd/hast.h
stable/8/sbin/hastd/primary.c
stable/8/sbin/hastd/proto_common.c
stable/8/sbin/hastd/secondary.c
Directory Properties:
stable/8/sbin/hastd/ (props changed)
Modified: stable/8/sbin/hastd/hast.conf.5
==============================================================================
--- stable/8/sbin/hastd/hast.conf.5 Sun Apr 10 15:40:57 2011 (r220524)
+++ stable/8/sbin/hastd/hast.conf.5 Sun Apr 10 15:48:16 2011 (r220525)
@@ -28,7 +28,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd March 20, 2011
+.Dd April 2, 2011
.Dt HAST.CONF 5
.Os
.Sh NAME
@@ -241,7 +241,7 @@ LZF is very fast, general purpose compre
.Pp
Connection timeout in seconds.
The default value is
-.Va 5 .
+.Va 20 .
.It Ic exec Aq path
.Pp
Execute the given program on various HAST events.
Modified: stable/8/sbin/hastd/hast.h
==============================================================================
--- stable/8/sbin/hastd/hast.h Sun Apr 10 15:40:57 2011 (r220524)
+++ stable/8/sbin/hastd/hast.h Sun Apr 10 15:48:16 2011 (r220525)
@@ -83,7 +83,7 @@
#define HIO_KEEPALIVE 5
#define HAST_USER "hast"
-#define HAST_TIMEOUT 5
+#define HAST_TIMEOUT 20
#define HAST_CONFIG "/etc/hast.conf"
#define HAST_CONTROL "/var/run/hastctl"
#define HASTD_LISTEN "tcp4://0.0.0.0:8457"
Modified: stable/8/sbin/hastd/primary.c
==============================================================================
--- stable/8/sbin/hastd/primary.c Sun Apr 10 15:40:57 2011 (r220524)
+++ stable/8/sbin/hastd/primary.c Sun Apr 10 15:48:16 2011 (r220525)
@@ -509,7 +509,7 @@ primary_connect(struct hast_resource *re
primary_exit(EX_TEMPFAIL,
"Unable to receive connection from parent");
}
- if (proto_connect_wait(conn, HAST_TIMEOUT) < 0) {
+ if (proto_connect_wait(conn, res->hr_timeout) < 0) {
pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
res->hr_remoteaddr);
proto_close(conn);
@@ -701,6 +701,11 @@ init_remote(struct hast_resource *res, s
(void)hast_activemap_flush(res);
}
nv_free(nvin);
+ /* Setup directions. */
+ if (proto_send(out, NULL, 0) == -1)
+ pjdlog_errno(LOG_WARNING, "Unable to set connection direction");
+ if (proto_recv(in, NULL, 0) == -1)
+ pjdlog_errno(LOG_WARNING, "Unable to set connection direction");
pjdlog_info("Connected to %s.", res->hr_remoteaddr);
if (inp != NULL && outp != NULL) {
*inp = in;
@@ -761,7 +766,7 @@ init_ggate(struct hast_resource *res)
ggiocreate.gctl_mediasize = res->hr_datasize;
ggiocreate.gctl_sectorsize = res->hr_local_sectorsize;
ggiocreate.gctl_flags = 0;
- ggiocreate.gctl_maxcount = G_GATE_MAX_QUEUE_SIZE;
+ ggiocreate.gctl_maxcount = 0;
ggiocreate.gctl_timeout = 0;
ggiocreate.gctl_unit = G_GATE_NAME_GIVEN;
snprintf(ggiocreate.gctl_name, sizeof(ggiocreate.gctl_name), "hast/%s",
@@ -868,7 +873,7 @@ hastd_primary(struct hast_resource *res)
pjdlog_init(mode);
pjdlog_debug_set(debuglevel);
pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
- setproctitle("%s (primary)", res->hr_name);
+ setproctitle("%s (%s)", res->hr_name, role2str(res->hr_role));
init_local(res);
init_ggate(res);
Modified: stable/8/sbin/hastd/proto_common.c
==============================================================================
--- stable/8/sbin/hastd/proto_common.c Sun Apr 10 15:40:57 2011 (r220524)
+++ stable/8/sbin/hastd/proto_common.c Sun Apr 10 15:48:16 2011 (r220525)
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
+ * Copyright (c) 2011 Pawel Jakub Dawidek <pawel at dawidek.net>
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -34,8 +35,11 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <strings.h>
+#include <unistd.h>
#include "pjdlog.h"
#include "proto_impl.h"
@@ -45,6 +49,16 @@ __FBSDID("$FreeBSD$");
#define MAX_SEND_SIZE 32768
#endif
+static bool
+blocking_socket(int sock)
+{
+ int flags;
+
+ flags = fcntl(sock, F_GETFL);
+ PJDLOG_ASSERT(flags >= 0);
+ return ((flags & O_NONBLOCK) == 0);
+}
+
static int
proto_descriptor_send(int sock, int fd)
{
@@ -80,24 +94,65 @@ proto_common_send(int sock, const unsign
{
ssize_t done;
size_t sendsize;
+ int errcount = 0;
PJDLOG_ASSERT(sock >= 0);
+
+ if (data == NULL) {
+ /* The caller is just trying to decide about direction. */
+
+ PJDLOG_ASSERT(size == 0);
+
+ if (shutdown(sock, SHUT_RD) == -1)
+ return (errno);
+ return (0);
+ }
+
PJDLOG_ASSERT(data != NULL);
PJDLOG_ASSERT(size > 0);
do {
sendsize = size < MAX_SEND_SIZE ? size : MAX_SEND_SIZE;
done = send(sock, data, sendsize, MSG_NOSIGNAL);
- if (done == 0)
+ if (done == 0) {
return (ENOTCONN);
- else if (done < 0) {
+ } else if (done < 0) {
if (errno == EINTR)
continue;
+ if (errno == ENOBUFS) {
+ /*
+ * If there are no buffers we retry.
+ * After each try we increase delay before the
+ * next one and we give up after fifteen times.
+ * This gives 11s of total wait time.
+ */
+ if (errcount == 15) {
+ pjdlog_warning("Getting ENOBUFS errors for 11s on send(), giving up.");
+ } else {
+ if (errcount == 0)
+ pjdlog_warning("Got ENOBUFS error on send(), retrying for a bit.");
+ errcount++;
+ usleep(100000 * errcount);
+ continue;
+ }
+ }
+ /*
+ * If this is blocking socket and we got EAGAIN, this
+ * means the request timed out. Translate errno to
+ * ETIMEDOUT, to give administrator a hint to
+ * eventually increase timeout.
+ */
+ if (errno == EAGAIN && blocking_socket(sock))
+ errno = ETIMEDOUT;
return (errno);
}
data += done;
size -= done;
} while (size > 0);
+ if (errcount > 0) {
+ pjdlog_info("Data sent successfully after %d ENOBUFS error%s.",
+ errcount, errcount == 1 ? "" : "s");
+ }
if (fd == -1)
return (0);
@@ -141,16 +196,36 @@ proto_common_recv(int sock, unsigned cha
ssize_t done;
PJDLOG_ASSERT(sock >= 0);
+
+ if (data == NULL) {
+ /* The caller is just trying to decide about direction. */
+
+ PJDLOG_ASSERT(size == 0);
+
+ if (shutdown(sock, SHUT_WR) == -1)
+ return (errno);
+ return (0);
+ }
+
PJDLOG_ASSERT(data != NULL);
PJDLOG_ASSERT(size > 0);
do {
done = recv(sock, data, size, MSG_WAITALL);
} while (done == -1 && errno == EINTR);
- if (done == 0)
+ if (done == 0) {
return (ENOTCONN);
- else if (done < 0)
+ } else if (done < 0) {
+ /*
+ * If this is blocking socket and we got EAGAIN, this
+ * means the request timed out. Translate errno to
+ * ETIMEDOUT, to give administrator a hint to
+ * eventually increase timeout.
+ */
+ if (errno == EAGAIN && blocking_socket(sock))
+ errno = ETIMEDOUT;
return (errno);
+ }
if (fdp == NULL)
return (0);
return (proto_descriptor_recv(sock, fdp));
Modified: stable/8/sbin/hastd/secondary.c
==============================================================================
--- stable/8/sbin/hastd/secondary.c Sun Apr 10 15:40:57 2011 (r220524)
+++ stable/8/sbin/hastd/secondary.c Sun Apr 10 15:48:16 2011 (r220525)
@@ -183,6 +183,10 @@ init_remote(struct hast_resource *res, s
unsigned char *map;
size_t mapsize;
+ /* Setup direction. */
+ if (proto_send(res->hr_remoteout, NULL, 0) == -1)
+ pjdlog_errno(LOG_WARNING, "Unable to set connection direction");
+
map = NULL;
mapsize = 0;
nvout = nv_alloc();
@@ -201,7 +205,6 @@ init_remote(struct hast_resource *res, s
"Unable to allocate memory (%zu bytes) for activemap.",
mapsize);
}
- nv_add_uint32(nvout, (uint32_t)mapsize, "mapsize");
/*
* When we work as primary and secondary is missing we will increase
* localcnt in our metadata. When secondary is connected and synced
@@ -339,6 +342,7 @@ init_remote(struct hast_resource *res, s
(uintmax_t)res->hr_secondary_localcnt,
(uintmax_t)res->hr_secondary_remotecnt);
}
+ nv_add_uint32(nvout, (uint32_t)mapsize, "mapsize");
if (hast_proto_send(res, res->hr_remotein, nvout, map, mapsize) < 0) {
pjdlog_exit(EX_TEMPFAIL, "Unable to send activemap to %s",
res->hr_remoteaddr);
@@ -346,6 +350,9 @@ init_remote(struct hast_resource *res, s
if (map != NULL)
free(map);
nv_free(nvout);
+ /* Setup direction. */
+ if (proto_recv(res->hr_remotein, NULL, 0) == -1)
+ pjdlog_errno(LOG_WARNING, "Unable to set connection direction");
if (res->hr_secondary_localcnt > res->hr_primary_remotecnt &&
res->hr_primary_localcnt > res->hr_secondary_remotecnt) {
/* Exit on split-brain. */
@@ -414,7 +421,7 @@ hastd_secondary(struct hast_resource *re
pjdlog_init(mode);
pjdlog_debug_set(debuglevel);
pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
- setproctitle("%s (secondary)", res->hr_name);
+ setproctitle("%s (%s)", res->hr_name, role2str(res->hr_role));
PJDLOG_VERIFY(sigemptyset(&mask) == 0);
PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
More information about the svn-src-stable-8
mailing list