svn commit: r204076 - in head: etc/defaults etc/rc.d sbin sbin/ggate/ggatec sbin/ggate/ggatel sbin/hastctl sbin/hastd share/examples share/examples/hast share/man/man5 sys/geom/gate

Robert Watson rwatson at FreeBSD.org
Thu Feb 18 23:22:32 UTC 2010


On Thu, 18 Feb 2010, Pawel Jakub Dawidek wrote:

>  Please welcome HAST - Highly Available Storage.

Excellent news!  I know a number of shops will be very excited to see this in 
the tree.

Could you say a little about the future of ggated in light of the new arrival?

Robert

>
>  HAST allows data to be transparently stored on two physically separated machines
>  connected over the TCP/IP network. HAST works in Primary-Secondary
>  (Master-Backup, Master-Slave) configuration, which means that only one of the
>  cluster nodes can be active at any given time. Only Primary node is able to
>  handle I/O requests to HAST-managed devices. Currently HAST is limited to two
>  cluster nodes in total.
>
>  HAST operates on block level - it provides disk-like devices in /dev/hast/
>  directory for use by file systems and/or applications. Working on block level
>  makes it transparent for file systems and applications. There is no difference
>  between using HAST-provided device and raw disk, partition, etc. All of them
>  are just regular GEOM providers in FreeBSD.
>
>  For more information please consult hastd(8), hastctl(8) and hast.conf(5)
>  manual pages, as well as http://wiki.FreeBSD.org/HAST.
>
>  Sponsored by:	FreeBSD Foundation
>  Sponsored by:	OMCnet Internet Service GmbH
>  Sponsored by:	TransIP BV
>
> Added:
>  head/etc/rc.d/hastd   (contents, props changed)
>  head/sbin/hastctl/
>  head/sbin/hastctl/Makefile   (contents, props changed)
>  head/sbin/hastctl/hastctl.8   (contents, props changed)
>  head/sbin/hastctl/hastctl.c   (contents, props changed)
>  head/sbin/hastd/
>  head/sbin/hastd/Makefile   (contents, props changed)
>  head/sbin/hastd/activemap.c   (contents, props changed)
>  head/sbin/hastd/activemap.h   (contents, props changed)
>  head/sbin/hastd/control.c   (contents, props changed)
>  head/sbin/hastd/control.h   (contents, props changed)
>  head/sbin/hastd/ebuf.c   (contents, props changed)
>  head/sbin/hastd/ebuf.h   (contents, props changed)
>  head/sbin/hastd/hast.conf.5   (contents, props changed)
>  head/sbin/hastd/hast.h   (contents, props changed)
>  head/sbin/hastd/hast_proto.c   (contents, props changed)
>  head/sbin/hastd/hast_proto.h   (contents, props changed)
>  head/sbin/hastd/hastd.8   (contents, props changed)
>  head/sbin/hastd/hastd.c   (contents, props changed)
>  head/sbin/hastd/hastd.h   (contents, props changed)
>  head/sbin/hastd/hooks.c   (contents, props changed)
>  head/sbin/hastd/hooks.h   (contents, props changed)
>  head/sbin/hastd/metadata.c   (contents, props changed)
>  head/sbin/hastd/metadata.h   (contents, props changed)
>  head/sbin/hastd/nv.c   (contents, props changed)
>  head/sbin/hastd/nv.h   (contents, props changed)
>  head/sbin/hastd/parse.y   (contents, props changed)
>  head/sbin/hastd/pjdlog.c   (contents, props changed)
>  head/sbin/hastd/pjdlog.h   (contents, props changed)
>  head/sbin/hastd/primary.c   (contents, props changed)
>  head/sbin/hastd/proto.c   (contents, props changed)
>  head/sbin/hastd/proto.h   (contents, props changed)
>  head/sbin/hastd/proto_common.c   (contents, props changed)
>  head/sbin/hastd/proto_impl.h   (contents, props changed)
>  head/sbin/hastd/proto_socketpair.c   (contents, props changed)
>  head/sbin/hastd/proto_tcp4.c   (contents, props changed)
>  head/sbin/hastd/proto_uds.c   (contents, props changed)
>  head/sbin/hastd/rangelock.c   (contents, props changed)
>  head/sbin/hastd/rangelock.h   (contents, props changed)
>  head/sbin/hastd/secondary.c   (contents, props changed)
>  head/sbin/hastd/subr.c   (contents, props changed)
>  head/sbin/hastd/subr.h   (contents, props changed)
>  head/sbin/hastd/synch.h   (contents, props changed)
>  head/sbin/hastd/token.l   (contents, props changed)
>  head/share/examples/hast/
>  head/share/examples/hast/ucarp.sh   (contents, props changed)
>  head/share/examples/hast/ucarp_down.sh   (contents, props changed)
>  head/share/examples/hast/ucarp_up.sh   (contents, props changed)
>  head/share/examples/hast/vip-down.sh   (contents, props changed)
>  head/share/examples/hast/vip-up.sh   (contents, props changed)
> Modified:
>  head/etc/defaults/rc.conf
>  head/etc/rc.d/Makefile
>  head/sbin/Makefile
>  head/sbin/ggate/ggatec/ggatec.c
>  head/sbin/ggate/ggatel/ggatel.c
>  head/share/examples/Makefile
>  head/share/man/man5/rc.conf.5
>  head/sys/geom/gate/g_gate.c
>  head/sys/geom/gate/g_gate.h
>
> Modified: head/etc/defaults/rc.conf
> ==============================================================================
> --- head/etc/defaults/rc.conf	Thu Feb 18 23:04:01 2010	(r204075)
> +++ head/etc/defaults/rc.conf	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -260,6 +260,9 @@ syslogd_flags="-s"		# Flags to syslogd (
> inetd_enable="NO"		# Run the network daemon dispatcher (YES/NO).
> inetd_program="/usr/sbin/inetd"	# path to inetd, if you want a different one.
> inetd_flags="-wW -C 60"		# Optional flags to inetd
> +hastd_enable="NO"		# Run the HAST daemon (YES/NO).
> +hastd_program="/sbin/hastd"	# path to hastd, if you want a different one.
> +hastd_flags=""			# Optional flags to hastd.
> #
> # named.  It may be possible to run named in a sandbox, man security for
> # details.
>
> Modified: head/etc/rc.d/Makefile
> ==============================================================================
> --- head/etc/rc.d/Makefile	Thu Feb 18 23:04:01 2010	(r204075)
> +++ head/etc/rc.d/Makefile	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -12,7 +12,7 @@ FILES=	DAEMON FILESYSTEMS LOGIN NETWORKI
> 	encswap \
> 	faith fsck ftp-proxy ftpd \
> 	gbde geli geli2 gssd \
> -	hcsecd \
> +	hastd hcsecd \
> 	hostapd hostid hostid_save hostname \
> 	inetd initrandom \
> 	ip6addrctl ipfilter ipfs ipfw ipmon \
>
> Added: head/etc/rc.d/hastd
> ==============================================================================
> --- /dev/null	00:00:00 1970	(empty, because file is newly added)
> +++ head/etc/rc.d/hastd	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -0,0 +1,31 @@
> +#!/bin/sh
> +#
> +# $FreeBSD$
> +#
> +
> +# PROVIDE: hastd
> +# REQUIRE: NETWORKING syslogd
> +# BEFORE:  DAEMON
> +
> +. /etc/rc.subr
> +
> +name="hastd"
> +rcvar=`set_rcvar`
> +pidfile="/var/run/${name}.pid"
> +command="/sbin/${name}"
> +hastctl="/sbin/hastctl"
> +required_files="/etc/hast.conf"
> +stop_precmd="hastd_stop_precmd"
> +required_modules="geom_gate:g_gate"
> +
> +sockfile="/var/run/syslogd.sockets"
> +evalargs="rc_flags=\"\`set_socketlist\` \$rc_flags\""
> +altlog_proglist="named"
> +
> +hastd_stop_precmd()
> +{
> +	${hastctl} role init all
> +}
> +
> +load_rc_config $name
> +run_rc_command "$1"
>
> Modified: head/sbin/Makefile
> ==============================================================================
> --- head/sbin/Makefile	Thu Feb 18 23:04:01 2010	(r204075)
> +++ head/sbin/Makefile	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -36,6 +36,8 @@ SUBDIR=	adjkerntz \
> 	ggate \
> 	growfs \
> 	gvinum \
> +	hastctl \
> +	hastd \
> 	ifconfig \
> 	init \
> 	${_ipf} \
>
> Modified: head/sbin/ggate/ggatec/ggatec.c
> ==============================================================================
> --- head/sbin/ggate/ggatec/ggatec.c	Thu Feb 18 23:04:01 2010	(r204075)
> +++ head/sbin/ggate/ggatec/ggatec.c	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -59,7 +59,7 @@ enum { UNSET, CREATE, DESTROY, LIST, RES
>
> static const char *path = NULL;
> static const char *host = NULL;
> -static int unit = -1;
> +static int unit = G_GATE_UNIT_AUTO;
> static unsigned flags = 0;
> static int force = 0;
> static unsigned queue_size = G_GATE_QUEUE_SIZE;
>
> Modified: head/sbin/ggate/ggatel/ggatel.c
> ==============================================================================
> --- head/sbin/ggate/ggatel/ggatel.c	Thu Feb 18 23:04:01 2010	(r204075)
> +++ head/sbin/ggate/ggatel/ggatel.c	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -50,7 +50,7 @@
> enum { UNSET, CREATE, DESTROY, LIST, RESCUE } action = UNSET;
>
> static const char *path = NULL;
> -static int unit = -1;
> +static int unit = G_GATE_UNIT_AUTO;
> static unsigned flags = 0;
> static int force = 0;
> static unsigned queue_size = G_GATE_QUEUE_SIZE;
>
> Added: head/sbin/hastctl/Makefile
> ==============================================================================
> --- /dev/null	00:00:00 1970	(empty, because file is newly added)
> +++ head/sbin/hastctl/Makefile	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -0,0 +1,36 @@
> +# $FreeBSD$
> +
> +.include <bsd.own.mk>
> +
> +.PATH:	${.CURDIR}/../hastd
> +
> +PROG=	hastctl
> +SRCS=	activemap.c
> +SRCS+=	ebuf.c
> +SRCS+=	hast_proto.c hastctl.c
> +SRCS+=	metadata.c
> +SRCS+=	nv.c
> +SRCS+=	parse.y pjdlog.c
> +SRCS+=	proto.c proto_common.c proto_tcp4.c proto_uds.c
> +SRCS+=	token.l
> +SRCS+=	subr.c
> +SRCS+=	y.tab.h
> +WARNS?=	6
> +MAN=	hastctl.8
> +
> +CFLAGS+=-I${.CURDIR}/../hastd
> +CFLAGS+=-DINET
> +.if ${MK_INET6_SUPPORT} != "no"
> +CFLAGS+=-DINET6
> +.endif
> +# This is needed to have WARNS > 1.
> +CFLAGS+=-DYY_NO_UNPUT
> +
> +DPADD=	${LIBCRYPTO} ${LIBL}
> +LDADD=	-lcrypto -ll
> +
> +YFLAGS+=-v
> +
> +CLEANFILES=y.tab.c y.tab.h y.output
> +
> +.include <bsd.prog.mk>
>
> Added: head/sbin/hastctl/hastctl.8
> ==============================================================================
> --- /dev/null	00:00:00 1970	(empty, because file is newly added)
> +++ head/sbin/hastctl/hastctl.8	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -0,0 +1,217 @@
> +.\" Copyright (c) 2010 The FreeBSD Foundation
> +.\" All rights reserved.
> +.\"
> +.\" This software was developed by Pawel Jakub Dawidek under sponsorship from
> +.\" the FreeBSD Foundation.
> +.\"
> +.\" Redistribution and use in source and binary forms, with or without
> +.\" modification, are permitted provided that the following conditions
> +.\" are met:
> +.\" 1. Redistributions of source code must retain the above copyright
> +.\"    notice, this list of conditions and the following disclaimer.
> +.\" 2. Redistributions in binary form must reproduce the above copyright
> +.\"    notice, this list of conditions and the following disclaimer in the
> +.\"    documentation and/or other materials provided with the distribution.
> +.\"
> +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
> +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> +.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
> +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> +.\" SUCH DAMAGE.
> +.\"
> +.\" $FreeBSD$
> +.\"
> +.Dd February 1, 2010
> +.Dt HASTCTL 8
> +.Os
> +.Sh NAME
> +.Nm hastctl
> +.Nd "Highly Available Storage control utility"
> +.Sh SYNOPSIS
> +.Nm
> +.Cm create
> +.Op Fl d
> +.Op Fl c Ar config
> +.Op Fl e Ar extentsize
> +.Op Fl k Ar keepdirty
> +.Op Fl m Ar mediasize
> +.Ar name ...
> +.Nm
> +.Cm role
> +.Op Fl d
> +.Op Fl c Ar config
> +.Aq init | primary | secondary
> +.Ar all | name ...
> +.Nm
> +.Cm status
> +.Op Fl d
> +.Op Fl c Ar config
> +.Op Ar all | name ...
> +.Nm
> +.Cm dump
> +.Op Fl d
> +.Op Fl c Ar config
> +.Op Ar all | name ...
> +.Sh DESCRIPTION
> +The
> +.Nm
> +utility is used to control the behaviour of the
> +.Xr hastd 8
> +daemon.
> +.Pp
> +This utility should be used by HA software like
> +.Nm heartbeat
> +or
> +.Nm ucarp
> +to set up the role of HAST resources when changing from primary mode to
> +secondary or vice versa.
> +Be aware that if a file system like UFS exists on HAST provider and
> +primary node dies, file system has to be checked for inconsistencies
> +with the
> +.Xr fsck 8
> +utility after switching secondary node to primary role.
> +.Pp
> +The first argument to
> +.Nm
> +indicates an action to be performed:
> +.Bl -tag -width ".Cm create"
> +.It Cm create
> +Initialize local provider configured for the given resource.
> +Additional options include:
> +.Bl -tag -width ".Fl e Ar extentsize"
> +.It Fl e Ar extentsize
> +Size of an extent.
> +Extent is a block which is used for synchronization.
> +.Nm
> +maintains a map of dirty extents and extent is the smallest region that
> +can be marked as dirty.
> +If any part of an extent is modified, entire extent will be synchronized
> +when nodes connect.
> +If extent size is too small, there will be too much disk activity
> +related to dirty map updates, which will degrade performance of the
> +given resource.
> +If extent size is too large, synchronization, even in case of short
> +outage, can take a long time increasing the risk of losing up-to-date
> +node before synchronization process is completed.
> +The default extent size is
> +.Va 2MB .
> +.It Fl k Ar keepdirty
> +Maximum number of dirty extents to keep dirty all the time.
> +Most recently used extents are kept dirty to reduce number of metadata
> +updates.
> +The default number of most recently used extents which will be kept
> +dirty is
> +.Va 64 .
> +.It Fl m Ar mediasize
> +Size of the smaller provider used as backend storage on both nodes.
> +This option can be omitted if node providers have the same size on both
> +sides.
> +.El
> +.It Cm role
> +Change role of the given resource.
> +The role can be one of:
> +.Bl -tag -width ".Cm secondary"
> +.It Cm init
> +Resource is turned off.
> +.It Cm primary
> +Local
> +.Xr hastd 8
> +daemon will act as primary node for the given resource.
> +System on which resource role is set to primary can use
> +.Pa /dev/hast/<name>
> +GEOM provider.
> +.It Cm secondary
> +Local
> +.Xr hastd 8
> +daemon will act as secondary node for the given resource - it will wait
> +for connection from the primary node and will handle I/O requests
> +received from it.
> +GEOM provider
> +.Pa /dev/hast/<name>
> +will not be created on secondary node.
> +.El
> +.It Cm status
> +Present status of the configured resources.
> +.It Cm dump
> +Dump metadata stored on local component for the configured resources.
> +.El
> +.Pp
> +In addition, every subcommand can be followed by the following options:
> +.Bl -tag -width ".Fl c Ar config"
> +.It Fl c Ar config
> +Specify alternative location of the configuration file.
> +The default location is
> +.Pa /etc/hast.conf .
> +.It Fl d
> +Print debugging information.
> +This option can be specified multiple times to raise the verbosity
> +level.
> +.El
> +.Sh EXIT STATUS
> +Exit status is 0 on success, or one of the values described in
> +.Xr sysexits 3
> +on failure.
> +.Sh EXAMPLES
> +Initialize HAST provider, create file system on it and mount it.
> +.Bd -literal -offset indent
> +nodeB# hastctl create shared
> +nodeB# hastd
> +nodeB# hastctl role secondary shared
> +
> +nodeA# hastctl create shared
> +nodeA# hastd
> +nodeA# hastctl role primary shared
> +nodeA# newfs -U /dev/hast/shared
> +nodeA# mount -o noatime /dev/hast/shared /shared
> +nodeA# application_start
> +.Ed
> +.Pp
> +Switch roles for the
> +.Nm shared
> +HAST resource.
> +.Bd -literal -offset indent
> +nodeA# application_stop
> +nodeA# umount -f /shared
> +nodeA# hastctl role secondary shared
> +
> +nodeB# hastctl role primary shared
> +nodeB# fsck -t ufs /dev/hast/shared
> +nodeB# mount -o noatime /dev/hast/shared /shared
> +nodeB# application_start
> +.Ed
> +.Sh FILES
> +.Bl -tag -width ".Pa /var/run/hastctl" -compact
> +.It Pa /etc/hast.conf
> +Configuration file for
> +.Nm
> +and
> +.Xr hastd 8 .
> +.It Pa /var/run/hastctl
> +Control socket used by
> +.Nm
> +to communicate with the
> +.Xr hastd 8
> +daemon.
> +.El
> +.Sh SEE ALSO
> +.Xr sysexits 3 ,
> +.Xr geom 4 ,
> +.Xr hast.conf 5 ,
> +.Xr fsck 8 ,
> +.Xr ggatec 8 ,
> +.Xr ggatel 8 ,
> +.Xr hastd 8 ,
> +.Xr mount 8 ,
> +.Xr newfs 8 .
> +.Sh AUTHORS
> +The
> +.Nm
> +was developed by
> +.An Pawel Jakub Dawidek Aq pjd at FreeBSD.org
> +under sponsorship of the FreeBSD Foundation.
>
> Added: head/sbin/hastctl/hastctl.c
> ==============================================================================
> --- /dev/null	00:00:00 1970	(empty, because file is newly added)
> +++ head/sbin/hastctl/hastctl.c	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -0,0 +1,526 @@
> +/*-
> + * Copyright (c) 2009-2010 The FreeBSD Foundation
> + * All rights reserved.
> + *
> + * This software was developed by Pawel Jakub Dawidek under sponsorship from
> + * the FreeBSD Foundation.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <sys/param.h>
> +#include <sys/disk.h>
> +#include <sys/ioctl.h>
> +#include <sys/stat.h>
> +#include <sys/sysctl.h>
> +
> +#include <assert.h>
> +#include <err.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <limits.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sysexits.h>
> +#include <unistd.h>
> +
> +#include <activemap.h>
> +
> +#include "hast.h"
> +#include "hast_proto.h"
> +#include "metadata.h"
> +#include "nv.h"
> +#include "pjdlog.h"
> +#include "proto.h"
> +#include "subr.h"
> +
> +/* Path to configuration file. */
> +static const char *cfgpath = HAST_CONFIG;
> +/* Hastd configuration. */
> +static struct hastd_config *cfg;
> +/* Control connection. */
> +static struct proto_conn *controlconn;
> +
> +enum {
> +	CMD_INVALID,
> +	CMD_CREATE,
> +	CMD_ROLE,
> +	CMD_STATUS,
> +	CMD_DUMP
> +};
> +
> +static __dead2 void
> +usage(void)
> +{
> +
> +	fprintf(stderr,
> +	    "usage: %s create [-d] [-c config] [-e extentsize] [-k keepdirty]\n"
> +	    "\t\t[-m mediasize] name ...\n",
> +	    getprogname());
> +	fprintf(stderr,
> +	    "       %s role [-d] [-c config] <init | primary | secondary> all | name ...\n",
> +	    getprogname());
> +	fprintf(stderr,
> +	    "       %s status [-d] [-c config] [all | name ...]\n",
> +	    getprogname());
> +	fprintf(stderr,
> +	    "       %s dump [-d] [-c config] [all | name ...]\n",
> +	    getprogname());
> +	exit(EX_USAGE);
> +}
> +
> +static int
> +create_one(struct hast_resource *res, intmax_t mediasize, intmax_t extentsize,
> +    intmax_t keepdirty)
> +{
> +	unsigned char *buf;
> +	size_t mapsize;
> +	int ec;
> +
> +	ec = 0;
> +	pjdlog_prefix_set("[%s] ", res->hr_name);
> +
> +	if (provinfo(res, true) < 0) {
> +		ec = EX_NOINPUT;
> +		goto end;
> +	}
> +	if (mediasize == 0)
> +		mediasize = res->hr_local_mediasize;
> +	else if (mediasize > res->hr_local_mediasize) {
> +		pjdlog_error("Provided mediasize is larger than provider %s size.",
> +		    res->hr_localpath);
> +		ec = EX_DATAERR;
> +		goto end;
> +	}
> +	if (!powerof2(res->hr_local_sectorsize)) {
> +		pjdlog_error("Sector size of provider %s is not power of 2 (%u).",
> +		    res->hr_localpath, res->hr_local_sectorsize);
> +		ec = EX_DATAERR;
> +		goto end;
> +	}
> +	if (extentsize == 0)
> +		extentsize = HAST_EXTENTSIZE;
> +	if (extentsize < res->hr_local_sectorsize) {
> +		pjdlog_error("Extent size (%jd) is less than sector size (%u).",
> +		    (intmax_t)extentsize, res->hr_local_sectorsize);
> +		ec = EX_DATAERR;
> +		goto end;
> +	}
> +	if ((extentsize % res->hr_local_sectorsize) != 0) {
> +		pjdlog_error("Extent size (%jd) is not multiple of sector size (%u).",
> +		    (intmax_t)extentsize, res->hr_local_sectorsize);
> +		ec = EX_DATAERR;
> +		goto end;
> +	}
> +	mapsize = activemap_calc_ondisk_size(mediasize - METADATA_SIZE,
> +	    extentsize, res->hr_local_sectorsize);
> +	if (keepdirty == 0)
> +		keepdirty = HAST_KEEPDIRTY;
> +	res->hr_datasize = mediasize - METADATA_SIZE - mapsize;
> +	res->hr_extentsize = extentsize;
> +	res->hr_keepdirty = keepdirty;
> +
> +	res->hr_localoff = METADATA_SIZE + mapsize;
> +
> +	if (metadata_write(res) < 0) {
> +		ec = EX_IOERR;
> +		goto end;
> +	}
> +	buf = calloc(1, mapsize);
> +	if (buf == NULL) {
> +		pjdlog_error("Unable to allocate %zu bytes of memory for initial bitmap.",
> +		    mapsize);
> +		ec = EX_TEMPFAIL;
> +		goto end;
> +	}
> +	if (pwrite(res->hr_localfd, buf, mapsize, METADATA_SIZE) !=
> +	    (ssize_t)mapsize) {
> +		pjdlog_errno(LOG_ERR, "Unable to store initial bitmap on %s",
> +		    res->hr_localpath);
> +		free(buf);
> +		ec = EX_IOERR;
> +		goto end;
> +	}
> +	free(buf);
> +end:
> +	if (res->hr_localfd >= 0)
> +		close(res->hr_localfd);
> +	pjdlog_prefix_set("%s", "");
> +	return (ec);
> +}
> +
> +static void
> +control_create(int argc, char *argv[], intmax_t mediasize, intmax_t extentsize,
> +    intmax_t keepdirty)
> +{
> +	struct hast_resource *res;
> +	int ec, ii, ret;
> +
> +	/* Initialize the given resources. */
> +	if (argc < 1)
> +		usage();
> +	ec = 0;
> +	for (ii = 0; ii < argc; ii++) {
> +		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
> +			if (strcmp(argv[ii], res->hr_name) == 0)
> +				break;
> +		}
> +		if (res == NULL) {
> +			pjdlog_error("Unknown resource %s.", argv[ii]);
> +			if (ec == 0)
> +				ec = EX_DATAERR;
> +			continue;
> +		}
> +		ret = create_one(res, mediasize, extentsize, keepdirty);
> +		if (ret != 0 && ec == 0)
> +			ec = ret;
> +	}
> +	exit(ec);
> +}
> +
> +static int
> +dump_one(struct hast_resource *res)
> +{
> +	int ret;
> +
> +	ret = metadata_read(res, false);
> +	if (ret != 0)
> +		return (ret);
> +
> +	printf("resource: %s\n", res->hr_name);
> +	printf("    datasize: %ju\n", (uintmax_t)res->hr_datasize);
> +	printf("    extentsize: %d\n", res->hr_extentsize);
> +	printf("    keepdirty: %d\n", res->hr_keepdirty);
> +	printf("    localoff: %ju\n", (uintmax_t)res->hr_localoff);
> +	printf("    resuid: %ju\n", (uintmax_t)res->hr_resuid);
> +	printf("    localcnt: %ju\n", (uintmax_t)res->hr_primary_localcnt);
> +	printf("    remotecnt: %ju\n", (uintmax_t)res->hr_primary_remotecnt);
> +	printf("    prevrole: %s\n", role2str(res->hr_previous_role));
> +
> +	return (0);
> +}
> +
> +static void
> +control_dump(int argc, char *argv[])
> +{
> +	struct hast_resource *res;
> +	int ec, ret;
> +
> +	/* Dump metadata of the given resource(s). */
> +
> +	ec = 0;
> +	if (argc == 0 || (argc == 1 && strcmp(argv[0], "all") == 0)) {
> +		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
> +			ret = dump_one(res);
> +			if (ret != 0 && ec == 0)
> +				ec = ret;
> +		}
> +	} else {
> +		int ii;
> +
> +		for (ii = 0; ii < argc; ii++) {
> +			TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
> +				if (strcmp(argv[ii], res->hr_name) == 0)
> +					break;
> +			}
> +			if (res == NULL) {
> +				pjdlog_error("Unknown resource %s.", argv[ii]);
> +				if (ec == 0)
> +					ec = EX_DATAERR;
> +				continue;
> +			}
> +			ret = dump_one(res);
> +			if (ret != 0 && ec == 0)
> +				ec = ret;
> +		}
> +	}
> +	exit(ec);
> +}
> +
> +static int
> +control_set_role(struct nv *nv, const char *newrole)
> +{
> +	const char *res, *oldrole;
> +	unsigned int ii;
> +	int error, ret;
> +
> +	ret = 0;
> +
> +	for (ii = 0; ; ii++) {
> +		res = nv_get_string(nv, "resource%u", ii);
> +		if (res == NULL)
> +			break;
> +		pjdlog_prefix_set("[%s] ", res);
> +		error = nv_get_int16(nv, "error%u", ii);
> +		if (error != 0) {
> +			if (ret == 0)
> +				ret = error;
> +			pjdlog_warning("Received error %d from hastd.", error);
> +			continue;
> +		}
> +		oldrole = nv_get_string(nv, "role%u", ii);
> +		if (strcmp(oldrole, newrole) == 0)
> +			pjdlog_debug(2, "Role unchanged (%s).", oldrole);
> +		else {
> +			pjdlog_debug(1, "Role changed from %s to %s.", oldrole,
> +			    newrole);
> +		}
> +	}
> +	pjdlog_prefix_set("%s", "");
> +	return (ret);
> +}
> +
> +static int
> +control_status(struct nv *nv)
> +{
> +	unsigned int ii;
> +	const char *str;
> +	int error, ret;
> +
> +	ret = 0;
> +
> +	for (ii = 0; ; ii++) {
> +		str = nv_get_string(nv, "resource%u", ii);
> +		if (str == NULL)
> +			break;
> +		printf("%s:\n", str);
> +		error = nv_get_int16(nv, "error%u", ii);
> +		if (error != 0) {
> +			if (ret == 0)
> +				ret = error;
> +			printf("  error: %d\n", error);
> +			continue;
> +		}
> +		printf("  role: %s\n", nv_get_string(nv, "role%u", ii));
> +		printf("  provname: %s\n",
> +		    nv_get_string(nv, "provname%u", ii));
> +		printf("  localpath: %s\n",
> +		    nv_get_string(nv, "localpath%u", ii));
> +		printf("  extentsize: %u\n",
> +		    (unsigned int)nv_get_uint32(nv, "extentsize%u", ii));
> +		printf("  keepdirty: %u\n",
> +		    (unsigned int)nv_get_uint32(nv, "keepdirty%u", ii));
> +		printf("  remoteaddr: %s\n",
> +		    nv_get_string(nv, "remoteaddr%u", ii));
> +		printf("  replication: %s\n",
> +		    nv_get_string(nv, "replication%u", ii));
> +		str = nv_get_string(nv, "status%u", ii);
> +		if (str != NULL)
> +			printf("  status: %s\n", str);
> +		printf("  dirty: %ju bytes\n",
> +		    (uintmax_t)nv_get_uint64(nv, "dirty%u", ii));
> +	}
> +	return (ret);
> +}
> +
> +static int
> +numfromstr(const char *str, intmax_t *nump)
> +{
> +	intmax_t num;
> +	char *suffix;
> +	int rerrno;
> +
> +	rerrno = errno;
> +	errno = 0;
> +	num = strtoimax(str, &suffix, 0);
> +	if (errno == 0 && *suffix != '\0')
> +		errno = EINVAL;
> +	if (errno != 0)
> +		return (-1);
> +	*nump = num;
> +	errno = rerrno;
> +	return (0);
> +}
> +
> +int
> +main(int argc, char *argv[])
> +{
> +	struct nv *nv;
> +	intmax_t mediasize, extentsize, keepdirty;
> +	int cmd, debug, error, ii;
> +	const char *optstr;
> +
> +	debug = 0;
> +	mediasize = extentsize = keepdirty = 0;
> +
> +	if (argc == 1)
> +		usage();
> +
> +	if (strcmp(argv[1], "create") == 0) {
> +		cmd = CMD_CREATE;
> +		optstr = "c:de:k:m:h";
> +	} else if (strcmp(argv[1], "role") == 0) {
> +		cmd = CMD_ROLE;
> +		optstr = "c:dh";
> +	} else if (strcmp(argv[1], "status") == 0) {
> +		cmd = CMD_STATUS;
> +		optstr = "c:dh";
> +	} else if (strcmp(argv[1], "dump") == 0) {
> +		cmd = CMD_DUMP;
> +		optstr = "c:dh";
> +	} else
> +		usage();
> +
> +	argc--;
> +	argv++;
> +
> +	for (;;) {
> +		int ch;
> +
> +		ch = getopt(argc, argv, optstr);
> +		if (ch == -1)
> +			break;
> +		switch (ch) {
> +		case 'c':
> +			cfgpath = optarg;
> +			break;
> +		case 'd':
> +			debug++;
> +			break;
> +		case 'e':
> +			if (numfromstr(optarg, &extentsize) < 0)
> +				err(1, "Invalid extentsize");
> +			break;
> +		case 'k':
> +			if (numfromstr(optarg, &keepdirty) < 0)
> +				err(1, "Invalid keepdirty");
> +			break;
> +		case 'm':
> +			if (numfromstr(optarg, &mediasize) < 0)
> +				err(1, "Invalid mediasize");
> +			break;
> +		case 'h':
> +		default:
> +			usage();
> +		}
> +	}
> +	argc -= optind;
> +	argv += optind;
> +
> +	switch (cmd) {
> +	case CMD_CREATE:
> +	case CMD_ROLE:
> +		if (argc == 0)
> +			usage();
> +		break;
> +	}
> +
> +	pjdlog_debug_set(debug);
> +
> +	cfg = yy_config_parse(cfgpath);
> +	assert(cfg != NULL);
> +
> +	switch (cmd) {
> +	case CMD_CREATE:
> +		control_create(argc, argv, mediasize, extentsize, keepdirty);
> +		/* NOTREACHED */
> +		assert(!"What are we doing here?!");
> +		break;
> +	case CMD_DUMP:
> +		/* Dump metadata from local component of the given resource. */
> +		control_dump(argc, argv);
> +		/* NOTREACHED */
> +		assert(!"What are we doing here?!");
> +		break;
> +	case CMD_ROLE:
> +		/* Change role for the given resources. */
> +		if (argc < 2)
> +			usage();
> +		nv = nv_alloc();
> +		nv_add_uint8(nv, HASTCTL_CMD_SETROLE, "cmd");
> +		if (strcmp(argv[0], "init") == 0)
> +			nv_add_uint8(nv, HAST_ROLE_INIT, "role");
> +		else if (strcmp(argv[0], "primary") == 0)
> +			nv_add_uint8(nv, HAST_ROLE_PRIMARY, "role");
> +		else if (strcmp(argv[0], "secondary") == 0)
> +			nv_add_uint8(nv, HAST_ROLE_SECONDARY, "role");
> +		else
> +			usage();
> +		for (ii = 0; ii < argc - 1; ii++)
> +			nv_add_string(nv, argv[ii + 1], "resource%d", ii);
> +		break;
> +	case CMD_STATUS:
> +		/* Obtain status of the given resources. */
> +		nv = nv_alloc();
> +		nv_add_uint8(nv, HASTCTL_CMD_STATUS, "cmd");
> +		if (argc == 0)
> +			nv_add_string(nv, "all", "resource%d", 0);
> +		else {
> +			for (ii = 0; ii < argc; ii++)
> +				nv_add_string(nv, argv[ii], "resource%d", ii);
> +		}
> +		break;
> +	default:
> +		assert(!"Impossible role!");
> +	}
> +
> +	/* Setup control connection... */
> +	if (proto_client(cfg->hc_controladdr, &controlconn) < 0) {
> +		pjdlog_exit(EX_OSERR,
> +		    "Unable to setup control connection to %s",
> +		    cfg->hc_controladdr);
> +	}
> +	/* ...and connect to hastd. */
> +	if (proto_connect(controlconn) < 0) {
> +		pjdlog_exit(EX_OSERR, "Unable to connect to hastd via %s",
> +		    cfg->hc_controladdr);
> +	}
> +	/* Send the command to the server... */
> +	if (hast_proto_send(NULL, controlconn, nv, NULL, 0) < 0) {
> +		pjdlog_exit(EX_UNAVAILABLE,
> +		    "Unable to send command to hastd via %s",
> +		    cfg->hc_controladdr);
> +	}
> +	nv_free(nv);
> +	/* ...and receive reply. */
> +	if (hast_proto_recv(NULL, controlconn, &nv, NULL, 0) < 0) {
> +		pjdlog_exit(EX_UNAVAILABLE,
> +		    "cannot receive reply from hastd via %s",
> +		    cfg->hc_controladdr);
> +	}
> +
> +	error = nv_get_int16(nv, "error");
> +	if (error != 0) {
> +		pjdlog_exitx(EX_SOFTWARE, "Error %d received from hastd.",
> +		    error);
> +	}
> +	nv_set_error(nv, 0);
> +
> +	switch (cmd) {
> +	case CMD_ROLE:
> +		error = control_set_role(nv, argv[0]);
> +		break;
> +	case CMD_STATUS:
> +		error = control_status(nv);
> +		break;
> +	default:
> +		assert(!"Impossible role!");
> +	}
> +
> +	exit(error);
> +}
>
> Added: head/sbin/hastd/Makefile
> ==============================================================================
> --- /dev/null	00:00:00 1970	(empty, because file is newly added)
> +++ head/sbin/hastd/Makefile	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -0,0 +1,37 @@
> +# $FreeBSD$
> +
> +.include <bsd.own.mk>
> +
> +PROG=	hastd
> +SRCS=	activemap.c
> +SRCS+=	control.c
> +SRCS+=	ebuf.c
> +SRCS+=	hast_proto.c hastd.c hooks.c
> +SRCS+=	metadata.c
> +SRCS+=	nv.c
> +SRCS+=	secondary.c
> +SRCS+=	parse.y pjdlog.c primary.c
> +SRCS+=	proto.c proto_common.c proto_socketpair.c proto_tcp4.c proto_uds.c
> +SRCS+=	rangelock.c
> +SRCS+=	subr.c
> +SRCS+=	token.l
> +SRCS+=	y.tab.h
> +WARNS?=	6
> +MAN=	hastd.8 hast.conf.5
> +
> +CFLAGS+=-I${.CURDIR}
> +CFLAGS+=-DINET
> +.if ${MK_INET6_SUPPORT} != "no"
> +CFLAGS+=-DINET6
> +.endif
> +# This is needed to have WARNS > 1.
> +CFLAGS+=-DYY_NO_UNPUT
> +
> +DPADD=	${LIBCRYPTO} ${LIBGEOM} ${LIBL} ${LIBPTHREAD} ${LIBUTIL}
> +LDADD=	-lcrypto -lgeom -ll -lpthread -lutil
> +
> +YFLAGS+=-v
> +
> +CLEANFILES=y.tab.c y.tab.h y.output
> +
> +.include <bsd.prog.mk>
>
> Added: head/sbin/hastd/activemap.c
> ==============================================================================
> --- /dev/null	00:00:00 1970	(empty, because file is newly added)
> +++ head/sbin/hastd/activemap.c	Thu Feb 18 23:16:19 2010	(r204076)
> @@ -0,0 +1,691 @@
> +/*-
> + * Copyright (c) 2009-2010 The FreeBSD Foundation
> + * All rights reserved.
> + *
> + * This software was developed by Pawel Jakub Dawidek under sponsorship from
> + * the FreeBSD Foundation.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <sys/param.h>	/* powerof2() */
> +#include <sys/queue.h>
> +
> +#include <assert.h>
> +#include <bitstring.h>
> +#include <errno.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +
> +#include <activemap.h>
> +
> +#define	ACTIVEMAP_MAGIC	0xac71e4
> +struct activemap {
> +	int		 am_magic;	/* Magic value. */
> +	off_t	 	 am_mediasize;	/* Media size in bytes. */
> +	uint32_t	 am_extentsize;	/* Extent size in bytes,
> +					   must be power of 2. */
> +	uint8_t		 am_extentshift;/* 2 ^ extentbits == extentsize */
> +	int		 am_nextents;	/* Number of extents. */
> +	size_t		 am_mapsize;	/* Bitmap size in bytes. */
> +	uint16_t	*am_memtab;	/* An array that holds number of pending
> +					   writes per extent. */
> +	bitstr_t	*am_diskmap;	/* On-disk bitmap of dirty extents. */
> +	bitstr_t	*am_memmap;	/* In-memory bitmap of dirty extents. */
> +	size_t		 am_diskmapsize; /* Map size rounded up to sector size. */
> +	uint64_t	 am_ndirty;	/* Number of dirty regions. */
> +	bitstr_t	*am_syncmap;	/* Bitmap of extents to sync. */
> +	off_t		 am_syncoff;	/* Next synchronization offset. */
> +	TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that
> +					   we keep dirty to reduce bitmap
> +					   updates. */
> +	int		 am_nkeepdirty;	/* Number of am_keepdirty elements. */
> +	int		 am_nkeepdirty_limit; /* Maximum number of am_keepdirty
> +					         elements. */
> +};
> +
> +struct keepdirty {
> +	int	kd_extent;
> +	TAILQ_ENTRY(keepdirty) kd_next;
> +};
> +
> +/*
> + * Helper function taken from sys/systm.h to calculate extentshift.
> + */
> +static uint32_t
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
>


More information about the svn-src-head mailing list