svn commit: r204076 - in head: etc/defaults etc/rc.d sbin
sbin/ggate/ggatec sbin/ggate/ggatel sbin/hastctl sbin/hastd share/examples
share/examples/hast share/man/man5 sys/geom/gate
Robert Watson
rwatson at FreeBSD.org
Thu Feb 18 23:22:32 UTC 2010
On Thu, 18 Feb 2010, Pawel Jakub Dawidek wrote:
> Please welcome HAST - Highly Available Storage.
Excellent news! I know a number of shops will be very excited to see this in
the tree.
Could you say a little about the future of ggated in light of the new arrival?
Robert
>
> HAST allows to transparently store data on two physically separated machines
> connected over the TCP/IP network. HAST works in Primary-Secondary
> (Master-Backup, Master-Slave) configuration, which means that only one of the
> cluster nodes can be active at any given time. Only Primary node is able to
> handle I/O requests to HAST-managed devices. Currently HAST is limited to two
> cluster nodes in total.
>
> HAST operates on block level - it provides disk-like devices in /dev/hast/
> directory for use by file systems and/or applications. Working on block level
> makes it transparent for file systems and applications. There is no difference
> between using HAST-provided device and raw disk, partition, etc. All of them
> are just regular GEOM providers in FreeBSD.
>
> For more information please consult hastd(8), hastctl(8) and hast.conf(5)
> manual pages, as well as http://wiki.FreeBSD.org/HAST.
>
> Sponsored by: FreeBSD Foundation
> Sponsored by: OMCnet Internet Service GmbH
> Sponsored by: TransIP BV
>
> Added:
> head/etc/rc.d/hastd (contents, props changed)
> head/sbin/hastctl/
> head/sbin/hastctl/Makefile (contents, props changed)
> head/sbin/hastctl/hastctl.8 (contents, props changed)
> head/sbin/hastctl/hastctl.c (contents, props changed)
> head/sbin/hastd/
> head/sbin/hastd/Makefile (contents, props changed)
> head/sbin/hastd/activemap.c (contents, props changed)
> head/sbin/hastd/activemap.h (contents, props changed)
> head/sbin/hastd/control.c (contents, props changed)
> head/sbin/hastd/control.h (contents, props changed)
> head/sbin/hastd/ebuf.c (contents, props changed)
> head/sbin/hastd/ebuf.h (contents, props changed)
> head/sbin/hastd/hast.conf.5 (contents, props changed)
> head/sbin/hastd/hast.h (contents, props changed)
> head/sbin/hastd/hast_proto.c (contents, props changed)
> head/sbin/hastd/hast_proto.h (contents, props changed)
> head/sbin/hastd/hastd.8 (contents, props changed)
> head/sbin/hastd/hastd.c (contents, props changed)
> head/sbin/hastd/hastd.h (contents, props changed)
> head/sbin/hastd/hooks.c (contents, props changed)
> head/sbin/hastd/hooks.h (contents, props changed)
> head/sbin/hastd/metadata.c (contents, props changed)
> head/sbin/hastd/metadata.h (contents, props changed)
> head/sbin/hastd/nv.c (contents, props changed)
> head/sbin/hastd/nv.h (contents, props changed)
> head/sbin/hastd/parse.y (contents, props changed)
> head/sbin/hastd/pjdlog.c (contents, props changed)
> head/sbin/hastd/pjdlog.h (contents, props changed)
> head/sbin/hastd/primary.c (contents, props changed)
> head/sbin/hastd/proto.c (contents, props changed)
> head/sbin/hastd/proto.h (contents, props changed)
> head/sbin/hastd/proto_common.c (contents, props changed)
> head/sbin/hastd/proto_impl.h (contents, props changed)
> head/sbin/hastd/proto_socketpair.c (contents, props changed)
> head/sbin/hastd/proto_tcp4.c (contents, props changed)
> head/sbin/hastd/proto_uds.c (contents, props changed)
> head/sbin/hastd/rangelock.c (contents, props changed)
> head/sbin/hastd/rangelock.h (contents, props changed)
> head/sbin/hastd/secondary.c (contents, props changed)
> head/sbin/hastd/subr.c (contents, props changed)
> head/sbin/hastd/subr.h (contents, props changed)
> head/sbin/hastd/synch.h (contents, props changed)
> head/sbin/hastd/token.l (contents, props changed)
> head/share/examples/hast/
> head/share/examples/hast/ucarp.sh (contents, props changed)
> head/share/examples/hast/ucarp_down.sh (contents, props changed)
> head/share/examples/hast/ucarp_up.sh (contents, props changed)
> head/share/examples/hast/vip-down.sh (contents, props changed)
> head/share/examples/hast/vip-up.sh (contents, props changed)
> Modified:
> head/etc/defaults/rc.conf
> head/etc/rc.d/Makefile
> head/sbin/Makefile
> head/sbin/ggate/ggatec/ggatec.c
> head/sbin/ggate/ggatel/ggatel.c
> head/share/examples/Makefile
> head/share/man/man5/rc.conf.5
> head/sys/geom/gate/g_gate.c
> head/sys/geom/gate/g_gate.h
>
> Modified: head/etc/defaults/rc.conf
> ==============================================================================
> --- head/etc/defaults/rc.conf Thu Feb 18 23:04:01 2010 (r204075)
> +++ head/etc/defaults/rc.conf Thu Feb 18 23:16:19 2010 (r204076)
> @@ -260,6 +260,9 @@ syslogd_flags="-s" # Flags to syslogd (
> inetd_enable="NO" # Run the network daemon dispatcher (YES/NO).
> inetd_program="/usr/sbin/inetd" # path to inetd, if you want a different one.
> inetd_flags="-wW -C 60" # Optional flags to inetd
> +hastd_enable="NO" # Run the HAST daemon (YES/NO).
> +hastd_program="/sbin/hastd" # path to hastd, if you want a different one.
> +hastd_flags="" # Optional flags to hastd.
> #
> # named. It may be possible to run named in a sandbox, man security for
> # details.
>
> Modified: head/etc/rc.d/Makefile
> ==============================================================================
> --- head/etc/rc.d/Makefile Thu Feb 18 23:04:01 2010 (r204075)
> +++ head/etc/rc.d/Makefile Thu Feb 18 23:16:19 2010 (r204076)
> @@ -12,7 +12,7 @@ FILES= DAEMON FILESYSTEMS LOGIN NETWORKI
> encswap \
> faith fsck ftp-proxy ftpd \
> gbde geli geli2 gssd \
> - hcsecd \
> + hastd hcsecd \
> hostapd hostid hostid_save hostname \
> inetd initrandom \
> ip6addrctl ipfilter ipfs ipfw ipmon \
>
> Added: head/etc/rc.d/hastd
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/etc/rc.d/hastd Thu Feb 18 23:16:19 2010 (r204076)
> @@ -0,0 +1,31 @@
> +#!/bin/sh
> +#
> +# $FreeBSD$
> +#
> +
> +# PROVIDE: hastd
> +# REQUIRE: NETWORKING syslogd
> +# BEFORE: DAEMON
> +
> +. /etc/rc.subr
> +
> +name="hastd"
> +rcvar=`set_rcvar`
> +pidfile="/var/run/${name}.pid"
> +command="/sbin/${name}"
> +hastctl="/sbin/hastctl"
> +required_files="/etc/hast.conf"
> +stop_precmd="hastd_stop_precmd"
> +required_modules="geom_gate:g_gate"
> +
> +sockfile="/var/run/syslogd.sockets"
> +evalargs="rc_flags=\"\`set_socketlist\` \$rc_flags\""
> +altlog_proglist="named"
> +
> +hastd_stop_precmd()
> +{
> + ${hastctl} role init all
> +}
> +
> +load_rc_config $name
> +run_rc_command "$1"
>
> Modified: head/sbin/Makefile
> ==============================================================================
> --- head/sbin/Makefile Thu Feb 18 23:04:01 2010 (r204075)
> +++ head/sbin/Makefile Thu Feb 18 23:16:19 2010 (r204076)
> @@ -36,6 +36,8 @@ SUBDIR= adjkerntz \
> ggate \
> growfs \
> gvinum \
> + hastctl \
> + hastd \
> ifconfig \
> init \
> ${_ipf} \
>
> Modified: head/sbin/ggate/ggatec/ggatec.c
> ==============================================================================
> --- head/sbin/ggate/ggatec/ggatec.c Thu Feb 18 23:04:01 2010 (r204075)
> +++ head/sbin/ggate/ggatec/ggatec.c Thu Feb 18 23:16:19 2010 (r204076)
> @@ -59,7 +59,7 @@ enum { UNSET, CREATE, DESTROY, LIST, RES
>
> static const char *path = NULL;
> static const char *host = NULL;
> -static int unit = -1;
> +static int unit = G_GATE_UNIT_AUTO;
> static unsigned flags = 0;
> static int force = 0;
> static unsigned queue_size = G_GATE_QUEUE_SIZE;
>
> Modified: head/sbin/ggate/ggatel/ggatel.c
> ==============================================================================
> --- head/sbin/ggate/ggatel/ggatel.c Thu Feb 18 23:04:01 2010 (r204075)
> +++ head/sbin/ggate/ggatel/ggatel.c Thu Feb 18 23:16:19 2010 (r204076)
> @@ -50,7 +50,7 @@
> enum { UNSET, CREATE, DESTROY, LIST, RESCUE } action = UNSET;
>
> static const char *path = NULL;
> -static int unit = -1;
> +static int unit = G_GATE_UNIT_AUTO;
> static unsigned flags = 0;
> static int force = 0;
> static unsigned queue_size = G_GATE_QUEUE_SIZE;
>
> Added: head/sbin/hastctl/Makefile
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/sbin/hastctl/Makefile Thu Feb 18 23:16:19 2010 (r204076)
> @@ -0,0 +1,36 @@
> +# $FreeBSD$
> +
> +.include <bsd.own.mk>
> +
> +.PATH: ${.CURDIR}/../hastd
> +
> +PROG= hastctl
> +SRCS= activemap.c
> +SRCS+= ebuf.c
> +SRCS+= hast_proto.c hastctl.c
> +SRCS+= metadata.c
> +SRCS+= nv.c
> +SRCS+= parse.y pjdlog.c
> +SRCS+= proto.c proto_common.c proto_tcp4.c proto_uds.c
> +SRCS+= token.l
> +SRCS+= subr.c
> +SRCS+= y.tab.h
> +WARNS?= 6
> +MAN= hastctl.8
> +
> +CFLAGS+=-I${.CURDIR}/../hastd
> +CFLAGS+=-DINET
> +.if ${MK_INET6_SUPPORT} != "no"
> +CFLAGS+=-DINET6
> +.endif
> +# This is needed to have WARNS > 1.
> +CFLAGS+=-DYY_NO_UNPUT
> +
> +DPADD= ${LIBCRYPTO} ${LIBL}
> +LDADD= -lcrypto -ll
> +
> +YFLAGS+=-v
> +
> +CLEANFILES=y.tab.c y.tab.h y.output
> +
> +.include <bsd.prog.mk>
>
> Added: head/sbin/hastctl/hastctl.8
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/sbin/hastctl/hastctl.8 Thu Feb 18 23:16:19 2010 (r204076)
> @@ -0,0 +1,217 @@
> +.\" Copyright (c) 2010 The FreeBSD Foundation
> +.\" All rights reserved.
> +.\"
> +.\" This software was developed by Pawel Jakub Dawidek under sponsorship from
> +.\" the FreeBSD Foundation.
> +.\"
> +.\" Redistribution and use in source and binary forms, with or without
> +.\" modification, are permitted provided that the following conditions
> +.\" are met:
> +.\" 1. Redistributions of source code must retain the above copyright
> +.\" notice, this list of conditions and the following disclaimer.
> +.\" 2. Redistributions in binary form must reproduce the above copyright
> +.\" notice, this list of conditions and the following disclaimer in the
> +.\" documentation and/or other materials provided with the distribution.
> +.\"
> +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
> +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
> +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> +.\" SUCH DAMAGE.
> +.\"
> +.\" $FreeBSD$
> +.\"
> +.Dd February 1, 2010
> +.Dt HASTCTL 8
> +.Os
> +.Sh NAME
> +.Nm hastctl
> +.Nd "Highly Available Storage control utility"
> +.Sh SYNOPSIS
> +.Nm
> +.Cm create
> +.Op Fl d
> +.Op Fl c Ar config
> +.Op Fl e Ar extentsize
> +.Op Fl k Ar keepdirty
> +.Op Fl m Ar mediasize
> +.Ar name ...
> +.Nm
> +.Cm role
> +.Op Fl d
> +.Op Fl c Ar config
> +.Aq init | primary | secondary
> +.Ar all | name ...
> +.Nm
> +.Cm status
> +.Op Fl d
> +.Op Fl c Ar config
> +.Op Ar all | name ...
> +.Nm
> +.Cm dump
> +.Op Fl d
> +.Op Fl c Ar config
> +.Op Ar all | name ...
> +.Sh DESCRIPTION
> +The
> +.Nm
> +utility is used to control the behaviour of the
> +.Xr hastd 8
> +daemon.
> +.Pp
> +This utility should be used by HA software like
> +.Nm heartbeat
> +or
> +.Nm ucarp
> +to set up the role of HAST resources when changing from primary mode to
> +secondary or vice versa.
> +Be aware that if a file system like UFS exists on HAST provider and
> +primary node dies, file system has to be checked for inconsistencies
> +with the
> +.Xr fsck 8
> +utility after switching secondary node to primary role.
> +.Pp
> +The first argument to
> +.Nm
> +indicates an action to be performed:
> +.Bl -tag -width ".Cm create"
> +.It Cm create
> +Initialize local provider configured for the given resource.
> +Additional options include:
> +.Bl -tag -width ".Fl e Ar extentsize"
> +.It Fl e Ar extentsize
> +Size of an extent.
> +Extent is a block which is used for synchronization.
> +.Nm
> +maintains a map of dirty extents and extent is the smallest region that
> +can be marked as dirty.
> +If any part of an extent is modified, entire extent will be synchronized
> +when nodes connect.
> +If extent size is too small, there will be too much disk activity
> +related to dirty map updates, which will degrade performance of the
> +given resource.
> +If extent size is too large, synchronization, even in case of short
> +outage, can take a long time increasing the risk of losing up-to-date
> +node before synchronization process is completed.
> +The default extent size is
> +.Va 2MB .
> +.It Fl k Ar keepdirty
> +Maximum number of dirty extents to keep dirty all the time.
> +Most recently used extents are kept dirty to reduce number of metadata
> +updates.
> +The default number of most recently used extents which will be kept
> +dirty is
> +.Va 64 .
> +.It Fl m Ar mediasize
> +Size of the smaller provider used as backend storage on both nodes.
> +This option can be omitted if node providers have the same size on both
> +sides.
> +.El
> +.It Cm role
> +Change role of the given resource.
> +The role can be one of:
> +.Bl -tag -width ".Cm secondary"
> +.It Cm init
> +Resource is turned off.
> +.It Cm primary
> +Local
> +.Xr hastd 8
> +daemon will act as primary node for the given resource.
> +System on which resource role is set to primary can use
> +.Pa /dev/hast/<name>
> +GEOM provider.
> +.It Cm secondary
> +Local
> +.Xr hastd 8
> +daemon will act as secondary node for the given resource - it will wait
> +for connection from the primary node and will handle I/O requests
> +received from it.
> +GEOM provider
> +.Pa /dev/hast/<name>
> +will not be created on secondary node.
> +.El
> +.It Cm status
> +Present status of the configured resources.
> +.It Cm dump
> +Dump metadata stored on local component for the configured resources.
> +.El
> +.Pp
> +In addition, every subcommand can be followed by the following options:
> +.Bl -tag -width ".Fl c Ar config"
> +.It Fl c Ar config
> +Specify alternative location of the configuration file.
> +The default location is
> +.Pa /etc/hast.conf .
> +.It Fl d
> +Print debugging information.
> +This option can be specified multiple times to raise the verbosity
> +level.
> +.El
> +.Sh EXIT STATUS
> +Exit status is 0 on success, or one of the values described in
> +.Xr sysexits 3
> +on failure.
> +.Sh EXAMPLES
> +Initialize HAST provider, create file system on it and mount it.
> +.Bd -literal -offset indent
> +nodeB# hastctl create shared
> +nodeB# hastd
> +nodeB# hastctl role secondary shared
> +
> +nodeA# hastctl create shared
> +nodeA# hastd
> +nodeA# hastctl role primary shared
> +nodeA# newfs -U /dev/hast/shared
> +nodeA# mount -o noatime /dev/hast/shared /shared
> +nodeA# application_start
> +.Ed
> +.Pp
> +Switch roles for the
> +.Nm shared
> +HAST resource.
> +.Bd -literal -offset indent
> +nodeA# application_stop
> +nodeA# umount -f /shared
> +nodeA# hastctl role secondary shared
> +
> +nodeB# hastctl role primary shared
> +nodeB# fsck -t ufs /dev/hast/shared
> +nodeB# mount -o noatime /dev/hast/shared /shared
> +nodeB# application_start
> +.Ed
> +.Sh FILES
> +.Bl -tag -width ".Pa /var/run/hastctl" -compact
> +.It Pa /etc/hast.conf
> +Configuration file for
> +.Nm
> +and
> +.Xr hastd 8 .
> +.It Pa /var/run/hastctl
> +Control socket used by
> +.Nm
> +to communicate with the
> +.Xr hastd 8
> +daemon.
> +.El
> +.Sh SEE ALSO
> +.Xr sysexits 3 ,
> +.Xr geom 4 ,
> +.Xr hast.conf 5 ,
> +.Xr fsck 8 ,
> +.Xr ggatec 8 ,
> +.Xr ggatel 8 ,
> +.Xr hastd 8 ,
> +.Xr mount 8 ,
> +.Xr newfs 8 .
> +.Sh AUTHORS
> +The
> +.Nm
> +was developed by
> +.An Pawel Jakub Dawidek Aq pjd at FreeBSD.org
> +under sponsorship of the FreeBSD Foundation.
>
> Added: head/sbin/hastctl/hastctl.c
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/sbin/hastctl/hastctl.c Thu Feb 18 23:16:19 2010 (r204076)
> @@ -0,0 +1,526 @@
> +/*-
> + * Copyright (c) 2009-2010 The FreeBSD Foundation
> + * All rights reserved.
> + *
> + * This software was developed by Pawel Jakub Dawidek under sponsorship from
> + * the FreeBSD Foundation.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <sys/param.h>
> +#include <sys/disk.h>
> +#include <sys/ioctl.h>
> +#include <sys/stat.h>
> +#include <sys/sysctl.h>
> +
> +#include <assert.h>
> +#include <err.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <limits.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sysexits.h>
> +#include <unistd.h>
> +
> +#include <activemap.h>
> +
> +#include "hast.h"
> +#include "hast_proto.h"
> +#include "metadata.h"
> +#include "nv.h"
> +#include "pjdlog.h"
> +#include "proto.h"
> +#include "subr.h"
> +
> +/* Path to configuration file. */
> +static const char *cfgpath = HAST_CONFIG;
> +/* Hastd configuration. */
> +static struct hastd_config *cfg;
> +/* Control connection. */
> +static struct proto_conn *controlconn;
> +
> +enum {
> + CMD_INVALID,
> + CMD_CREATE,
> + CMD_ROLE,
> + CMD_STATUS,
> + CMD_DUMP
> +};
> +
> +static __dead2 void
> +usage(void)
> +{
> +
> + fprintf(stderr,
> + "usage: %s create [-d] [-c config] [-e extentsize] [-k keepdirty]\n"
> + "\t\t[-m mediasize] name ...\n",
> + getprogname());
> + fprintf(stderr,
> + " %s role [-d] [-c config] <init | primary | secondary> all | name ...\n",
> + getprogname());
> + fprintf(stderr,
> + " %s status [-d] [-c config] [all | name ...]\n",
> + getprogname());
> + fprintf(stderr,
> + " %s dump [-d] [-c config] [all | name ...]\n",
> + getprogname());
> + exit(EX_USAGE);
> +}
> +
> +static int
> +create_one(struct hast_resource *res, intmax_t mediasize, intmax_t extentsize,
> + intmax_t keepdirty)
> +{
> + unsigned char *buf;
> + size_t mapsize;
> + int ec;
> +
> + ec = 0;
> + pjdlog_prefix_set("[%s] ", res->hr_name);
> +
> + if (provinfo(res, true) < 0) {
> + ec = EX_NOINPUT;
> + goto end;
> + }
> + if (mediasize == 0)
> + mediasize = res->hr_local_mediasize;
> + else if (mediasize > res->hr_local_mediasize) {
> + pjdlog_error("Provided mediasize is larger than provider %s size.",
> + res->hr_localpath);
> + ec = EX_DATAERR;
> + goto end;
> + }
> + if (!powerof2(res->hr_local_sectorsize)) {
> + pjdlog_error("Sector size of provider %s is not power of 2 (%u).",
> + res->hr_localpath, res->hr_local_sectorsize);
> + ec = EX_DATAERR;
> + goto end;
> + }
> + if (extentsize == 0)
> + extentsize = HAST_EXTENTSIZE;
> + if (extentsize < res->hr_local_sectorsize) {
> + pjdlog_error("Extent size (%jd) is less than sector size (%u).",
> + (intmax_t)extentsize, res->hr_local_sectorsize);
> + ec = EX_DATAERR;
> + goto end;
> + }
> + if ((extentsize % res->hr_local_sectorsize) != 0) {
> + pjdlog_error("Extent size (%jd) is not multiple of sector size (%u).",
> + (intmax_t)extentsize, res->hr_local_sectorsize);
> + ec = EX_DATAERR;
> + goto end;
> + }
> + mapsize = activemap_calc_ondisk_size(mediasize - METADATA_SIZE,
> + extentsize, res->hr_local_sectorsize);
> + if (keepdirty == 0)
> + keepdirty = HAST_KEEPDIRTY;
> + res->hr_datasize = mediasize - METADATA_SIZE - mapsize;
> + res->hr_extentsize = extentsize;
> + res->hr_keepdirty = keepdirty;
> +
> + res->hr_localoff = METADATA_SIZE + mapsize;
> +
> + if (metadata_write(res) < 0) {
> + ec = EX_IOERR;
> + goto end;
> + }
> + buf = calloc(1, mapsize);
> + if (buf == NULL) {
> + pjdlog_error("Unable to allocate %zu bytes of memory for initial bitmap.",
> + mapsize);
> + ec = EX_TEMPFAIL;
> + goto end;
> + }
> + if (pwrite(res->hr_localfd, buf, mapsize, METADATA_SIZE) !=
> + (ssize_t)mapsize) {
> + pjdlog_errno(LOG_ERR, "Unable to store initial bitmap on %s",
> + res->hr_localpath);
> + free(buf);
> + ec = EX_IOERR;
> + goto end;
> + }
> + free(buf);
> +end:
> + if (res->hr_localfd >= 0)
> + close(res->hr_localfd);
> + pjdlog_prefix_set("%s", "");
> + return (ec);
> +}
> +
> +static void
> +control_create(int argc, char *argv[], intmax_t mediasize, intmax_t extentsize,
> + intmax_t keepdirty)
> +{
> + struct hast_resource *res;
> + int ec, ii, ret;
> +
> + /* Initialize the given resources. */
> + if (argc < 1)
> + usage();
> + ec = 0;
> + for (ii = 0; ii < argc; ii++) {
> + TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
> + if (strcmp(argv[ii], res->hr_name) == 0)
> + break;
> + }
> + if (res == NULL) {
> + pjdlog_error("Unknown resource %s.", argv[ii]);
> + if (ec == 0)
> + ec = EX_DATAERR;
> + continue;
> + }
> + ret = create_one(res, mediasize, extentsize, keepdirty);
> + if (ret != 0 && ec == 0)
> + ec = ret;
> + }
> + exit(ec);
> +}
> +
> +static int
> +dump_one(struct hast_resource *res)
> +{
> + int ret;
> +
> + ret = metadata_read(res, false);
> + if (ret != 0)
> + return (ret);
> +
> + printf("resource: %s\n", res->hr_name);
> + printf(" datasize: %ju\n", (uintmax_t)res->hr_datasize);
> + printf(" extentsize: %d\n", res->hr_extentsize);
> + printf(" keepdirty: %d\n", res->hr_keepdirty);
> + printf(" localoff: %ju\n", (uintmax_t)res->hr_localoff);
> + printf(" resuid: %ju\n", (uintmax_t)res->hr_resuid);
> + printf(" localcnt: %ju\n", (uintmax_t)res->hr_primary_localcnt);
> + printf(" remotecnt: %ju\n", (uintmax_t)res->hr_primary_remotecnt);
> + printf(" prevrole: %s\n", role2str(res->hr_previous_role));
> +
> + return (0);
> +}
> +
> +static void
> +control_dump(int argc, char *argv[])
> +{
> + struct hast_resource *res;
> + int ec, ret;
> +
> + /* Dump metadata of the given resource(s). */
> +
> + ec = 0;
> + if (argc == 0 || (argc == 1 && strcmp(argv[0], "all") == 0)) {
> + TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
> + ret = dump_one(res);
> + if (ret != 0 && ec == 0)
> + ec = ret;
> + }
> + } else {
> + int ii;
> +
> + for (ii = 0; ii < argc; ii++) {
> + TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
> + if (strcmp(argv[ii], res->hr_name) == 0)
> + break;
> + }
> + if (res == NULL) {
> + pjdlog_error("Unknown resource %s.", argv[ii]);
> + if (ec == 0)
> + ec = EX_DATAERR;
> + continue;
> + }
> + ret = dump_one(res);
> + if (ret != 0 && ec == 0)
> + ec = ret;
> + }
> + }
> + exit(ec);
> +}
> +
> +static int
> +control_set_role(struct nv *nv, const char *newrole)
> +{
> + const char *res, *oldrole;
> + unsigned int ii;
> + int error, ret;
> +
> + ret = 0;
> +
> + for (ii = 0; ; ii++) {
> + res = nv_get_string(nv, "resource%u", ii);
> + if (res == NULL)
> + break;
> + pjdlog_prefix_set("[%s] ", res);
> + error = nv_get_int16(nv, "error%u", ii);
> + if (error != 0) {
> + if (ret == 0)
> + ret = error;
> + pjdlog_warning("Received error %d from hastd.", error);
> + continue;
> + }
> + oldrole = nv_get_string(nv, "role%u", ii);
> + if (strcmp(oldrole, newrole) == 0)
> + pjdlog_debug(2, "Role unchanged (%s).", oldrole);
> + else {
> + pjdlog_debug(1, "Role changed from %s to %s.", oldrole,
> + newrole);
> + }
> + }
> + pjdlog_prefix_set("%s", "");
> + return (ret);
> +}
> +
> +static int
> +control_status(struct nv *nv)
> +{
> + unsigned int ii;
> + const char *str;
> + int error, ret;
> +
> + ret = 0;
> +
> + for (ii = 0; ; ii++) {
> + str = nv_get_string(nv, "resource%u", ii);
> + if (str == NULL)
> + break;
> + printf("%s:\n", str);
> + error = nv_get_int16(nv, "error%u", ii);
> + if (error != 0) {
> + if (ret == 0)
> + ret = error;
> + printf(" error: %d\n", error);
> + continue;
> + }
> + printf(" role: %s\n", nv_get_string(nv, "role%u", ii));
> + printf(" provname: %s\n",
> + nv_get_string(nv, "provname%u", ii));
> + printf(" localpath: %s\n",
> + nv_get_string(nv, "localpath%u", ii));
> + printf(" extentsize: %u\n",
> + (unsigned int)nv_get_uint32(nv, "extentsize%u", ii));
> + printf(" keepdirty: %u\n",
> + (unsigned int)nv_get_uint32(nv, "keepdirty%u", ii));
> + printf(" remoteaddr: %s\n",
> + nv_get_string(nv, "remoteaddr%u", ii));
> + printf(" replication: %s\n",
> + nv_get_string(nv, "replication%u", ii));
> + str = nv_get_string(nv, "status%u", ii);
> + if (str != NULL)
> + printf(" status: %s\n", str);
> + printf(" dirty: %ju bytes\n",
> + (uintmax_t)nv_get_uint64(nv, "dirty%u", ii));
> + }
> + return (ret);
> +}
> +
> +static int
> +numfromstr(const char *str, intmax_t *nump)
> +{
> + intmax_t num;
> + char *suffix;
> + int rerrno;
> +
> + rerrno = errno;
> + errno = 0;
> + num = strtoimax(str, &suffix, 0);
> + if (errno == 0 && *suffix != '\0')
> + errno = EINVAL;
> + if (errno != 0)
> + return (-1);
> + *nump = num;
> + errno = rerrno;
> + return (0);
> +}
> +
> +int
> +main(int argc, char *argv[])
> +{
> + struct nv *nv;
> + intmax_t mediasize, extentsize, keepdirty;
> + int cmd, debug, error, ii;
> + const char *optstr;
> +
> + debug = 0;
> + mediasize = extentsize = keepdirty = 0;
> +
> + if (argc == 1)
> + usage();
> +
> + if (strcmp(argv[1], "create") == 0) {
> + cmd = CMD_CREATE;
> + optstr = "c:de:k:m:h";
> + } else if (strcmp(argv[1], "role") == 0) {
> + cmd = CMD_ROLE;
> + optstr = "c:dh";
> + } else if (strcmp(argv[1], "status") == 0) {
> + cmd = CMD_STATUS;
> + optstr = "c:dh";
> + } else if (strcmp(argv[1], "dump") == 0) {
> + cmd = CMD_DUMP;
> + optstr = "c:dh";
> + } else
> + usage();
> +
> + argc--;
> + argv++;
> +
> + for (;;) {
> + int ch;
> +
> + ch = getopt(argc, argv, optstr);
> + if (ch == -1)
> + break;
> + switch (ch) {
> + case 'c':
> + cfgpath = optarg;
> + break;
> + case 'd':
> + debug++;
> + break;
> + case 'e':
> + if (numfromstr(optarg, &extentsize) < 0)
> + err(1, "Invalid extentsize");
> + break;
> + case 'k':
> + if (numfromstr(optarg, &keepdirty) < 0)
> + err(1, "Invalid keepdirty");
> + break;
> + case 'm':
> + if (numfromstr(optarg, &mediasize) < 0)
> + err(1, "Invalid mediasize");
> + break;
> + case 'h':
> + default:
> + usage();
> + }
> + }
> + argc -= optind;
> + argv += optind;
> +
> + switch (cmd) {
> + case CMD_CREATE:
> + case CMD_ROLE:
> + if (argc == 0)
> + usage();
> + break;
> + }
> +
> + pjdlog_debug_set(debug);
> +
> + cfg = yy_config_parse(cfgpath);
> + assert(cfg != NULL);
> +
> + switch (cmd) {
> + case CMD_CREATE:
> + control_create(argc, argv, mediasize, extentsize, keepdirty);
> + /* NOTREACHED */
> + assert(!"What are we doing here?!");
> + break;
> + case CMD_DUMP:
> + /* Dump metadata from local component of the given resource. */
> + control_dump(argc, argv);
> + /* NOTREACHED */
> + assert(!"What are we doing here?!");
> + break;
> + case CMD_ROLE:
> + /* Change role for the given resources. */
> + if (argc < 2)
> + usage();
> + nv = nv_alloc();
> + nv_add_uint8(nv, HASTCTL_CMD_SETROLE, "cmd");
> + if (strcmp(argv[0], "init") == 0)
> + nv_add_uint8(nv, HAST_ROLE_INIT, "role");
> + else if (strcmp(argv[0], "primary") == 0)
> + nv_add_uint8(nv, HAST_ROLE_PRIMARY, "role");
> + else if (strcmp(argv[0], "secondary") == 0)
> + nv_add_uint8(nv, HAST_ROLE_SECONDARY, "role");
> + else
> + usage();
> + for (ii = 0; ii < argc - 1; ii++)
> + nv_add_string(nv, argv[ii + 1], "resource%d", ii);
> + break;
> + case CMD_STATUS:
> + /* Obtain status of the given resources. */
> + nv = nv_alloc();
> + nv_add_uint8(nv, HASTCTL_CMD_STATUS, "cmd");
> + if (argc == 0)
> + nv_add_string(nv, "all", "resource%d", 0);
> + else {
> + for (ii = 0; ii < argc; ii++)
> + nv_add_string(nv, argv[ii], "resource%d", ii);
> + }
> + break;
> + default:
> + assert(!"Impossible role!");
> + }
> +
> + /* Setup control connection... */
> + if (proto_client(cfg->hc_controladdr, &controlconn) < 0) {
> + pjdlog_exit(EX_OSERR,
> + "Unable to setup control connection to %s",
> + cfg->hc_controladdr);
> + }
> + /* ...and connect to hastd. */
> + if (proto_connect(controlconn) < 0) {
> + pjdlog_exit(EX_OSERR, "Unable to connect to hastd via %s",
> + cfg->hc_controladdr);
> + }
> + /* Send the command to the server... */
> + if (hast_proto_send(NULL, controlconn, nv, NULL, 0) < 0) {
> + pjdlog_exit(EX_UNAVAILABLE,
> + "Unable to send command to hastd via %s",
> + cfg->hc_controladdr);
> + }
> + nv_free(nv);
> + /* ...and receive reply. */
> + if (hast_proto_recv(NULL, controlconn, &nv, NULL, 0) < 0) {
> + pjdlog_exit(EX_UNAVAILABLE,
> + "cannot receive reply from hastd via %s",
> + cfg->hc_controladdr);
> + }
> +
> + error = nv_get_int16(nv, "error");
> + if (error != 0) {
> + pjdlog_exitx(EX_SOFTWARE, "Error %d received from hastd.",
> + error);
> + }
> + nv_set_error(nv, 0);
> +
> + switch (cmd) {
> + case CMD_ROLE:
> + error = control_set_role(nv, argv[0]);
> + break;
> + case CMD_STATUS:
> + error = control_status(nv);
> + break;
> + default:
> + assert(!"Impossible role!");
> + }
> +
> + exit(error);
> +}
>
> Added: head/sbin/hastd/Makefile
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/sbin/hastd/Makefile Thu Feb 18 23:16:19 2010 (r204076)
> @@ -0,0 +1,37 @@
> +# $FreeBSD$
> +
> +.include <bsd.own.mk>
> +
> +PROG= hastd
> +SRCS= activemap.c
> +SRCS+= control.c
> +SRCS+= ebuf.c
> +SRCS+= hast_proto.c hastd.c hooks.c
> +SRCS+= metadata.c
> +SRCS+= nv.c
> +SRCS+= secondary.c
> +SRCS+= parse.y pjdlog.c primary.c
> +SRCS+= proto.c proto_common.c proto_socketpair.c proto_tcp4.c proto_uds.c
> +SRCS+= rangelock.c
> +SRCS+= subr.c
> +SRCS+= token.l
> +SRCS+= y.tab.h
> +WARNS?= 6
> +MAN= hastd.8 hast.conf.5
> +
> +CFLAGS+=-I${.CURDIR}
> +CFLAGS+=-DINET
> +.if ${MK_INET6_SUPPORT} != "no"
> +CFLAGS+=-DINET6
> +.endif
> +# This is needed to have WARNS > 1.
> +CFLAGS+=-DYY_NO_UNPUT
> +
> +DPADD= ${LIBCRYPTO} ${LIBGEOM} ${LIBL} ${LIBPTHREAD} ${LIBUTIL}
> +LDADD= -lcrypto -lgeom -ll -lpthread -lutil
> +
> +YFLAGS+=-v
> +
> +CLEANFILES=y.tab.c y.tab.h y.output
> +
> +.include <bsd.prog.mk>
>
> Added: head/sbin/hastd/activemap.c
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/sbin/hastd/activemap.c Thu Feb 18 23:16:19 2010 (r204076)
> @@ -0,0 +1,691 @@
> +/*-
> + * Copyright (c) 2009-2010 The FreeBSD Foundation
> + * All rights reserved.
> + *
> + * This software was developed by Pawel Jakub Dawidek under sponsorship from
> + * the FreeBSD Foundation.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include <sys/param.h> /* powerof2() */
> +#include <sys/queue.h>
> +
> +#include <assert.h>
> +#include <bitstring.h>
> +#include <errno.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +
> +#include <activemap.h>
> +
> +#define ACTIVEMAP_MAGIC 0xac71e4
> +struct activemap {
> + int am_magic; /* Magic value. */
> + off_t am_mediasize; /* Media size in bytes. */
> + uint32_t am_extentsize; /* Extent size in bytes,
> + must be power of 2. */
> + uint8_t am_extentshift;/* 2 ^ extentbits == extentsize */
> + int am_nextents; /* Number of extents. */
> + size_t am_mapsize; /* Bitmap size in bytes. */
> + uint16_t *am_memtab; /* An array that holds number of pending
> + writes per extent. */
> + bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */
> + bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */
> + size_t am_diskmapsize; /* Map size rounded up to sector size. */
> + uint64_t am_ndirty; /* Number of dirty regions. */
> + bitstr_t *am_syncmap; /* Bitmap of extents to sync. */
> + off_t am_syncoff; /* Next synchronization offset. */
> + TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that
> + we keep dirty to reduce bitmap
> + updates. */
> + int am_nkeepdirty; /* Number of am_keepdirty elements. */
> + int am_nkeepdirty_limit; /* Maximum number of am_keepdirty
> + elements. */
> +};
> +
> +struct keepdirty {
> + int kd_extent;
> + TAILQ_ENTRY(keepdirty) kd_next;
> +};
> +
> +/*
> + * Helper function taken from sys/systm.h to calculate extentshift.
> + */
> +static uint32_t
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
>
More information about the svn-src-all
mailing list