svn commit: r247766 - user/cperciva/portsnap-mirror
Colin Percival
cperciva at FreeBSD.org
Mon Mar 4 05:26:11 UTC 2013
Author: cperciva
Date: Mon Mar 4 05:26:09 2013
New Revision: 247766
URL: http://svnweb.freebsd.org/changeset/base/247766
Log:
Add portsnap mirroring code, previously in the (now defunct) CVS projects
repository.
Added:
user/cperciva/portsnap-mirror/
user/cperciva/portsnap-mirror/pmirror.sh (contents, props changed)
Added: user/cperciva/portsnap-mirror/pmirror.sh
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ user/cperciva/portsnap-mirror/pmirror.sh Mon Mar 4 05:26:09 2013 (r247766)
@@ -0,0 +1,337 @@
+#!/bin/sh -e
+
+#-
+# Copyright 2005 Colin Percival
+# All rights reserved
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted providing that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+# $FreeBSD$
+
+# READ THIS BEFORE USING THIS CODE
+# --------------------------------
+#
+# On average, portsnap requires 2-5MB/month of bandwidth to keep a
+# single machine up to date. If several machines are sharing an
+# HTTP proxy, a significant fraction of this can be cached.
+#
+# In contrast, using this code to keep a portsnap *mirror* up to
+# date requires roughly 1GB of disk space and 5GB/month of bandwidth.
+# This is because of the "graceful failure" mechanisms built into
+# portsnap -- it can usually take advantage of pregenerated patches,
+# but a mirror needs to have lots of larger files just in case they
+# are needed.
+#
+# This means that, in terms of bandwidth, running a portsnap mirror
+# is completely and utterly pointless unless you expect more than
+# 1000 portsnap-running systems to be using the mirror. In fact,
+# it's worse than pointless, since it would consume bandwidth and
+# increase the load on existing mirrors (since the mirroring would
+# require more work than serving those <1000 machines from the
+# existing mirrors).
+#
+# For reference, the number of systems running portsnap at the end
+# of 2005 is roughly 4500.
+#
+# In short: Even if you already run FreeBSD CVSup, WWW, and FTP
+# mirrors, you shouldn't necessarily start running a portsnap mirror
+# as well. Please talk to me (cperciva at FreeBSD.org) before you
+# start chewing up bandwidth.
+
+# Usage:
+# lockf -s -t 0 lockfile \
+# sh -e pmirror.sh portsnap-master.freebsd.org /path/to/www
+
+if [ $# -ne 2 ]; then
+ echo "Usage: pmirror.sh portsnap-master.freebsd.org /path/to/www"
+ exit 1
+fi
+
+WRKDIR=`mktemp -d -t pmirror` || exit 1
+chown :`id -ng` ${WRKDIR}
+cd ${WRKDIR}
+
+SERVER=$1
+PUBDIR=$2
+PHTTPGET="/usr/libexec/phttpget ${SERVER}"
+
+export HTTP_USER_AGENT="pmirror/0.9"
+
+# If ${PUBDIR}/pub.ssl does not exist, assume we have an empty
+# mirror directory and set things up.
+if ! [ -f ${PUBDIR}/pub.ssl ]; then
+ mkdir -p ${PUBDIR} ${PUBDIR}/bp ${PUBDIR}/f \
+ ${PUBDIR}/s ${PUBDIR}/t ${PUBDIR}/tp
+ touch ${PUBDIR}/latest.ssl
+ echo 'User-agent: *' > ${PUBDIR}/robots.txt
+ echo 'Disallow: /' >> ${PUBDIR}/robots.txt
+fi
+
+${PHTTPGET} pub.ssl snapshot.ssl latest.ssl 2>&1 |
+ grep -v "200 OK" || true
+[ -f pub.ssl -a -f snapshot.ssl -a -f latest.ssl ]
+
+if cmp -s latest.ssl ${PUBDIR}/latest.ssl; then
+ cd /tmp/
+ rm -r ${WRKDIR}
+ exit 0
+fi
+
+echo "`date`: Fetching binary files list"
+rm -f bl.gz bl bp.wanted bp.present
+fetch -q http://${SERVER}/bl.gz
+[ -f bl.gz ] || exit 1
+gunzip -c bl.gz > bl
+
+echo "`date`: Constructing list of binary patches wanted"
+LASTSNAP=`cut -f 2 -d '|' bl | grep -E '^[0-9]+$' | sort -urn | head -1`
+awk -F \| -v cutoff=`expr ${LASTSNAP} - 86400` \
+ '{ if ($2 > cutoff) { print } }' bl |
+ join -t '|' bl - |
+ awk -F \| '{ if ($4 > $2) { print $3 "-" $5 } }' |
+ sort | grep -E '^[0-9a-f]{64}-[0-9a-f]{64}$' > bp.wanted
+( cd ${PUBDIR}/bp/ && ls ) |
+ grep -E '^[0-9a-f]{64}-[0-9a-f]{64}$' > bp.present || true
+echo "`date`: Fetching needed binary patches"
+comm -13 bp.present bp.wanted | lam -s 'bp/' - |
+ ( cd ${PUBDIR}/bp/ && xargs ${PHTTPGET} ) 2>&1 |
+ grep -v "200 OK" || true
+echo "`date`: Removing unneeded binary patches"
+comm -23 bp.present bp.wanted | ( cd ${PUBDIR}/bp/ && xargs rm )
+
+echo "`date`: Fetching metadata files list"
+rm -f tl.gz tl
+fetch -q http://${SERVER}/tl.gz
+[ -f tl.gz ] || exit 1
+gunzip -c tl.gz > tl
+
+echo "`date`: Constructing list of files wanted"
+awk -F \| -v cutoff=`expr ${LASTSNAP} - 86400` \
+ '{ if ($2 > cutoff) { print $3 ".gz" } }' bl |
+ grep -E '^[0-9a-f]{64}\.gz$' > f.wanted || true
+awk -F \| -v cutoff=`expr ${LASTSNAP} - 691200` \
+ '{ if ($2 > cutoff) { print $3 ".gz" } }' tl |
+ grep -E '^[0-9a-f]{64}\.gz$' >> f.wanted || true
+sort f.wanted > f.wanted.tmp
+mv f.wanted.tmp f.wanted
+( cd ${PUBDIR}/f/ && ls ) |
+ grep -E '^[0-9a-f]{64}\.gz$' > f.present || true
+echo "`date`: Fetching needed files"
+comm -13 f.present f.wanted | lam -s 'f/' - |
+ ( cd ${PUBDIR}/f/ && xargs ${PHTTPGET} ) 2>&1 |
+ grep -v "200 OK" || true
+echo "`date`: Removing corrupt files"
+comm -13 f.present f.wanted | tr -d '.gz' | while read F; do
+ if [ -f ${PUBDIR}/f/${F}.gz ] &&
+ ! [ `gunzip < ${PUBDIR}/f/${F}.gz` | sha256` = $F ]; then
+ echo "Deleting f/$F.gz"
+ rm ${PUBDIR}/f/${F}.gz
+ fi
+done
+echo "`date`: Removing unneeded files"
+comm -23 f.present f.wanted | ( cd ${PUBDIR}/f/ && xargs rm )
+
+echo "`date`: Fetching extra files list"
+rm -f el.gz el
+fetch -q http://${SERVER}/el.gz
+[ -f el.gz ] || exit 1
+gunzip -c el.gz > el
+
+echo "`date`: Constructing list of snapshots wanted"
+grep -E '^s/' el | cut -f 2 -d '/' |
+ sort | grep -E '^[0-9a-f]{64}\.tgz$' > s.wanted || true
+( cd ${PUBDIR}/s/ && ls ) |
+ grep -E '^[0-9a-f]{64}\.tgz$' > s.present || true
+echo "`date`: Fetching needed snapshots"
+comm -13 s.present s.wanted | lam -s 's/' - |
+ ( cd ${PUBDIR}/s/ && xargs ${PHTTPGET} ) 2>&1 |
+ grep -v "200 OK" || true
+echo "`date`: Removing unneeded snapshots"
+comm -23 s.present s.wanted | ( cd ${PUBDIR}/s/ && xargs rm )
+
+echo "`date`: Constructing list of tags wanted"
+grep -E '^t/' el | cut -f 2 -d '/' |
+ sort | grep -E '^[0-9a-f]{64}$' > t.wanted || true
+( cd ${PUBDIR}/t/ && ls ) |
+ grep -E '^[0-9a-f]{64}$' > t.present || true
+echo "`date`: Fetching needed tags"
+comm -13 t.present t.wanted | lam -s 't/' - |
+ ( cd ${PUBDIR}/t/ && xargs ${PHTTPGET} ) 2>&1 |
+ grep -v "200 OK" || true
+
+# Don't bother deleting old tag files. They don't take up any
+# significant space, and keeping them is useful for statistical
+# purposes.
+# echo "`date`: Removing unneeded tags"
+# comm -23 t.present t.wanted | ( cd ${PUBDIR}/t/ && xargs rm )
+
+echo "`date`: Constructing list of metadata patches wanted"
+awk -F \| -v cutoff=`expr ${LASTSNAP} - 86400` \
+ '{ if ($2 > cutoff) { print } }' tl |
+ join -t '|' tl - |
+ awk -F \| '{ if ($4 > $2) { print $3 "-" $5 ".gz" } }' |
+ sort | grep -E '^[0-9a-f]{64}-[0-9a-f]{64}\.gz$' > tp.wanted || true
+awk -F \| -v cutoff=`expr ${LASTSNAP} - 86400` \
+ '{ if ($2 > cutoff) { print } }' tl |
+ join -t '|' tl - |
+ fgrep "|${LASTSNAP}|" |
+ awk -F \| '{ if ($4 > $2) { print $3 "-" $5 ".gz" } }' |
+ sort | grep -E '^[0-9a-f]{64}-[0-9a-f]{64}\.gz$' > tp.needed || true
+( cd ${PUBDIR}/tp/ && ls ) |
+ grep -E '^[0-9a-f]{64}-[0-9a-f]{64}\.gz$' > tp.present || true
+
+echo "`date`: Generating needed metadata patches"
+# This generates lines of the form RECENTHASH|OLDHASH|NEWHASH,
+# where RECENTHASH is the most recent metadata file of the same
+# type which existed prior to this mirroring run.
+# This list is also sorted starting with the most recent OLDHASH.
+#
+# If there are no existing metadata files of the relevant type
+# then the metadata patches won't be created. Sorry. They'll
+# all be created the next time.
+
+sort -k 3 -t '|' tl > tl.sorted
+
+cut -f 1 -d '.' f.present |
+ join -2 3 -t '|' - tl.sorted |
+ sort -k 3 -t '|' |
+ perl -e '
+ while (<>) {
+ @_ = split /\|/;
+ $l{$_[1]} = $_[0]
+ };
+ for $f (sort(keys %l)) {
+ print "$f|$l{$f}\n"
+ }' > metadata.latest
+
+comm -13 tp.present tp.needed |
+ cut -f 1 -d '.' |
+ tr '-' '|' |
+ join -o 1.1,1.2,2.1,2.2 -1 3 -t '|' tl.sorted - |
+ sort |
+ join -o 1.2,2.2,2.3,2.4 -t '|' metadata.latest - |
+ sort -rn -k 2 -t '|' |
+ cut -f 1,3,4 -d '|' |
+while read LINE; do
+ X=`echo ${LINE} | cut -f 2 -d '|'`
+ Y=`echo ${LINE} | cut -f 3 -d '|'`
+ M=`echo ${LINE} | cut -f 1 -d '|'`
+
+ if [ ! -f "${PUBDIR}/tp/${X}-${M}.gz" ] ||
+ [ ! -f "${PUBDIR}/tp/${M}-${Y}.gz" ]; then
+ gunzip -c < ${PUBDIR}/f/${X}.gz | sort > ${X}
+ gunzip -c < ${PUBDIR}/f/${Y}.gz | sort > ${Y}
+ perl -e '
+ open F, $ARGV[0];
+ open G, $ARGV[1];
+ $s = <F>;
+ $t = <G>;
+ do {
+ if ($s eq $t) {
+ $s = <F>;
+ $t = <G>;
+ } elsif ((! $t) || ($s && ($s lt $t))) {
+ @s = split /\|/, $s;
+ print "-$s[0]\n";
+ $s = <F>;
+ } else {
+ print "+$t";
+ $t = <G>;
+ }
+ } while ($s || $t)' ${X} ${Y} |
+ sort -k 1.2,1 -t '|' > ${X}-${Y}
+ rm ${X} ${Y}
+ else
+ gunzip -c "${PUBDIR}/tp/${X}-${M}.gz" | sort -r |
+ sort -s -k 1.2,1 -t '|' > ${X}-${M}
+ gunzip -c "${PUBDIR}/tp/${M}-${Y}.gz" | sort -r |
+ sort -s -k 1.2,1 -t '|' > ${M}-${Y}
+ perl -e '
+ open F, $ARGV[0];
+ open G, $ARGV[1];
+ $s = <F>;
+ $t = <G>;
+ while ($s || $t) {
+ chomp $s;
+ chomp $t;
+
+ if (! $t) {
+ print "$s\n";
+ $s = <F>;
+ next;
+ };
+ if (! $s) {
+ print "$t\n";
+ $t = <G>;
+ next;
+ };
+
+ @s = split //, $s, 2;
+ @s2 = split /\|/, $s[1];
+ @t = split //, $t, 2;
+ @t2 = split /\|/, $t[1];
+
+ if ($s2[0] lt $t2[0]) {
+ print "$s\n";
+ $s = <F>;
+ next;
+ };
+ if ($s2[0] gt $t2[0]) {
+ print "$t\n";
+ $t = <G>;
+ next;
+ };
+
+ if ($s[0] eq "-") {
+ print "$s\n";
+ } else {
+ $t = <G>;
+ };
+ $s = <F>;
+ }' ${X}-${M} ${M}-${Y} \
+ > ${X}-${Y}
+ rm ${X}-${M} ${M}-${Y}
+ fi
+
+ gzip -9n ${X}-${Y}
+ mv ${X}-${Y}.gz ${PUBDIR}/tp/
+done
+
+echo "`date`: Removing unneeded metadata patches"
+comm -23 tp.present tp.wanted | ( cd ${PUBDIR}/tp/ && xargs rm )
+
+echo "`date`: Publishing file lists and signatures"
+mv bl.gz el.gz tl.gz ${PUBDIR}
+mv latest.ssl pub.ssl snapshot.ssl ${PUBDIR}
+
+echo "`date`: Removing temporary files"
+rm bl el tl
+rm tl.sorted metadata.latest
+rm bp.wanted bp.present
+rm f.wanted f.present
+rm s.present s.wanted
+rm t.present t.wanted
+rm tp.present tp.wanted tp.needed
+
+# Remove temporary directory
+cd /tmp/
+rmdir ${WRKDIR}
More information about the svn-src-user
mailing list