ports/63933: [PATCH] bsd.port.mk: support for sorting MASTER_SITES by table lookup
Oliver Eikemeier
eikemeier at fillmore-labs.com
Mon Mar 8 17:00:33 UTC 2004
>Number: 63933
>Category: ports
>Synopsis: [PATCH] bsd.port.mk: support for sorting MASTER_SITES by table lookup
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: freebsd-ports-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: change-request
>Submitter-Id: current-users
>Arrival-Date: Mon Mar 08 09:00:32 PST 2004
>Closed-Date:
>Last-Modified:
>Originator: Oliver Eikemeier
>Release: FreeBSD 4.9-STABLE i386
>Organization:
Fillmore Labs - http://www.fillmore-labs.com
>Environment:
System: FreeBSD nuuk.fillmore-labs.com 4.9-STABLE
>Description:
Currently there are four ways to influence the order of the sites a port tries
to fetch its distfiles from:
- reorder MASTER_SITES in the port's Makefile
- reorder MASTER_SITE_* in bsd.sites.mk
- add a specially formatted MASTER_SORT_REGEX in /etc/make.conf
- define RANDOMIZE_MASTER_SITES (which shuffles MASTER_SITE_OVERRIDE and
MASTER_SITE_BACKUP too)
This patch adds support for a custom table, /var/db/distrank/ranks, consisting of
site (a host) - rank (a number) pairs. A port's MASTER_SITES are looked up in this
table and are sorted by their respective rankings. If a site isn't found in the table,
it is put at the end of the list. A small 'fuzz' factor is added to every value,
so that sites with numerically close rankings have a chance of being used alternately.
Currently, the results are sorted again by MASTER_SORT_REGEX.
The ranking has to be site specific, and could be generated by downloading ISP specific
tables or calculated using GeoIP. A skeleton for a port that measures latency is attached
to this PR to demonstrate the generation of a table, but is not an integral part of this
patch. I might make this into a port if the patch is committed.
>How-To-Repeat:
>Fix:
Index: bsd.port.mk
===================================================================
RCS file: /home/ncvs/ports/Mk/bsd.port.mk,v
retrieving revision 1.484
diff -u -r1.484 bsd.port.mk
--- bsd.port.mk 4 Feb 2004 04:27:04 -0000 1.484
+++ bsd.port.mk 8 Mar 2004 11:56:25 -0000
@@ -1938,6 +1938,25 @@
FETCH_REGET?= 0
.endif
+RANKFILE?= /var/db/distrank/ranks
+# _RANK_SITES is piped after an echo of the site list: sort by ${RANKFILE} rank plus up to RANK_FUZZ of random jitter; unlisted sites get RANK_HIGH.
+.if exists(${RANKFILE})
+RANK_FUZZ?= 5
+RANK_HIGH?= 9999
+_RANK_SITES?= | ${TR} -s ' \t' '\n' \
+ | ${AWK} 'BEGIN { \
+ IGNORECASE = 1; srand(); \
+ while(getline < "${RANKFILE}" > 0) { rank[tolower($$1)]=$$2 } \
+ } \
+ { \
+ pos=rank[tolower(gensub(/^(f|ht)tp:\/\/([^ \/]+)\/.*/, "\\\2", 1))]; \
+ print $$0 "\t" (pos ? pos + rand() * ${RANK_FUZZ} : ${RANK_HIGH}) \
+ }' \
+ | ${SORT} -n -k 2 | ${CUT} -f 1 | ${TR} '\n' ' '
+.else
+_RANK_SITES?= ''
+.endif
+
.if defined(RANDOMIZE_MASTER_SITES)
.if exists(/usr/games/random)
RANDOM_CMD?= /usr/games/random
@@ -2453,7 +2472,7 @@
. if !target(master-sites-${_group})
SORTED_MASTER_SITES_${_group}_CMD= cd ${.CURDIR} && ${MAKE} ${__softMAKEFLAGS} master-sites-${_group}
master-sites-${_group}:
- @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_${_group}}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+ @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_${_group}}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
. endif
. endfor
. endif
@@ -2465,7 +2484,7 @@
. if !target(patch-sites-${_group})
SORTED_PATCH_SITES_${_group}_CMD= cd ${.CURDIR} && ${MAKE} ${__softMAKEFLAGS} patch-sites-${_group}
patch-sites-${_group}:
- @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_${_group}}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+ @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_${_group}}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
. endif
. endfor
. endif
@@ -2498,14 +2517,14 @@
.endfor
master-sites-ALL:
- @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_ALL}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+ @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_ALL}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
patch-sites-ALL:
- @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_ALL}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+ @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_ALL}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
# has similar effect to old targets, i.e., access only {MASTER,PATCH}_SITES, not working with the new _n variables
master-sites-DEFAULT:
- @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_DEFAULT}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+ @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_MASTER_SITES_DEFAULT}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
patch-sites-DEFAULT:
- @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_DEFAULT}' | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
+ @${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} '${_PATCH_SITES_DEFAULT}' ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}
# synonyms, mnemonics
master-sites-all: master-sites-ALL
@@ -3140,7 +3159,7 @@
fi \
done; \
___MASTER_SITES_TMP= ; \
- SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
+ SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
else \
SORTED_MASTER_SITES_CMD_TMP="${SORTED_MASTER_SITES_DEFAULT_CMD}" ; \
fi ; \
@@ -3193,7 +3212,7 @@
fi \
done; \
___PATCH_SITES_TMP= ; \
- SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
+ SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
else \
SORTED_PATCH_SITES_CMD_TMP="${SORTED_PATCH_SITES_DEFAULT_CMD}" ; \
fi ; \
@@ -4133,7 +4152,7 @@
fi \
done; \
___MASTER_SITES_TMP= ; \
- SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
+ SORTED_MASTER_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__MASTER_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
else \
SORTED_MASTER_SITES_CMD_TMP="${SORTED_MASTER_SITES_DEFAULT_CMD}" ; \
fi ; \
@@ -4165,7 +4184,7 @@
fi \
done; \
___PATCH_SITES_TMP= ; \
- SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
+ SORTED_PATCH_SITES_CMD_TMP="${ECHO_CMD} ${_MASTER_SITE_OVERRIDE} `${ECHO_CMD} $${__PATCH_SITES_TMP} ${_RANK_SITES} | ${AWK} '${MASTER_SORT_AWK:S|\\|\\\\|g}'` ${_MASTER_SITE_BACKUP}" ; \
else \
SORTED_PATCH_SITES_CMD_TMP="${SORTED_PATCH_SITES_DEFAULT_CMD}" ; \
fi ; \
I guess before/after committing this port, the sorting code should be refactored.
I volunteer if nobody else steps up.
Sample shell script to generate /var/db/distrank/ranks (requires port net/fping).
I am aware of the following issues:
- the script has a running time of over two hours; it could be faster if parallelized
- latency is a bad measure of download speed; bandwidth might be better
- some hosts block ICMP echo packets
- network bandwidth changes over time
#!/bin/sh -e
#
# Copyright (c) 2004 Oliver Eikemeier. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright notice
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the author nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# $FreeBSD$
#
# Build the site ranking table ${DBDIR}/ranks.
#
# The hosts named in the MASTER_SITES and PATCH_SITES of every port below
# ${PORTSDIR} are collected, each host is probed with "fping -c 3", and the
# resulting "host average-latency" pairs are stored sorted by ascending
# latency.
#
# Environment:
#   DBDIR     directory holding the table (default /var/db/distrank)
#   PORTSDIR  root of the ports tree (default /usr/ports)
#
# Exit status is non-zero, and an existing table is left untouched, when no
# host could be ranked.

: "${DBDIR:=/var/db/distrank}"
: "${PORTSDIR:=/usr/ports}"

mkdir -p "${DBDIR}"

# Collect the results in a scratch file and rename it into place at the end,
# so the live table is never empty or half written during the long
# measurement run.
RANKS="${DBDIR}/ranks"
RANKS_TMP="${RANKS}.$$"
trap 'rm -f "${RANKS_TMP}"' EXIT
trap 'exit 1' HUP INT TERM

echo "ranking ports MASTER_SITES"
echo "please be patient, this may take more than two hours"

cd "${PORTSDIR}"
CATEGORIES=`make -VSUBDIR`

# Pipeline stages: print the site URLs of every port, split them one per
# line, lower-case them, keep only the host part of ftp:// and http:// URLs,
# drop duplicates, ping the hosts, extract "host avg" from the fping summary
# lines and sort numerically on the latency column.
for category in ${CATEGORIES}; do
	if [ ! -d "${PORTSDIR}/${category}" ]; then continue; fi
	cd "${PORTSDIR}/${category}"
	PORTS=`make -VSUBDIR`
	for port in ${PORTS}; do
		if [ ! -d "${PORTSDIR}/${category}/${port}" ]; then continue; fi
		cd "${PORTSDIR}/${category}/${port}"
		# A broken port must not abort the whole run.
		make -DFETCH_ALL -VMASTER_SITES -VPATCH_SITES makesum 2>/dev/null || true
	done
done \
| tr -s ' \t' '\n' \
| tr '[:upper:]' '[:lower:]' \
| sed -nE 's;^(f|ht)tp://([^/]+)/.*$;\2;p' \
| sort -u \
| fping -q -c 3 2>&1 \
| sed -nE 's;^([^ :/]+) *:.*min/avg/max += +[0-9.]+/([0-9.]+)/[0-9.]+.*$;\1 \2;p' \
| sort -n -k 2 \
> "${RANKS_TMP}"

# An empty result means fping is missing or nothing answered; keep the
# previous table rather than replacing it with a useless empty one.
if [ ! -s "${RANKS_TMP}" ]; then
	echo "no hosts could be ranked, ${RANKS} left unchanged" >&2
	exit 1
fi

chmod a+r "${RANKS_TMP}"
mv -f "${RANKS_TMP}" "${RANKS}"
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-ports-bugs
mailing list