Commit 3aef697a authored by Mohammad Akhlaghi's avatar Mohammad Akhlaghi

Automatically using backup/mirror server for tarballs

Until now, if a tarball couldn't be downloaded from its own URL, the
pipeline would completely stop. This is very annoying because as the number
of software packages increases, the possibility of at least one of the
software tarball URLs being inaccessible (for example because of
maintenance) increases. Some webpages may also have problems in some
locations.

With this commit, a `mirrors.conf' file has been added to the
template. Through this file, a project's designer can define a different
mirror to use for their project. When a software tarball can't be
downloaded from its original URL, the template will parse the
`mirrors.conf' file and look for the software on those servers.

To do this, it is necessary that, when possible, the tarball names on the
mirrors be the same as the tarball names on the software's webpage. The
cases where this is not possible are those where the tarball name doesn't
actually have the software name in it.

But it is necessary to check this a little more. Until the tests are done,
I'll keep this on a separate branch.
parent 29540c2b
......@@ -4,7 +4,8 @@
# the top project directory (for the shebang above), this script must be
# run like this:
#
# $ /path/to/download-multi-try downloader lockfile input-url downloaded-name
# $ /path/to/download-multi-try downloader lockfile url-dir \
# url-file downloaded-name mirror-file
#
# NOTE: The `downloader' must contain the option to specify the output name
# in its end. For example "wget -O". Any other option can also be placed in
......@@ -50,21 +51,26 @@ set -e
# Input arguments and necessary sanity checks.
inurl="$3"
outname="$4"
urldir="$3"
urlfile="$4"
outname="$5"
lockfile="$2"
downloader="$1"
if [ "x$downloader" = x ]; then
echo "$0: downloader (first argument) not given."; exit 1;
mirrorfile="$6"
if [ "x$urldir" = x ]; then
echo "$0: input's host URL (third argument) not given."; exit 1;
fi
if [ "x$urlfile" = x ]; then
echo "$0: input's host filename (fourth argument) not given."; exit 1;
fi
if [ "x$outname" = x ]; then
echo "$0: output name (fifth argument) not given."; exit 1;
fi
if [ "x$lockfile" = x ]; then
echo "$0: lock file (second argument) not given."; exit 1;
fi
if [ "x$inurl" = x ]; then
echo "$0: full input URL (third argument) not given."; exit 1;
fi
if [ "x$outname" = x ]; then
echo "$0: output name (fourth argument) not given."; exit 1;
if [ "x$downloader" = x ]; then
echo "$0: downloader (first argument) not given."; exit 1;
fi
......@@ -98,12 +104,25 @@ while [ ! -f "$outname" ]; do
sleep $tstep
fi
# Attempt downloading the file (one-at-a-time). Note that the
# Attempt downloading each file (one-at-a-time) from its given URL,
# then try the mirror URLs in the given mirror file. Note that the
# `downloader' ends with the respective option to specify the output
# name. For example "wget -O" (so `outname', that comes after it) will
# be the name of the downloaded file.
flock "$lockfile" bash -c \
"if ! $downloader $outname $inurl; then rm -f $outname; fi"
if [ -f $mirrorfile ]; then
awk 'BEGIN{print "'$urldir'"} !/^#/ && NF {print $1}' $mirrorfile \
| while IFS= read -r line; do \
echo; echo; echo "line: $line"; \
dcom="if ! $downloader $outname $line/$urlfile; then rm -f $outname; fi"; \
flock "$lockfile" bash -c "$dcom"; \
if [ -f $outname ]; then break; \
else echo "Download failed: $line/$urlfile"; \
fi; \
done
else
flock "$lockfile" bash -c \
"if ! $downloader $outname $urldir/$urlfile; then rm -f $outname; fi"
fi
done
......
......@@ -62,20 +62,20 @@ $(inputdatasets): $(indir)/%.fits: | $(indir) $(lockdir)
# Download (or make the link to) the input dataset.
if [ -f $(INDIR)/$$origname ]; then
ln -s $(INDIR)/$$origname $@
ln -s $(INDIR)/$$origname "$@.unchecked"
else
touch $(lockdir)/download
$(downloadwrapper) "wget --no-use-server-timestamps -O" \
$(lockdir)/download $$url/$$origname $@
$(lockdir)/download $$url $$origname \
"$@.unchecked"
fi
# Check the md5 sum to see if this is the proper dataset.
sum=$$(md5sum $@ | awk '{print $$1}')
if [ $$sum != $$mdf ]; then
wrongname=$(dir $@)/wrong-$(notdir $@)
mv $@ $$wrongname
echo; echo; echo "Wrong MD5 checksum for '$$origname' in $$wrongname"
echo; echo; exit 1
sum=$$(md5sum "$@.unchecked" | awk '{print $$1}')
if [ $$sum = $$mdf ]; then mv "$@.unchecked" $@;
else
echo; echo "Wrong MD5 checksum for '$$url/$$origname'"
echo; exit 1
fi
......
......@@ -652,10 +652,10 @@ gcc_works=0
testprog=$tmpblddir/test-c
testsource=$tmpblddir/test.c
echo; echo; echo "Checking host C compiler...";
echo "#include <stdio.h>" > $testsource
echo "#include <stdlib.h>" >> $testsource
echo "int main(void){printf(\"...C compiler works.\");" >> $testsource
echo " return EXIT_SUCCESS;}" >> $testsource
echo "#include <stdio.h>" > $testsource
echo "#include <stdlib.h>" >> $testsource
echo "int main(void){printf(\"...C compiler works.\n\");" >> $testsource
echo " return EXIT_SUCCESS;}" >> $testsource
if gcc $testsource -o$testprog && $testprog; then
rm $testsource $testprog
else
......@@ -1060,8 +1060,17 @@ if ! [ -f $tardir/$flocktar ]; then
if [ -f $ddir/$flocktar ]; then
cp $ddir/$flocktar $ucname
else
if ! $downloader $ucname $flockurl/$flocktar; then
rm -f $ucname;
# Try downloading the tarball from its official URL and if it
# fails, try from the mirror URLs.
awk 'BEGIN{print "'$flockurl'"} !/^#/ && NF {print $1}' \
$cdir/installation/mirrors.conf \
| while IFS= read -r line \
&& ! $downloader $ucname "$line/$flocktar"; do \
echo "Download of '$flocktar' failed from '$line'"; \
done
# See if the download succeeded.
if ! [ -f $ucname ]; then
echo
echo "DOWNLOAD ERROR: Couldn't download the 'flock' tarball:"
echo " $flockurl"
......
# Mirror URLs in case software tarballs can't be downloaded.
#
# The mirrors will be checked in the order of this file.
#
# Copyright (C) 2019 Mohammad Akhlaghi <mohammad@akhlaghi.org>
#
# This script is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This script is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details. See <http://www.gnu.org/licenses/>.
http://akhlaghi.org/reproduce-software
\ No newline at end of file
......@@ -45,6 +45,7 @@ idir = $(BDIR)/software/installed
ibdir = $(BDIR)/software/installed/bin
ildir = $(BDIR)/software/installed/lib
ibidir = $(BDIR)/software/installed/version-info/proglib
mirrors = reproduce/software/config/installation/mirrors.conf
# We'll need the system's PATH for making links to low-level programs we
# won't be building ourselves.
......@@ -149,7 +150,7 @@ $(tarballs): $(tdir)/%: | $(lockdir)
-e's/\./ /g' \
| awk '{print $$1}' ); \
\
mergenames=1; \
infile=$*; \
if [ $$n = bash ]; then c=$(bash-checksum); w=http://akhlaghi.org/reproduce-software; \
elif [ $$n = binutils ]; then c=$(binutils-checksum); w=http://ftp.gnu.org/gnu/binutils; \
elif [ $$n = bzip ]; then c=$(bzip2-checksum); w=http://akhlaghi.org/reproduce-software; \
......@@ -171,9 +172,9 @@ $(tarballs): $(tdir)/%: | $(lockdir)
elif [ $$n = libtool ]; then c=$(libtool-checksum); w=http://ftp.gnu.org/gnu/libtool; \
elif [ $$n = lzip ]; then c=$(lzip-checksum); w=http://download.savannah.gnu.org/releases/lzip; \
elif [ $$n = m ]; then \
mergenames=0; \
c=$(m4-checksum); \
w=http://akhlaghi.org/reproduce-software/m4-1.4.18-patched.tar.gz; \
infile=m4-$(m4-version)-patched.tar.gz; \
w=http://akhlaghi.org/reproduce-software; \
elif [ $$n = make ]; then c=$(make-checksum); w=http://akhlaghi.org/reproduce-software; \
elif [ $$n = metastore ]; then c=$(metastore-checksum); w=http://akhlaghi.org/reproduce-software; \
elif [ $$n = mpc ]; then c=$(mpc-checksum); w=http://ftp.gnu.org/gnu/mpc; \
......@@ -187,16 +188,18 @@ $(tarballs): $(tdir)/%: | $(lockdir)
elif [ $$n = tar ]; then c=$(tar-checksum); w=http://ftp.gnu.org/gnu/tar; \
elif [ $$n = texinfo ]; then c=$(texinfo-checksum); w=http://ftp.gnu.org/gnu/texinfo; \
elif [ $$n = unzip ]; then \
v=$$(echo $(unzip-version) | sed -e's/\.//'); \
w=ftp://ftp.info-zip.org/pub/infozip/src; \
c=$(unzip-checksum); \
mergenames=0; v=$$(echo $(unzip-version) | sed -e's/\.//'); \
w=ftp://ftp.info-zip.org/pub/infozip/src/unzip$$v.tgz; \
infile=unzip$$v.tgz; \
elif [ $$n = wget ]; then c=$(wget-checksum); w=http://ftp.gnu.org/gnu/wget; \
elif [ $$n = which ]; then c=$(which-checksum); w=http://ftp.gnu.org/gnu/which; \
elif [ $$n = xz ]; then c=$(xz-checksum); w=http://tukaani.org/xz; \
elif [ $$n = zip ]; then \
v=$$(echo $(zip-version) | sed -e's/\.//'); \
w=ftp://ftp.info-zip.org/pub/infozip/src; \
c=$(zip-checksum); \
mergenames=0; v=$$(echo $(zip-version) | sed -e's/\.//'); \
w=ftp://ftp.info-zip.org/pub/infozip/src/zip$$v.tgz; \
infile=zip$$v.tgz; \
elif [ $$n = zlib ]; then c=$(zlib-checksum); w=http://www.zlib.net; \
else \
echo; echo; echo; \
......@@ -205,35 +208,28 @@ $(tarballs): $(tdir)/%: | $(lockdir)
exit 1; \
fi; \
\
\
if [ -f $(DEPENDENCIES-DIR)/$* ]; then \
cp $(DEPENDENCIES-DIR)/$* "$@.unchecked"; \
else \
if [ $$mergenames = 1 ]; then tarballurl=$$w/"$*"; \
else tarballurl=$$w; \
fi; \
\
echo "Downloading $$tarballurl"; \
if [ -f $(ibdir)/wget ]; then \
downloader="wget --no-use-server-timestamps -O"; \
else \
downloader="$(DOWNLOADER)"; \
fi; \
\
touch $(lockdir)/download; \
$(downloadwrapper) "$$downloader" $(lockdir)/download \
$$tarballurl "$@.unchecked"; \
$$w $$infile "$@.unchecked" $(mirrors); \
fi; \
\
\
if type sha512sum > /dev/null 2>/dev/null; then \
checksum=$$(sha512sum "$@.unchecked" | awk '{print $$1}'); \
echo "$*: should be '$$c', is '$$checksum'"; \
if [ x$$checksum = x$$c ]; then mv "$@.unchecked" "$@"; \
else echo "ERROR: Non-matching checksum for '$*'."; exit 1; \
else \
echo; echo; echo "$*: should be '$$c', is '$$checksum'"; \
echo "ERROR: Non-matching checksum for '$*'."; exit 1; \
fi; \
else mv "$@.unchecked" "$@"; \
fi;
fi
......
......@@ -45,6 +45,7 @@ itidir = $(BDIR)/software/installed/version-info/tex
ictdir = $(BDIR)/software/installed/version-info/cite
ipydir = $(BDIR)/software/installed/version-info/python
ibidir = $(BDIR)/software/installed/version-info/proglib
mirrors = reproduce/software/config/installation/mirrors.conf
# Set the top-level software to build.
all: $(foreach p, $(top-level-programs), $(ibidir)/$(p)) \
......@@ -172,66 +173,51 @@ $(tarballs): $(tdir)/%: | $(lockdir)
n=$$(echo $* | sed -e's/[0-9\-]/ /g' -e's/\./ /g' \
| awk '{print $$1}' )
# Set the top download link of the requested tarball.
mergenames=1
# Set the top download link of the requested tarball.
infile=$*
if [ $$n = astrometry ]; then c=$(astrometrynet-checksum); w=http://astrometry.net/downloads
elif [ $$n = atlas ]; then
mergenames=0
c=$(atlas-checksum)
w=https://sourceforge.net/projects/math-atlas/files/Stable/$(atlas-version)/atlas$(atlas-version).tar.bz2/download
elif [ $$n = atlas ]; then c=$(atlas-checksum); w=http://akhlaghi.org/reproduce-software
elif [ $$n = cairo ]; then c=$(cairo-checksum); w=https://www.cairographics.org/releases
elif [ $$n = cdsclient ]; then c=$(cdsclient-checksum); w=http://cdsarc.u-strasbg.fr/ftp/pub/sw
elif [ $$n = cfitsio ]; then c=$(cfitsio-checksum); w=https://heasarc.gsfc.nasa.gov/FTP/software/fitsio/c
elif [ $$n = cmake ]; then
mergenames=0
c=$(cmake-checksum)
majv=$$(echo $(cmake-version) \
| sed -e's/\./ /' \
| awk '{printf("%d.%d", $$1, $$2)}')
w=https://cmake.org/files/v$$majv/cmake-$(cmake-version).tar.gz
w=https://cmake.org/files/v$$majv
elif [ $$n = fftw ]; then c=$(fftw-checksum); w=ftp://ftp.fftw.org/pub/fftw
elif [ $$n = freetype ]; then c=$(freetype-checksum); w=https://download.savannah.gnu.org/releases/freetype
elif [ $$n = ghostscript ]; then c=$(ghostscript-checksum); w=https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs926
elif [ $$n = gnuastro ]; then c=$(gnuastro-checksum); w=http://ftp.gnu.org/gnu/gnuastro
elif [ $$n = gsl ]; then c=$(gsl-checksum); w=http://ftp.gnu.org/gnu/gsl
elif [ $$n = hdf ]; then
mergenames=0
c=$(hdf5-checksum)
majorver=$$(echo $(hdf5-version) | sed -e 's/\./ /g' | awk '{printf("%d.%d", $$1, $$2)}')
w=https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-$$majorver/hdf5-$(hdf5-version)/src/$*
majv=$$(echo $(hdf5-version) | sed -e 's/\./ /g' | awk '{printf("%d.%d", $$1, $$2)}')
w=https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-$$majv/hdf5-$(hdf5-version)/src
elif [ $$n = imagemagick ]; then
mergenames=0
c=$(imagemagick-checksum)
w=https://www.imagemagick.org/download/releases/ImageMagick-$(imagemagick-version).tar.xz
w=https://www.imagemagick.org/download/releases
infile=ImageMagick-$(imagemagick-version).tar.xz
elif [ $$n = imfit ]; then
mergenames=0
c=$(imfit-checksum)
w=http://www.mpe.mpg.de/~erwin/resources/imfit/imfit-$(imfit-version)-source.tar.gz
infile=imfit-$(imfit-version)-source.tar.gz
w=http://www.mpe.mpg.de/~erwin/resources/imfit
elif [ $$n = install ]; then c=NO-CHECK-SUM; w=http://mirror.ctan.org/systems/texlive/tlnet
elif [ $$n = jpegsrc ]; then c=$(libjpeg-checksum); w=http://ijg.org/files
elif [ $$n = lapack ]; then c=$(lapack-checksum); w=http://www.netlib.org/lapack
elif [ $$n = libpng ]; then c=$(libpng-checksum); w=https://download.sourceforge.net/libpng
elif [ $$n = libgit ]; then
mergenames=0
c=$(libgit2-checksum)
w=https://github.com/libgit2/libgit2/archive/v$(libgit2-version).tar.gz
elif [ $$n = libgit ]; then c=$(libgit2-checksum); w=http://akhlaghi.org/reproduce-software
elif [ $$n = libxml ]; then c=$(libxml-checksum); w=ftp://xmlsoft.org/libxml2
elif [ $$n = netpbm ]; then c=$(netpbm-checksum); w=http://akhlaghi.org/reproduce-software
elif [ $$n = openblas ]; then
mergenames=0
c=$(openblas-checksum)
w=https://github.com/xianyi/OpenBLAS/archive/v$(openblas-version).tar.gz
elif [ $$n = openblas ]; then c=$(openblas-checksum); w=http://akhlaghi.org/reproduce-software
elif [ $$n = openmpi ]; then
mergenames=0
c=$(openmpi-checksum)
majorver=$$(echo $(openmpi-version) | sed -e 's/\./ /g' | awk '{printf("%d.%d", $$1, $$2)}')
w=https://download.open-mpi.org/release/open-mpi/v$$majorver/$*
w=https://download.open-mpi.org/release/open-mpi/v$$majorver
c=$(openmpi-checksum)
elif [ $$n = pixman ]; then c=$(pixman-checksum); w=https://www.cairographics.org/releases
elif [ $$n = scamp ]; then c=$(scamp-checksum); w=http://akhlaghi.org/reproduce-software
elif [ $$n = scons ]; then
mergenames=0
c=$(scons-checksum)
w=https://sourceforge.net/projects/scons/files/scons/$(scons-version)/scons-$(scons-version).tar.gz/download
elif [ $$n = scons ]; then c=$(scons-checksum); w=http://akhlaghi.org/reproduce-software
elif [ $$n = sextractor ]; then c=$(sextractor-checksum); w=http://akhlaghi.org/reproduce-software
elif [ $$n = swarp ]; then c=$(swarp-checksum); w=https://www.astromatic.net/download/swarp
elif [ $$n = swig ]; then c=$(swig-checksum); w=https://sourceforge.net/projects/swig/files/swig/swig-$(swig-version)
......@@ -256,15 +242,11 @@ $(tarballs): $(tdir)/%: | $(lockdir)
if [ -f $(DEPENDENCIES-DIR)/$* ]; then
cp $(DEPENDENCIES-DIR)/$* "$@.unchecked"
else
if [ $$mergenames = 1 ]; then tarballurl=$$w/"$*"
else tarballurl=$$w
fi
# Download using the script specially defined for this job.
touch $(lockdir)/download
downloader="wget --no-use-server-timestamps -O"
$(downloadwrapper) "$$downloader" $(lockdir)/download \
$$tarballurl "$@.unchecked"
$$w $$infile "$@.unchecked" $(mirrors)
fi
# Make sure this is the expected tarball. Note that we now have a
......
......@@ -130,30 +130,28 @@ $(pytarballs): $(tdir)/%:
# Set the top download link of the requested tarball. The ones
# that have non-standard filenames (differing from our archived
# tarball names) are treated first, then the standard ones.
mergenames=1
if [ $$n = cython ]; then
mergenames=0
w=""
infile=$*
if [ $$n = cython ]; then
c=$(cython-checksum)
hash=36/da/fcb979fc8cb486a67a013d6aefefbb95a3e19e67e49dff8a35e014046c5e
h=$(pytopurl)/$$hash/Cython-$(cython-version).tar.gz
infile=Cython-$(cython-version).tar.gz
w=$(pytopurl)/36/da/fcb979fc8cb486a67a013d6aefefbb95a3e19e67e49dff8a35e014046c5e
elif [ $$n = libffi ]; then
c=$(libffi-checksum)
w=ftp://sourceware.org/pub/libffi
infile=libffi-$(libffi-version).tar.gz
elif [ $$n = python ]; then
mergenames=0
c=$(python-checksum)
h=https://www.python.org/ftp/python/$(python-version)/Python-$(python-version).tgz
infile=Python-$(python-version).tgz
w=https://www.python.org/ftp/python/$(python-version)
elif [ $$n = pyyaml ]; then
mergenames=0
c=$(pyyaml-checksum)
hash=9f/2c/9417b5c774792634834e730932745bc09a7d36754ca00acf1ccd1ac2594d
h=$(pytopurl)/$$hash/PyYAML-$(pyyaml-version).tar.gz
elif [ $$n = libffi ]; then
mergenames=0
c=$(libffi-checksum)
h=ftp://sourceware.org/pub/libffi/libffi-$(libffi-version).tar.gz
infile=PyYAML-$(pyyaml-version).tar.gz
w=$(pytopurl)/9f/2c/9417b5c774792634834e730932745bc09a7d36754ca00acf1ccd1ac2594d
elif [ $$n = secretstorage ]; then
mergenames=0
c=$(secretstorage-checksum)
hash=a6/89/df343dbc2957a317127e7ff2983230dc5336273be34f2e1911519d85aeb5
h=$(pytopurl)/$$hash/SecretStorage-$(secretstorage-version).tar.gz
infile=SecretStorage-$(secretstorage-version).tar.gz
w=$(pytopurl)/a6/89/df343dbc2957a317127e7ff2983230dc5336273be34f2e1911519d85aeb5
elif [ $$n = asn ]; then h=fc/f1/8db7daa71f414ddabfa056c4ef792e1461ff655c2ae2928a2b675bfed6b4; c=$(asn1crypto-checksum)
elif [ $$n = astroquery ]; then h=61/50/a7a08f9e54d7d9d97e69433cd88231e1ad2901811c9d1ae9ac7ccaef9396; c=$(astroquery-checksum)
elif [ $$n = astropy ]; then h=eb/f7/1251bf6881861f24239efe0c24cbcfc4191ccdbb69ac3e9bb740d0c23352; c=$(astropy-checksum)
......@@ -209,15 +207,15 @@ $(pytarballs): $(tdir)/%:
if [ -f $(DEPENDENCIES-DIR)/$* ]; then
cp $(DEPENDENCIES-DIR)/$* "$@.unchecked"
else
if [ $$mergenames = 1 ]; then tarballurl=$(pytopurl)/$$h/"$*"
else tarballurl=$$h
fi
# Set the standard web-directory (if a non-standard directory
# wasn't set).
if [ x"$$w" = x ]; then w=$(pytopurl)/$$h; fi
# Download using the script specially defined for this job.
touch $(lockdir)/download
downloader="wget --no-use-server-timestamps -O"
$(downloadwrapper) "$$downloader" $(lockdir)/download \
$$tarballurl "$@.unchecked"
$$w $$infile "$@.unchecked" $(mirrors)
fi
# Make sure this is the expected tarball. Note that we now have a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment