#!/bin/sh

# Abort on the first unhandled error. This is requested with 'set'
# instead of on the '#!' line so that it also takes effect when
# the script is started as 'sh healthchecks', in which case the
# interpreter line (and its '-e' argument) is never looked at.
set -e;

##
# The purpose of this script is to validate APKBUILD source URLs
# and to cache them in a distfiles-compatible format.
#
#     Usage: ./healthchecks [MAINTAINER]
#
# The optional 'MAINTAINER' argument is passed to 'grep' and is
# used to filter out non-matching packages. Example: 'Dan'.
#
# In addition to server and file availability checks, expected
# and actual (computed) checksums are compared. Files which are
# already present in the cache directory are skipped.
#
# Currently, up to approximately 9 GB of data may be downloaded.


#---------------------------------------------------------------
# internal

##
# Configurable paths.
#
# 'HERE' is the directory that contains this script, with any
# symbolic links resolved. Every expansion is quoted so that a
# script location containing whitespace is handled correctly.
#
HERE="$(dirname "$(readlink -f "${0}")")";
TEMP="$(mktemp -d)";            # tmp dir for intermediate files
SAVE="/var/www/source/archive"; # successful download cache dir

##
# Remove the temporary directory on every exit path, including
# an early abort caused by 'set -e'.
#
trap 'rm -fr "${TEMP}"' EXIT;


##
# Globals.
#
# State shared between the main loop and the support routines.
# Each one is defined up front and starts out empty.
#
what="";                        # package location and name
name="";                        # direct file name
down="";                        # direct download URL
okay="";                        # status of last file downloaded


#---------------------------------------------------------------
# support routines

##
# try_down
#
# Attempts to download the requested file and names it accordingly.
#
# Input: 'name' and 'down' global variables, a file name and URL
# respectively. 'what' is only used when a failure is recorded.
# Redirects are followed during file download. If successful,
# the file is moved from 'TEMP' into the current directory (the
# main loop changes into the package's cache directory first).
# Failures are recorded once per file name in the file
# "${SAVE}/_savefail.txt" as: STATUS <tab> WHAT <tab> NAME URL
#
# Output: global 'okay' is 0 when the file is available (already
# cached or freshly downloaded), the curl exit status when the
# transfer failed, or -1 when an HTML document was received.
#
try_down ()
{
    printf "      - %s (%s)\n" "${down}" "${name}";

    okay=0;
    test -f "${name}" && return;

    ##
    # Download the file and set global 'okay' to curl retval.
    # With '-f' an HTTP error (e.g. 404) is reported as a curl
    # failure instead of the error page being saved as if it
    # were the requested file.
    #
    curl --connect-timeout 10 -fsLo "${TEMP}/${name}" "${down}" || okay=${?};


    ##
    # The return code may be wrong. Look for HTML documents.
    #
    file -i "${TEMP}/${name}" | grep -q text/html && okay=-1;


    ##
    # Record any possible failure if new, otherwise delete it.
    #
    # 'name' is compared literally against the NAME column of
    # each record and is never used as a pattern, so a name
    # such as 'a.b' cannot match the record for 'axb'.
    #
    if test "${okay}" -ne 0; then

        # Do not let partial or bogus downloads pile up.
        rm -f "${TEMP}/${name}";

        _name="${name}" awk -F'\t' '
            { n = $3; sub(/ .*/, "", n); }
            (n "") == (ENVIRON["_name"] "") { hit = 1; exit; }
            END { exit !hit; }
        ' "${SAVE}/_savefail.txt" && return;

        printf >> "${SAVE}/_savefail.txt" "%s\t%s\t%s %s\n"    \
            "${okay}" "${what}" "${name}" "${down}"            \
            ;
        return;
    fi

    ##
    # The download worked; drop a failure recorded earlier for
    # this name. The list is rewritten in place (not replaced)
    # so that the file keeps its ownership and permissions.
    #
    _name="${name}" awk -F'\t' '
        { n = $3; sub(/ .*/, "", n); }
        (n "") != (ENVIRON["_name"] "")
    ' "${SAVE}/_savefail.txt" > "${TEMP}/.savefail.tmp";
    cat "${TEMP}/.savefail.tmp" > "${SAVE}/_savefail.txt";
    rm -f "${TEMP}/.savefail.tmp";


    ##
    # Successful download; move the file to the cache directory.
    #
    mv "${TEMP}/${name}" "${name}";
}


##
# get_what APKBUILD
#
# Parses the path of an APKBUILD file (argument) into the global
# 'what' variable, which is updated to contain the category/name
# of the package in question, i.e. the last two directories of
# the path. Example: '/repo/user/foo/APKBUILD' gives 'user/foo'.
#
# Only parameter expansion is used: the path is never subjected
# to word splitting or globbing, no external commands are run,
# and no helper variables are left behind.
#
get_what ()
{
    ##
    # The function's own positional parameters serve as scratch
    # space. ${1} becomes the package directory.
    #
    set -- "${1%/APKBUILD}";

    case "${1}" in
        */*)
            # ${1} = package name, ${2} = path of its category.
            set -- "${1##*/}" "${1%/*}";
            what="${2##*/}/${1}";
            ;;
        *)
            # There is no parent directory to report.
            what="${1}";
            ;;
    esac
}


##
# gen_sums
#
# Generates a SHA512 checksum when an entry for the file does
# not already exist.
#
# Input: 'name' global variable (a file in the current directory)
# and the 'sha512sums' variable set by the sourced APKBUILD.
# Appends up to two lines to the SHA512SUMS file: the expected
# line taken from 'sha512sums' (if there is one) followed by the
# actual, computed line. Duplicate lines can be removed to show
# issues; a file without an expected line therefore shows up as
# an issue as well.
#
# 'name' is compared literally against the file name column; it
# is never used as a pattern, so 'foo.patch' does not select the
# entry for 'libfoo.patch'.
#
# Subsequent execution of this script with new input will result
# in an inconsistent sort order, but will not cause breakage.
#
gen_sums ()
{
    ##
    # Nothing to do when this exact name already has an entry.
    #
    _name="${name}" awk '
        ($2 "") == (ENVIRON["_name"] "") { hit = 1; exit; }
        END { exit !hit; }
    ' "${SAVE}/SHA512SUMS" && return;

    ##
    # Expected checksum. Unlike 'grep', awk succeeds even if no
    # line is selected, so a missing entry does not abort the
    # whole script when it runs under 'set -e'.
    #
    printf "%s\n" "${sha512sums}"                              \
        | _name="${name}" awk '
            ($2 "") == (ENVIRON["_name"] "")
        ' >> "${SAVE}/SHA512SUMS"                              \
        ;

    ##
    # Actual checksum.
    #
    sha512sum >> "${SAVE}/SHA512SUMS" "${name}";
}


#---------------------------------------------------------------
# main loop

mkdir -p "${SAVE}";
touch "${SAVE}/_savefail.txt";
touch "${SAVE}/SHA512SUMS";

printf "We will now begin processing matching packages:\n";

##
# One APKBUILD path per line. 'IFS=' and '-r' keep leading or
# trailing blanks and backslashes in a path intact.
#
find "${HERE}/.." -type f -name APKBUILD | sort | while IFS= read -r k; do

    ##
    # If a maintainer is specified, filter out non-matches.
    #
    # This is done before the APKBUILD is sourced so that
    # skipped packages are never executed. The pattern is
    # quoted so that a maintainer such as 'Jane Doe' is passed
    # to grep as one pattern, not a pattern and a file name.
    #
    if test ${#} -eq 1; then
        grep -q "Maintainer: ${1}" "${k}" || continue;
    fi


    ##
    # Source the APKBUILD to set up environment for analysis.
    #
    # The variables read below are cleared first; otherwise an
    # APKBUILD that does not set one of them would silently
    # reuse the value left behind by the previous package.
    #
    pkgname=; pkgver=; source=; sha512sums=;
    . "${k}";


    ##
    # Work out the category/name of the package from the path
    # of its APKBUILD, then print it.
    #
    get_what "${k}";
    printf "\n  * %s\n" "${what}";


    ##
    # Each word in the 'source' variable is one of these cases:
    #
    #  1. FILENAME::URL (contains '//' and     '::')
    #
    #  2. URL           (contains '//' and not '::')
    #
    #  3. FILENAME      (does not contain '::' or '//')
    #
    # We only care about cases 1 and 2; case 3 is ignored.
    #
    # 'source' is expanded unquoted on purpose: it is a list of
    # words separated by whitespace.
    #
    mkdir -p "${SAVE}/${pkgname}-${pkgver}";
    cd "${SAVE}/${pkgname}-${pkgver}";
    for f in ${source}; do

        case "${f}" in

            ##
            # Case 1: FILENAME::URL
            #
            *//*::*|*::*//*)
                name=${f%::*};
                down=${f#*::};
                ;;

            ##
            # Case 2: URL
            #
            *//*)
                name=${f##*/};
                down=${f};
                ;;

            ##
            # Case 3: FILENAME (NOP).
            #
            *)
                continue;
                ;;
        esac


        ##
        # Attempt to download the current file. Sets 'okay'.
        #
        try_down;
        test "${okay}" -eq 0 || continue;


        ##
        # The file exists; checksum it.
        #
        gen_sums;

    done

done


#---------------------------------------------------------------
# analysis

##
# Report every download failure that is currently on record.
#
printf "\nThe following source files failed to download:\n\n";
cat < "${SAVE}/_savefail.txt";

##
# Report checksum lines that have no identical neighbour, and
# keep a copy of that report next to the checksum list.
#
printf "\nThe following source files exist but mismatch:\n\n";
< "${SAVE}/SHA512SUMS" uniq -u | tee "${SAVE}/_hashfail.txt";


#---------------------------------------------------------------
# cleanup

##
# The intermediate download directory is no longer needed.
#
rm -rf "${TEMP}";