#!/bin/sh

##
# Provenance: this file is the script added as 'scripts/healthchecks'
# by commit a5662cc0afd42fcfb475c3158cc9cc35f3ce29f0
# (Zach van Rijn, Tue, 5 Oct 2021 18:45:44 -0500),
# "scripts/healthchecks: add new upstream source url and file
# validator".
#
# The purpose of this script is to validate APKBUILD source URLs
# and to cache them in a distfiles-compatible format.
#
# Usage: ./healthchecks [MAINTAINER]
#
# The optional 'MAINTAINER' argument is passed to 'grep' and is
# used to filter out non-matching packages. Example: 'Dan'.
#
# In addition to server and file availability checks, expected
# and actual (computed) checksums are compared. Files which are
# already present in the cache directory are skipped.
#
# Currently, up to approximately 9 GB of data may be downloaded.

# Set here rather than in the shebang so that it still applies
# when the script is started as 'sh healthchecks'.
set -e;


#---------------------------------------------------------------
# internal

##
# Configurable paths.
#
HERE="$(dirname "$(readlink -f "${0}")")";
TEMP="$(mktemp -d)";        # temp dir for further analysis
SAVE="${HERE}/../cache";    # successful download cache dir

##
# Remove the temp dir on every exit path, not only on success.
# Signals are turned into a normal exit so the EXIT trap fires.
#
trap 'rm -fr "${TEMP}"' EXIT;
trap 'exit 1' HUP INT TERM;

##
# The failure list is printed unconditionally during analysis,
# so it has to exist even if no download is ever attempted.
#
: > "${TEMP}/fail.list";


##
# Globals.
#
what=;                      # package location and name
name=;                      # direct file name
down=;                      # direct download URL
okay=;                      # status of last file downloaded


#---------------------------------------------------------------
# support routines

##
# try_down
#
# Attempts to download the requested file and name accordingly.
#
# Input: 'name' and 'down' global variables, a file name and URL
# respectively. Redirects are followed during file download. If
# successful, file is moved to a cache. Failures are recorded.
#
# Output: global 'okay' is 0 if the file is (now) in the cache,
# otherwise it holds the non-zero exit status of curl.
#
try_down ()
{
    printf " - %s (%s)\n" "${down}" "${name}";

    okay=0;
    if test -f "${SAVE}/${name}"; then
        return 0;           # already cached; nothing to do
    fi

    ##
    # Download the file and set global 'okay' to curl retval.
    #
    # '-f' makes HTTP errors (404, 500, ...) count as failures;
    # without it the server's error page would be saved and
    # cached as if it were the requested file.
    #
    curl --connect-timeout 10 -fsLo "${TEMP}/${name}" "${down}" \
        || okay=${?};


    ##
    # Record any possible failure.
    #
    if test "${okay}" -ne 0; then
        rm -f "${TEMP}/${name}";    # drop partial download
        printf >> "${TEMP}/fail.list" "%s %s %s\n" \
            "${okay}" "${name}" "${down}" \
            ;
        return 0;
    fi


    ##
    # Successful download; move the file to the cache directory.
    #
    mv "${TEMP}/${name}" "${SAVE}/${name}";
}


##
# get_what APKBUILD
#
# Parses the absolute path of an APKBUILD file (argument) into a
# global 'what' variable updated to contain the category/name of
# the package in question, i.e. the last two directory names.
#
get_what ()
{
    what=${1%/APKBUILD};

    ##
    # The function's own positional parameters serve as locals:
    # $1 becomes the parent path, $2 the package directory name.
    #
    set -- "${what%/*}" "${what##*/}";
    what="${1##*/}/${2}";
}


##
# pick_sum NAME
#
# Filters checksum lines ("HASH  FILENAME") on standard input and
# prints those whose file name field is exactly NAME. Returns 0
# if at least one line matched, non-zero otherwise.
#
# NAME is passed through the environment so that awk performs no
# escape processing or regex interpretation on it.
#
pick_sum ()
{
    WANT="${1}" awk '
        $2 == ENVIRON["WANT"] { print; hit = 1 }
        END { exit !hit }
    ';
}


##
# gen_sums
#
# Generates a SHA512 checksum when an entry for the package does
# not already exist.
#
# Input: 'name' global variable (file name in the cache) and the
# 'sha512sums' variable set by the sourced APKBUILD. Appends the
# expected line (if the APKBUILD has one) and the actual line to
# the SHA512SUMS file. Duplicate lines can be removed to show
# issues; a file without an expected entry therefore shows up as
# a mismatch as well.
#
# Subsequent execution of this script with new input will result
# in an inconsistent sort order, but will not cause breakage.
#
gen_sums ()
{
    if test -f "${SAVE}/SHA512SUMS" \
        && pick_sum "${name}" < "${SAVE}/SHA512SUMS" > /dev/null; then
        return 0;
    fi

    ##
    # Expected checksum. A missing entry is not fatal here; it
    # is reported later because the actual line stays unpaired.
    #
    printf "%s\n" "${sha512sums:-}" \
        | pick_sum "${name}" >> "${SAVE}/SHA512SUMS" \
        || :;

    ##
    # Actual checksum. Runs in a subshell so that the change of
    # directory (needed to get a bare file name in the output)
    # does not leak into the caller.
    #
    ( cd "${SAVE}" && sha512sum "${name}" ) >> "${SAVE}/SHA512SUMS";
}


##
# check_pkg APKBUILD
#
# Processes every remote source of a single package: downloads
# it into the cache and records expected and actual checksums.
#
# Must be called inside a subshell: the APKBUILD is sourced, and
# whatever it defines must not carry over to the next package.
#
check_pkg ()
{
    ##
    # Print the package name and location first, while nothing
    # from the APKBUILD can have interfered yet.
    #
    get_what "${1}";
    printf "\n * %s\n" "${what}";


    ##
    # Source the APKBUILD to set up environment for analysis.
    # APKBUILDs are not written with 'set -e' in mind (a final
    # false 'test && ...' is common), so its status is ignored.
    #
    . "${1}" || :;


    ##
    # Each word in the 'source' variable is one of these cases:
    #
    #   1. FILENAME::URL    (contains '//' and '::')
    #
    #   2. URL              (contains '//' and not '::')
    #
    #   3. FILENAME         (does not contain '::' or '//')
    #
    # We only care about cases 1 and 2; case 3 is ignored.
    #
    # Globbing is disabled so that URLs containing '?' or '*'
    # are never expanded against the file system.
    #
    set -f;
    for f in ${source:-}; do

        ##
        # Case 3: FILENAME (NOP).
        #
        case "${f}" in
            *//*) ;;
            *)    continue; ;;
        esac

        case "${f}" in
            ##
            # Case 1: FILENAME::URL, split at the first '::'.
            #
            *::*)
                name=${f%%::*};
                down=${f#*::};
                ;;

            ##
            # Case 2: URL
            #
            *)
                name=${f##*/};
                down=${f};
                ;;
        esac


        ##
        # Attempt to download the current file. Sets 'okay'.
        #
        try_down || okay=1;
        test "${okay}" -eq 0 || continue;


        ##
        # The file exists; checksum it.
        #
        gen_sums \
            || printf "   ! could not checksum %s\n" "${name}" >&2;

    done
}


#---------------------------------------------------------------
# main loop

printf "We will now begin processing matching packages:\n";

mkdir -p "${SAVE}";

find "${HERE}/.." -type f -name APKBUILD | sort | while IFS= read -r k; do

    ##
    # If a maintainer is specified, filter out non-matches. This
    # happens before the APKBUILD is sourced.
    #
    if test "${#}" -ge 1; then
        grep -q -- "Maintainer: ${1}" "${k}" || continue;
    fi


    ##
    # One subshell per package keeps sourced variables isolated.
    # stdin is detached so nothing can consume the file list. A
    # failing package is reported and the run continues.
    #
    ( check_pkg "${k}" ) < /dev/null \
        || printf "   ! failed to process %s\n" "${k}" >&2;

done


#---------------------------------------------------------------
# analysis

printf "\nThe following source files failed to download:\n\n";
cat "${TEMP}/fail.list";

printf "\nThe following source files exist but mismatch:\n\n";
if test -f "${SAVE}/SHA512SUMS"; then
    uniq -u "${SAVE}/SHA512SUMS";
fi


#---------------------------------------------------------------
# cleanup
#
# Nothing to do here: the EXIT trap installed at the top removes
# the temp dir.