From 64b8c083f9685b15b9c9eb9af787f679daad012c Mon Sep 17 00:00:00 2001
From: Zach van Rijn <me@zv.io>
Date: Sat, 30 Apr 2022 01:00:37 -0500
Subject: scripts/*: add dependency resolver.

---
 .gitignore      |   3 +
 scripts/deplist | 179 +++++++++++++++++++++
 scripts/depsort |   3 +
 scripts/dotty   |  19 +++
 scripts/setup   |  61 +++++++
 scripts/sgrep.c | 370 ++++++++++++++++++++++++++++++++++++++++++
 scripts/tsort.c | 491 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 1126 insertions(+)
 create mode 100755 scripts/deplist
 create mode 100755 scripts/depsort
 create mode 100755 scripts/dotty
 create mode 100755 scripts/setup
 create mode 100644 scripts/sgrep.c
 create mode 100644 scripts/tsort.c

diff --git a/.gitignore b/.gitignore
index 9d982c9af..5c5ff1687 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
 src
 pkg
+scripts/sgrep
+scripts/tsort
+scripts/.index
diff --git a/scripts/deplist b/scripts/deplist
new file mode 100755
index 000000000..5eb51c95b
--- /dev/null
+++ b/scripts/deplist
@@ -0,0 +1,179 @@
+#!/bin/sh -e
+
+## e.g.
+#
+#   $ ./scripts/setup
+#   $ ./scripts/deplist system | ./scripts/depsort
+#
+
+HERE="$(dirname "$(readlink -f "${0}")")";    # directory holding this script
+BASE="${HERE}/..";                            # repository root
+
+##
+# Usage
+#
+if test ${#} = 0; then
+    # Nothing to do without a repository: report on stderr and fail.
+    printf "Usage: %s REPO [REPO ...]\n" "${0}" 1>&2;
+    exit 1;
+fi
+
+##
+# Sanity check. The specified repositories exist.
+#
+for repo in "${@}"; do    # quoted: arguments are not re-split or globbed
+    if ! test -d "${BASE}/${repo}"; then
+        printf "E: '%s' is not a valid repository!\n" "${repo}" 1>&2;
+        exit 1;
+    fi
+done
+
+##
+# Find a package by name or find the parent package of a subpackage.
+# Note that the index is padded by a space on either side for easy grep.
+#
+# Usage: parent PACKAGE
+#
+parent ()
+{
+    # Fast path: binary search for an index line that starts with " NAME ",
+    # i.e. NAME is itself a package directory.
+    r=$("${HERE}"/sgrep " ${1} " "${HERE}"/.index | cut -d' ' -f2 | xargs);
+    if test -z "${r}"; then
+        # Slow path: NAME is listed on another package's line. Match it as a
+        # fixed string so '.' and friends in names are not regex operators.
+        r=$(grep -F " ${1} " "${HERE}"/.index | cut -d' ' -f2 | xargs);
+    fi
+    printf "%s" "${r}";
+}
+
+##
+# Do everything at once.
+#
+for repo in "${@}"; do
+    find "${BASE}/${repo}" -mindepth 1 -maxdepth 1 -type d | while read -r k; do
+    (
+        ##
+        # Source APKBUILD in a subshell to avoid contamination.
+        #
+        . "${k}/APKBUILD";
+
+        ##
+        # Special-case some options. For example, '!check' means
+        # that the 'checkdepends=' variable is not used. Ignore.
+        #
+        for m in ${options}; do
+            case ${m} in
+                !check) checkdepends=; ;;
+            esac
+        done
+
+        ##
+        # Aggregate all possible depends. Obviously if something
+        # is not specified, it will be empty anyway.
+        #
+        # FIXME: Clean this up somehow.
+        #
+        cats=$(tr ' ' '\n' <<EOF | sort | uniq
+${checkdepends}
+${depends}
+${depends_dev}
+${makedepends}
+${makedepends_build}
+${makedepends_host}
+${_core_depends}
+EOF
+);
+
+        ##
+        # Construct canonical repository/package name.
+        #
+        p="${repo}/${k##*/}";
+
+        ##
+        # Column 1 output.
+        #
+        printf "%s " "${p}";
+        test -z "${cats}" && printf "\n" && exit 0; # no deps: leave this package's subshell ('continue' is unspecified here)
+
+        for d in ${cats}; do
+
+            ##
+            # Trim out most of the qualifiers.
+            #
+            _d=${d};            # formatting
+
+            _d=${_d%=*};        # remove =version
+            _d=${_d%>*};        # remove >version
+            _d=${_d%<*};        # remove <version
+            _d=${_d%~*};        # remove ~version
+
+            ##
+            # Handle cases where a dependency is provided by another package.
+            #
+            case "${_d}" in
+                !*) continue; ;;
+
+                # override: system
+                /bin/sh)        _d=dash; ;;
+                /sbin/init)     _d=s6-linux-init; ;;
+                cmd:sendmail)   _d=ssmtp; ;;
+                cmd:which)      _d=debianutils; ;;
+                libc-utils)     _d=musl; ;;
+
+                # override: user (FIXME: incomplete)
+                cmd:byacc)      _d=$(parent "${d#cmd:*}"); ;;
+                cmd:cpio)       _d=$(parent libarchive-tools); ;;
+                cmd:gzip)       _d=$(parent "${d#cmd:*}"); ;;
+                cmd:yacc)       _d=bison; ;;
+                cmd:lex)        _d=flex; ;;
+                cmd:mcookie)    _d=$(parent "${d#cmd:*}"); ;;
+                cmd:unix2dos)   _d=dos2unix; ;;
+                cargo-*)        _d=rust; ;;
+                guile-dev)      _d=guile; ;;
+                llvm-dev)       _d=$(parent llvm13-dev); ;;
+                llvm-static)    _d=$(parent llvm13-static); ;;
+                llvm-test-*)    _d=$(parent llvm13-test-utils); ;;
+                py3-libxml2)    _d=$(parent py-libxml2); ;;
+
+                # automatic search
+                *)              _d=$(parent "${_d}"); ;;
+            esac
+
+            ##
+            # Determine relative path to dependency. It might be
+            # in the wrong repository, in which case must fix.
+            #
+            # TODO: Clean up to accommodate any number of repos
+            # and their policies. For example, 'kernel' if we do
+            # end up creating that someday.
+            #
+            case "${repo}" in
+                system) # nothing in system may depend on anything outside of system
+                    if ! test -f "${BASE}/${repo}/${_d}/APKBUILD"; then
+                        printf "E: dependency '%s' not found or excepted\n" "${_d}" 1>&2;
+                        exit 1;
+                    fi
+                    path="${repo}/${_d}";
+                    ;;
+                user) # packages in user might depend on something in system
+                    if ! test -f "${BASE}/${repo}/${_d}/APKBUILD"; then
+                        if test -f "${BASE}/system/${_d}/APKBUILD"; then
+                            path="system/${_d}";
+                        else
+                            printf "E: dependency '%s' not found or excepted\n" "${_d}" 1>&2;
+                            exit 1;
+                        fi
+                    else
+                        path="${repo}/${_d}";
+                    fi
+                    ;;
+            esac
+
+            ##
+            # Print remaining columns of output if not parent.
+            #
+            printf " %s" "${path}";
+        done | tr ' ' '\n' | sed -e "\@${p}\$@d" | sort -u | xargs; # sort and remove duplicates
+    )
+    done | sort; # sort only within a repo
+done
diff --git a/scripts/depsort b/scripts/depsort
new file mode 100755
index 000000000..953260238
--- /dev/null
+++ b/scripts/depsort
@@ -0,0 +1,3 @@
+#!/bin/sh -e
+
+awk '{ for (f=1;f<=NF;f++) { print $(f),$1 } }' | "$(dirname "$(readlink -f "${0}")")"/tsort
diff --git a/scripts/dotty b/scripts/dotty
new file mode 100755
index 000000000..45bbd8d1d
--- /dev/null
+++ b/scripts/dotty
@@ -0,0 +1,19 @@
+#!/bin/sh -e
+
+##
+# CLOSE THIS FILE! DO NOT LOOK BELOW!
+#
+
+
+# ./scripts/deplist system | ./scripts/dotty
+
+printf "digraph G {\n";
+while read -r a rest; do
+    # Declare the node itself so packages without dependencies still appear.
+    printf "    \"%s\";\n" "${a}";
+    for b in ${rest}; do
+        # One edge per remaining column: package -> dependency.
+        printf "    \"%s\" -> \"%s\";\n" "${a}" "${b}";
+    done
+done
+printf "}\n";
diff --git a/scripts/setup b/scripts/setup
new file mode 100755
index 000000000..5577c7244
--- /dev/null
+++ b/scripts/setup
@@ -0,0 +1,61 @@
+#!/bin/sh -e
+
+##
+# This script prepares the 'packages.git' repository for use by
+# the included scripts. It is not needed to build packages, but
+# is useful for maintainers. The 'autobuilder' does require it.
+#
+# Re-run any time you pull from upstream or switch branches.
+#
+
+HERE="$(dirname "$(readlink -f "${0}")")";
+BASE="${HERE}/..";
+
+##
+# Compile 'tsort' utility. Used to compute topological sort for
+# building the world. Assumes GCC, yes, easy enough to change.
+#
+gcc -o "${HERE}"/tsort "${HERE}"/tsort.c -O3;
+
+##
+# Compile 'sgrep' utility. Used to traverse the index generated
+# below.
+#
+gcc -o "${HERE}"/sgrep "${HERE}"/sgrep.c -O3;
+
+##
+# Build subpackage index. Used for resolving dependencies when
+# a subpackage is used instead of its parent.
+#
+# MAINTAINERS: If repos renamed/added/removed, must update below.
+#
+for repo in system user legacy; do
+    find "${BASE}/${repo}" -mindepth 1 -maxdepth 1 -type d | sort | while read -r k; do
+    (
+        ##
+        # Source APKBUILD in a subshell to avoid contamination.
+        #
+        . "${k}/APKBUILD";
+
+        ##
+        # Print the package name, whether it has subdeps or not.
+        #
+        printf " %s " "${k##*/}";
+
+        ##
+        # Trim non-name bits from any declared subpackage(s).
+        #
+        for s in ${subpackages} ${provides} ${pkgname}; do
+            case ${s} in
+                # 'name:split' and 'name::arch' both keep what precedes the first ':'
+                *:*) t=${s%%:*}; ;;
+                *=*) t=${s%%=*}; ;;
+                *) t=${s}; ;;
+            esac
+            printf " %s" "${t}";
+        done | tr ' ' '\n' | sort | uniq | xargs | sed -e 's/$/ /';
+    )
+    done
+#done | awk -v X=1 'NF>X'
+# (uncomment this ^ to exclude packages with no subpackages
+done | LC_ALL=C sort > "${HERE}"/.index    # sgrep binary-searches the whole file in byte order
diff --git a/scripts/sgrep.c b/scripts/sgrep.c
new file mode 100644
index 000000000..d4f7d4544
--- /dev/null
+++ b/scripts/sgrep.c
@@ -0,0 +1,370 @@
+/*
+ * sgrep version 1.0
+ *
+ * Copyright 2009 Stephen C. Losen.  Distributed under the terms
+ * of the GNU General Public License (GPL)
+ *
+ * Sgrep (sorted grep) is a much faster alternative to traditional Unix
+ * grep, but with significant restrictions. 1) All input files must
+ * be sorted regular files.  2) The sort key must start at the beginning
+ * of the line.  3) The search key matches only at the beginning of
+ * the line.  4) No regular expression support.
+ *
+ * Sgrep uses a binary search algorithm, which is very fast, but
+ * requires sorted input.  Each iteration of the search eliminates
+ * half of the remaining input.  In other words, doubling the size
+ * of the file adds just one iteration.
+ *
+ * Sgrep seeks to the center of the file and then reads characters
+ * until it encounters a newline, which places the file pointer at
+ * the start of the next line.  Sgrep compares the search key with the
+ * beginning of the line.  If the key is greater than the line, then
+ * the process repeats with the second half of the file.  If less than,
+ * then the process repeats with the first half of the file.  If equal,
+ * then the line matches, but it may not be the earliest match, so the
+ * process repeats with the first half of the file.  Eventually all
+ * of the input is eliminated and sgrep finds either no matching line
+ * or the first matching line.  Sgrep outputs matching lines until it
+ * encounters a non matching line.
+ *
+ * Usage:  sgrep [ -i | -n ] [ -c ] [ -b ] [ -r ] key [ sorted_file ... ]
+ *
+ * If no input file is specified, then sgrep uses stdin.
+ *
+ * The -i flag uses case insensitive byte comparison.  The file must
+ * be sorted with "sort -f".
+ *
+ * The -n flag uses numeric comparison.  The file must be sorted
+ * with "sort -n".
+ *
+ * The -b flag causes sgrep to ignore white space at the beginning
+ * of lines and at the beginning of the search key.  The file must
+ * be sorted with "sort -b"
+ *
+ * The -c flag outputs the number of matching lines instead of the
+ * lines themselves.
+ *
+ * The -r flag specifies that the file is sorted in reverse
+ * (descending) order using "sort -r".
+ *
+ * Author:  Stephen C. Losen   University of Virginia
+ */
+
+/* large file support */
+
+#ifdef _AIX
+#define _LARGE_FILES
+#else
+#define _FILE_OFFSET_BITS 64
+#endif
+
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+/* We need different comparison functions for different sort orderings */
+
+/* exact comparison */
+
+static int
+cmp_exact(const char *key, FILE *fp) {
+    const unsigned char *p = (const unsigned char *) key;
+    int ch = 0;
+
+    /* consume line bytes for as long as they agree with the key */
+    for (; *p != 0; p++)
+        if ((ch = getc(fp)) != *p) break;
+    if (*p == 0)    return 0;       /* whole key matched: the line is a hit */
+    if (ch == '\n') return 1;       /* line ended before the key did */
+    if (ch == EOF)  return -1;      /* ran off the end of the file */
+    return *p - ch;                 /* byte order of the first difference */
+}
+
+/* case insensitive comparison */
+
+static int
+cmp_case(const char *key, FILE *fp) {
+    const unsigned char *p = (const unsigned char *) key;
+    int ch = 0;
+
+    /* consume line bytes while they agree with the key, ignoring case */
+    for (; *p != 0; p++)
+        if (tolower(ch = getc(fp)) != tolower(*p)) break;
+
+    if (*p == 0)    return 0;       /* whole key matched */
+    if (ch == '\n') return 1;       /* line ended before the key did */
+    if (ch == EOF)  return -1;      /* ran off the end of the file */
+    return tolower(*p) - tolower(ch);
+}
+
+/* exact comparison ignoring leading white space */
+
+static int
+cmp_exact_white(const char *key, FILE *fp) {
+    const unsigned char *p = (const unsigned char *) key;
+    int ch;
+
+    /* drop leading blanks from the key ... */
+    for (; isspace(*p); p++)
+        ;
+    /* ... and from the line, but never read past its newline */
+    do {
+        ch = getc(fp);
+    } while (ch != '\n' && isspace(ch));
+    for (; *p != 0 && ch == *p; p++)
+        ch = getc(fp);
+    if (*p == 0)    return 0;       /* whole key matched */
+    if (ch == '\n') return 1;       /* line ended before the key did */
+    if (ch == EOF)  return -1;      /* ran off the end of the file */
+    return *p - ch;
+}
+
+/* case insensitive comparison ignoring leading white space */
+
+static int
+cmp_case_white(const char *key, FILE *fp) {
+    const unsigned char *p = (const unsigned char *) key;
+    int ch;
+
+    /* drop leading blanks from the key ... */
+    for (; isspace(*p); p++)
+        ;
+    /* ... and from the line, but never read past its newline */
+    do {
+        ch = getc(fp);
+    } while (ch != '\n' && isspace(ch));
+    /* compare the remainder without regard to case */
+    for (; *p != 0 && tolower(ch) == tolower(*p); p++)
+        ch = getc(fp);
+    if (*p == 0)    return 0;       /* whole key matched */
+    if (ch == '\n') return 1;       /* line ended before the key did */
+    if (ch == EOF)  return -1;      /* ran off the end of the file */
+    return tolower(*p) - tolower(ch);
+}
+
+/* numeric comparison */
+
+static int
+cmp_num(const char *key, FILE *fp) {
+    int c, i = 0;
+    char buf[128], *cp = 0;
+    double low, high, x;
+
+    /* copy the line's leading numeric text ('-', '.', digits) into buf */
+
+    while((c = getc(fp)) != '\n' && c != EOF) {
+        if (i == 0 && isspace(c)) {         /* skip blanks before the number */
+            continue;
+        }
+        if (i + 1 >= sizeof(buf) ||         /* buf full (one byte kept for NUL) */
+            (c != '-' && c != '.' && !isdigit(c)))
+        {
+            break;
+        }
+        buf[i++] = c;
+    }
+    buf[i] = 0;
+    if (c == EOF && i == 0) {               /* end of file before any digit */
+        return -1;
+    }
+
+    /* key is "N" or a range "LOW:HIGH"; result is 0 when low <= x <= high */
+
+    x = strtod(buf, 0);
+    low = high = strtod(key, &cp);
+    if (*cp == ':') {                       /* optional upper bound follows */
+        high = strtod(cp + 1, 0);
+    }
+    return
+        high < x ? -1 :
+        low  > x ?  1 : 0;
+}
+
+static int (*compare)(const char *key, FILE *fp);
+
+/*
+ * Use binary search to find the first matching line and return
+ * its byte position.
+ */
+
+static off_t
+binsrch(const char *key, FILE *fp, int reverse) {
+    off_t low, med, high, start, prev = -1, ret = -1;
+    int cmp, c;
+    struct stat st;
+
+    fstat(fileno(fp), &st);
+    high = st.st_size - 1;                  /* offset of the last byte */
+    low = 0;
+    while (low <= high) {
+        med = (high + low) / 2;
+        fseeko(fp, med, SEEK_SET);
+
+        /* scan to start of next line if not at beginning of file */
+
+        if ((start = med) != 0)  {
+            do {
+                start++;                    /* start ends one past the newline */
+            } while ((c = getc(fp)) != '\n' && c != EOF);
+        }
+
+        /* compare key with current line */
+
+        if (start != prev) {        /* avoid unnecessary compares */
+            cmp = compare(key, fp);
+            if (reverse != 0) {             /* descending file: flip the ordering */
+                cmp = -cmp;
+            }
+            prev = start;
+        }
+
+        /* eliminate half of input */
+
+        if (cmp < 0) {                      /* key sorts before this line */
+            high = med - 1;
+        }
+        else if (cmp > 0) {                 /* key sorts after this line */
+            low = start + 1;
+        }
+        else {             /* success, look for earlier match */
+            ret = start;
+            high = med - 1;
+        }
+    }
+    return ret;                             /* -1 when no line matched */
+}
+
+/* print all lines that match the key or else just the number of matches */
+
+static void
+printmatch(const char *key, FILE *fp, off_t start,
+    const char *fname, int cflag)
+{
+    int c, count;
+
+    if (start >= 0) {                       /* negative start: nothing matched */
+        fseeko(fp, start, SEEK_SET);
+    }
+    for (count = 0; start >= 0 && compare(key, fp) == 0; count++) {
+        fseeko(fp, start, SEEK_SET);        /* compare() consumed bytes; rewind to line start */
+        if (cflag == 0 && fname != 0) {     /* "file:" prefix when a name was passed */
+            fputs(fname, stdout);
+            fputc(':', stdout);
+        }
+        while ((c = getc(fp)) != EOF) {
+            start++;                        /* keep start in step with the stream offset */
+            if (cflag == 0) {
+                fputc(c, stdout);
+            }
+            if (c == '\n') {
+                break;
+            }
+        }
+        if (c == EOF) {                     /* last line had no trailing newline */
+            break;
+        }
+    }
+    if (cflag != 0) {                       /* count mode: print the tally only */
+        if (fname != 0) {
+            fputs(fname, stdout);
+            fputc(':', stdout);
+        }
+        printf("%d\n", count);
+    }
+}
+
+int
+main(int argc, char **argv) {
+    FILE *fp;
+    const char *key = 0;
+    int i, numfile, status;
+    int bflag = 0, cflag = 0, iflag = 0, nflag = 0, rflag = 0;
+    off_t where;
+    struct stat st;
+    extern int optind, opterr;
+
+    /* parse command line options */
+
+    opterr = 0;
+    while ((i = getopt(argc, argv, "bcfinr")) > 0 && i != '?') {
+        switch(i) {
+        case 'b':
+            bflag++;
+            break;
+        case 'c':
+            cflag++;
+            break;
+        case 'f':
+        case 'i':
+            iflag++;
+            nflag = 0;
+            break;
+        case 'n':
+            nflag++;
+            iflag = 0;
+            break;
+        case 'r':
+            rflag++;
+            break;
+        }
+    }
+    if (i == '?' || optind >= argc) {
+        fputs ("Usage: sgrep [ -i | -n ] [ -c ] [ -b ] [ -r ] key "
+            "[ sorted_file ... ]\n", stderr);
+        exit(2);
+    }
+    i = optind;
+    key = argv[i++];
+
+    /* select the comparison function */
+
+    if (iflag != 0) {
+        compare = bflag == 0 ? cmp_case : cmp_case_white;
+    }
+    else if (nflag != 0) {
+        compare = cmp_num;
+    }
+    else {
+        compare = bflag == 0 ? cmp_exact : cmp_exact_white;
+    }
+
+    /* if no input files, then search stdin */
+
+    if ((numfile = argc - i) == 0) {
+        /* S_ISREG, not a bit test: S_IFREG shares a bit with S_IFLNK/S_IFSOCK */
+        if (fstat(fileno(stdin), &st) != 0 || !S_ISREG(st.st_mode)) {
+            fputs("sgrep: STDIN is not a regular file\n", stderr);
+            exit(2);
+        }
+        where = binsrch(key, stdin, rflag);
+        printmatch(key, stdin, where, 0, cflag);
+        exit(where < 0);
+    }
+
+    /* search each input file; status: 0 = match, 1 = none, 2 = error */
+
+    for (status = 1; i < argc; i++) {
+        if ((fp = fopen(argv[i], "r")) == 0) {
+            fprintf(stderr, "sgrep:  could not open %s\n", argv[i]);
+            status = 2;
+            continue;
+        }
+        /* same regular-file test as for stdin; a failed fstat is rejected too */
+        if (fstat(fileno(fp), &st) != 0 || !S_ISREG(st.st_mode)) {
+            fprintf(stderr, "sgrep: %s is not a regular file\n", argv[i]);
+            status = 2;
+            fclose(fp);
+            continue;
+        }
+        where = binsrch(key, fp, rflag);
+        printmatch(key, fp, where, numfile == 1 ? 0 : argv[i], cflag);
+        if (status == 1 && where >= 0) {
+            status = 0;
+        }
+        fclose(fp);
+    }
+    exit(status);
+}
diff --git a/scripts/tsort.c b/scripts/tsort.c
new file mode 100644
index 000000000..e307268da
--- /dev/null
+++ b/scripts/tsort.c
@@ -0,0 +1,491 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1989, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Michael Rendell of Memorial University of Newfoundland.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1989, 1993, 1994\n\
+	The Regents of the University of California.  All rights reserved.\n";
+#endif /* not lint */
+
+#ifndef lint
+static const char sccsid[] = "@(#)tsort.c	8.3 (Berkeley) 5/4/95";
+#endif /* not lint */
+
+#include <sys/cdefs.h>
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ *  Topological sort.  Input is a list of pairs of strings separated by
+ *  white space (spaces, tabs, and/or newlines); strings are written to
+ *  standard output in sorted order, one per line.
+ *
+ *  usage:
+ *     tsort [-dlq] [inputfile]
+ *  If no input file is specified, standard input is read.
+ *
+ *  Should be compatible with AT&T tsort HOWEVER the output is not identical
+ *  (i.e. for most graphs there is more than one sorted order, and this tsort
+ *  usually generates a different one than the AT&T tsort).  Also, cycle
+ *  reporting seems to be more accurate in this version (the AT&T tsort
+ *  sometimes says a node is in a cycle when it isn't).
+ *
+ *  Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90
+ */
+
+#define	NF_MARK		0x1		/* marker for cycle detection */
+#define	NF_ACYCLIC	0x2		/* this node is cycle free */
+#define	NF_NODEST	0x4		/* Unreachable */
+
+
+typedef struct node_str NODE;
+
+struct node_str {
+	NODE **n_prevp;			/* pointer to previous node's n_next */
+	NODE *n_next;			/* next node in graph */
+	NODE **n_arcs;			/* array of arcs to other nodes */
+	int n_narcs;			/* number of arcs in n_arcs[] */
+	int n_arcsize;			/* size of n_arcs[] array */
+	int n_refcnt;			/* # of arcs pointing to this node */
+	int n_flags;			/* NF_* */
+	char n_name[1];			/* name of this node */
+};
+
+typedef struct _buf {
+	char *b_buf;
+	int b_bsize;
+} BUF;
+
+#define HASH_CHUNK_SIZE 64
+#define HASH_BUCKET_COUNT 1024
+
+struct hash_elem {
+	NODE *elem;			/* node stored in this cell */
+	struct hash_elem *next;		/* next cell in the bucket chain or free list */
+};
+
+struct hash_chunk {
+	struct hash_elem elems[HASH_CHUNK_SIZE];	/* cells handed out by hash_insert() */
+	struct hash_chunk *next;	/* chunk allocated before this one */
+};
+
+struct hash {
+	struct hash_elem **elems;	/* bucket heads, indexed by masked hash_key() */
+	struct hash_chunk *chunks;	/* every allocated chunk, newest first */
+	struct hash_elem *top;		/* head of the free-cell list */
+};
+
+static void hash_init(struct hash *h) {
+	*h = (struct hash){ .elems = calloc(HASH_BUCKET_COUNT, sizeof(*h->elems)) };	/* chunks and top start NULL */
+	if (h->elems == NULL)
+		err(1, NULL);	/* out of memory: same policy as grow_buf() */
+}
+
+static void hash_destroy(struct hash *h) {
+	/* release every node (and its arc array) reachable from a bucket */
+	for (size_t b = 0; b < HASH_BUCKET_COUNT; ++b)
+		for (struct hash_elem *it = h->elems[b]; it != NULL; it = it->next) {
+			free(it->elem->n_arcs);
+			free(it->elem);
+		}
+	free(h->elems);
+	/* the hash_elem cells live inside the chunks, so they go last */
+	struct hash_chunk *following;
+	for (struct hash_chunk *c = h->chunks; c != NULL; c = following) {
+		following = c->next;
+		free(c);
+	}
+	h->chunks = NULL;
+}
+
+static size_t hash_key(char *key) {
+	size_t acc = 5381, ch;	/* seed, then fold in one byte per step */
+	while ((ch = *key++) != 0)
+		acc = (acc * 33) ^ ch;
+	return acc;
+}
+
+static NODE *hash_find(struct hash *h, char *key) {
+	/* walk the collision chain of the bucket selected by the key's hash */
+	struct hash_elem *it = h->elems[hash_key(key) % HASH_BUCKET_COUNT];
+
+	while (it != NULL && strcmp(key, it->elem->n_name) != 0)
+		it = it->next;
+	return it != NULL ? it->elem : NULL;
+}
+
+static struct hash_elem *hash_insert(struct hash *h, char *key) {
+	size_t hash = hash_key(key) & (HASH_BUCKET_COUNT - 1);
+	if (!h->top) {	/* free list empty: carve a new chunk into cells */
+		struct hash_chunk *c = calloc(1, sizeof(*c));
+		if (c == NULL)
+			err(1, NULL);
+		c->next = h->chunks;
+		h->chunks = c;
+		for (size_t i = 0; i + 1 < HASH_CHUNK_SIZE; ++i)
+			c->elems[i].next = &c->elems[i + 1];
+		h->top = c->elems;	/* last cell's next stays zeroed by calloc */
+	}
+	struct hash_elem *hc = h->top;	/* pop one cell off the free list */
+	h->top = hc->next;
+	hc->next = h->elems[hash];	/* push it on the front of its bucket */
+	return (h->elems[hash] = hc);	/* caller fills in hc->elem */
+}
+
+static struct hash db;
+static NODE *graph, **cycle_buf, **longest_cycle;
+static int debug, longest, quiet;
+
+static void	 add_arc(char *, char *);
+static int	 find_cycle(NODE *, NODE *, int, int);
+static NODE	*get_node(char *);
+static void	*grow_buf(void *, size_t);
+static void	 remove_node(NODE *);
+static void	 clear_cycle(void);
+static void	 tsort(void);
+static void	 usage(void);
+
+int
+main(int argc, char *argv[])
+{
+	BUF *b;
+	int c, n;
+	FILE *fp;
+	int bsize, ch, nused;
+	BUF bufs[2];			/* one token buffer per member of a pair */
+
+	fp = NULL;
+	while ((ch = getopt(argc, argv, "dlq")) != -1)
+		switch (ch) {
+		case 'd':
+			debug = 1;
+			break;
+		case 'l':
+			longest = 1;
+			break;
+		case 'q':
+			quiet = 1;
+			break;
+		case '?':
+		default:
+			usage();	/* does not return */
+		}
+	argc -= optind;
+	argv += optind;
+
+	switch (argc) {
+	case 0:
+		fp = stdin;
+		break;
+	case 1:
+		if ((fp = fopen(*argv, "r")) == NULL)
+			err(1, "%s", *argv);
+		break;
+	default:
+		usage();
+	}
+
+	for (b = bufs, n = 2; --n >= 0; b++)
+		b->b_buf = grow_buf(NULL, b->b_bsize = 1024);
+
+	hash_init(&db);
+
+	/* parse input and build the graph; n alternates 0/1 between pair members */
+	for (n = 0, c = getc(fp);;) {
+		while (c != EOF && isspace(c))
+			c = getc(fp);
+		if (c == EOF)
+			break;
+
+		nused = 0;
+		b = &bufs[n];
+		bsize = b->b_bsize;
+		do {
+			b->b_buf[nused++] = c;
+			if (nused == bsize)	/* always keep room for the NUL below */
+				b->b_buf = grow_buf(b->b_buf, bsize *= 2);
+			c = getc(fp);
+		} while (c != EOF && !isspace(c));
+
+		b->b_buf[nused] = '\0';
+		b->b_bsize = bsize;
+		if (n)			/* second token read: the pair is complete */
+			add_arc(bufs[0].b_buf, bufs[1].b_buf);
+		n = !n;
+	}
+	(void)fclose(fp);
+	if (n)				/* input ended in the middle of a pair */
+		errx(1, "odd data count");
+
+	/* do the sort */
+	tsort();
+	hash_destroy(&db);
+	exit(0);
+}
+
+/* resize bp to size bytes (callers pick the size); exits via err() on failure. */
+static void *
+grow_buf(void *bp, size_t size)
+{
+	if ((bp = realloc(bp, size)) == NULL)
+		err(1, NULL);
+	return (bp);
+}
+
+/*
+ * add an arc from node s1 to node s2 in the graph.  If s1 or s2 are not in
+ * the graph, then add them.
+ */
+static void
+add_arc(char *s1, char *s2)
+{
+	NODE *n1;
+	NODE *n2;
+	int bsize, i;
+
+	n1 = get_node(s1);
+
+	if (!strcmp(s1, s2))		/* "x x" only declares node x, no arc */
+		return;
+
+	n2 = get_node(s2);
+
+	/*
+	 * Check if this arc is already here.
+	 */
+	for (i = 0; i < n1->n_narcs; i++)
+		if (n1->n_arcs[i] == n2)
+			return;
+	/*
+	 * Add it, doubling the arc array first when it is full.
+	 */
+	if (n1->n_narcs == n1->n_arcsize) {
+		if (!n1->n_arcsize)
+			n1->n_arcsize = 10;	/* first growth yields 20 slots */
+		bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
+		n1->n_arcs = grow_buf(n1->n_arcs, bsize);
+		n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
+	}
+	n1->n_arcs[n1->n_narcs++] = n2;
+	++n2->n_refcnt;			/* one more arc points at n2 */
+}
+
+/* Find a node in the graph (insert if not found) and return a pointer to it. */
+static NODE *
+get_node(char *name)
+{
+	NODE *n = hash_find(&db, name);
+	size_t nlen;
+
+	if (n)
+		return n;
+
+	nlen = strlen(name) + 1;	/* include the terminating NUL */
+
+	if ((n = malloc(sizeof(NODE) + nlen)) == NULL)
+		err(1, NULL);
+
+	n->n_narcs = 0;
+	n->n_arcsize = 0;
+	n->n_arcs = NULL;
+	n->n_refcnt = 0;
+	n->n_flags = 0;
+	memcpy(n->n_name, name, nlen);	/* standard replacement for legacy bcopy() */
+
+	/* Add to linked list. */
+	if ((n->n_next = graph) != NULL)
+		graph->n_prevp = &n->n_next;
+	n->n_prevp = &graph;
+	graph = n;
+
+	/* Add to hash table. */
+	hash_insert(&db, name)->elem = n;
+	return (n);
+}
+
+
+/*
+ * Clear the NODEST flag from all nodes.
+ */
+static void
+clear_cycle(void)
+{
+	/* NF_NODEST is set during find_cycle(); wipe it on every remaining node */
+	for (NODE *cur = graph; cur != NULL; cur = cur->n_next) {
+		cur->n_flags &= ~NF_NODEST;
+	}
+}
+
+/* do topological sort on graph */
+static void
+tsort(void)
+{
+	NODE *n, *next;
+	int cnt, i;
+
+	while (graph != NULL) {
+		/*
+		 * Keep getting rid of simple cases until there are none left,
+		 * if there are any nodes still in the graph, then there is
+		 * a cycle in it.
+		 */
+		do {
+			for (cnt = 0, n = graph; n != NULL; n = next) {
+				next = n->n_next;	/* saved: remove_node() unlinks n */
+				if (n->n_refcnt == 0) {
+					remove_node(n);
+					++cnt;
+				}
+			}
+		} while (graph != NULL && cnt);
+
+		if (graph == NULL)
+			break;
+
+		if (!cycle_buf) {
+			/*
+			 * Allocate space for two cycle logs - one to be used
+			 * as scratch space, the other to save the longest
+			 * cycle.
+			 */
+			for (cnt = 0, n = graph; n != NULL; n = n->n_next)
+				++cnt;
+			cycle_buf = malloc(sizeof(NODE *) * cnt);
+			longest_cycle = malloc(sizeof(NODE *) * cnt);
+			if (cycle_buf == NULL || longest_cycle == NULL)
+				err(1, NULL);
+		}
+		for (n = graph; n != NULL; n = n->n_next)
+			if (!(n->n_flags & NF_ACYCLIC)) {
+				if ((cnt = find_cycle(n, n, 0, 0))) {
+					if (!quiet) {
+						warnx("cycle in data");
+						for (i = 0; i < cnt; i++)
+							warnx("%s",
+							    longest_cycle[i]->n_name);
+					}
+					remove_node(n);	/* break the cycle by emitting n */
+					clear_cycle();
+					break;
+				} else {
+					/* to avoid further checks */
+					n->n_flags  |= NF_ACYCLIC;
+					clear_cycle();
+				}
+			}
+
+		if (n == NULL)		/* loop ended without the break above */
+			errx(1, "internal error -- could not find cycle");
+	}
+}
+
+/* print node and remove from graph (does not actually free node) */
+static void
+remove_node(NODE *n)
+{
+	(void)puts(n->n_name);
+
+	/* each outgoing arc held one reference on its target; drop them all */
+	for (int a = 0; a < n->n_narcs; a++)
+		n->n_arcs[a]->n_refcnt -= 1;
+	n->n_narcs = 0;
+	/* splice n out of the graph list (neighbours on both sides) */
+	if (n->n_next != NULL)
+		n->n_next->n_prevp = n->n_prevp;
+	*n->n_prevp = n->n_next;
+}
+
+
+/* look for the longest? cycle from node from to node to. */
+static int
+find_cycle(NODE *from, NODE *to, int longest_len, int depth)
+{
+	NODE **np;
+	int i, len;
+
+	/*
+	 * avoid infinite loops and ignore portions of the graph known
+	 * to be acyclic
+	 */
+	if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))
+		return (0);
+	from->n_flags |= NF_MARK;	/* on the current search path */
+
+	for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
+		cycle_buf[depth] = *np;	/* record this step of the path */
+		if (*np == to) {	/* path closed back onto the target */
+			if (depth + 1 > longest_len) {
+				longest_len = depth + 1;
+				(void)memcpy((char *)longest_cycle,
+				    (char *)cycle_buf,
+				    longest_len * sizeof(NODE *));
+			}
+		} else {
+			if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))
+				continue;
+			len = find_cycle(*np, to, longest_len, depth + 1);
+
+			if (debug)
+				(void)printf("%*s %s->%s %d\n", depth, "",
+				    from->n_name, to->n_name, len);
+
+			if (len == 0)	/* recursion found no cycle through *np */
+				(*np)->n_flags |= NF_NODEST;
+
+			if (len > longest_len)
+				longest_len = len;
+
+			if (len > 0 && !longest)	/* without -l the first cycle is enough */
+				break;
+		}
+	}
+	from->n_flags &= ~NF_MARK;	/* leaving the search path */
+	return (longest_len);
+}
+
+static void
+usage(void)
+{
+	(void)fputs("usage: tsort [-dlq] [file]\n", stderr);
+	exit(1);
+}
-- 
cgit v1.2.3-70-g09d2