initial population

author: Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> 2019-02-01 22:55:37 +0000
committer: Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> 2019-02-03 18:22:05 -0600
commit: 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe (patch)
tree: 154a22fe556b49e6927197336f8bf91b12eacd5e /usr.bin/sort/init.c
download: userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.gz
userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.bz2
userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.xz
userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.zip
1 files changed, 447 insertions, 0 deletions
diff --git a/usr.bin/sort/init.c b/usr.bin/sort/init.c
new file mode 100644
index 0000000..ff1799b
--- /dev/null
+++ b/usr.bin/sort/init.c
@@ -0,0 +1,447 @@
+/*	$NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $	*/
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "sort.h"
+
+__RCSID("$NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $");
+
+#include <ctype.h>
+#include <string.h>
+
+static void insertcol(struct field *);
+static const char *setcolumn(const char *, struct field *);
+
+/*
+ * masks of ignored characters.
+ */
+static u_char dtable[NBINS], itable[NBINS];
+
+/*
+ * parsed key options
+ */
+struct coldesc *clist = NULL;
+int ncols = 0;
+
+/*
+ * clist (list of columns which correspond to one or more icol or tcol)
+ * is in increasing order of columns.
+ * Fields are kept in increasing order of fields.
+ */
+
+/* 
+ * keep clist in order--inserts a column in a sorted array
+ */
+static void
+insertcol(struct field *field)
+{
+	int i;
+	struct coldesc *p;
+
+	/* Make space for new item */
+	p = realloc(clist, (ncols + 2) * sizeof(*clist));
+	if (!p)
+		err(1, "realloc");
+	clist = p;
+	memset(&clist[ncols], 0, sizeof(clist[ncols]));
+
+	for (i = 0; i < ncols; i++)
+		if (field->icol.num <= clist[i].num)
+			break;
+	if (field->icol.num != clist[i].num) {
+		memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
+		clist[i].num = field->icol.num;
+		ncols++;
+	}
+	if (field->tcol.num && field->tcol.num != field->icol.num) {
+		for (i = 0; i < ncols; i++)
+			if (field->tcol.num <= clist[i].num)
+				break;
+		if (field->tcol.num != clist[i].num) {
+			memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
+			clist[i].num = field->tcol.num;
+			ncols++;
+		}
+	}
+}
+
+/*
+ * matches fields with the appropriate columns--n^2 but who cares?
+ */
+void
+fldreset(struct field *fldtab)
+{
+	int i;
+
+	fldtab[0].tcol.p = clist + ncols - 1;
+	for (++fldtab; fldtab->icol.num; ++fldtab) {
+		for (i = 0; fldtab->icol.num != clist[i].num; i++)
+			;
+		fldtab->icol.p = clist + i;
+		if (!fldtab->tcol.num)
+			continue;
+		for (i = 0; fldtab->tcol.num != clist[i].num; i++)
+			;
+		fldtab->tcol.p = clist + i;
+	}
+}
+
+/*
+ * interprets a column in a -k field
+ */
+static const char *
+setcolumn(const char *pos, struct field *cur_fld)
+{
+	struct column *col;
+	char *npos;
+	int tmp;
+	col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
+	col->num = (int) strtol(pos, &npos, 10);
+	pos = npos;
+	if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
+		errx(2, "field numbers must be positive");
+	if (*pos == '.') {
+		if (!col->num)
+			errx(2, "cannot indent end of line");
+		++pos;
+		col->indent = (int) strtol(pos, &npos, 10);
+		pos = npos;
+		if (&cur_fld->icol == col)
+			col->indent--;
+		if (col->indent < 0)
+			errx(2, "illegal offset");
+	}
+	for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
+		cur_fld->flags |= tmp;
+	if (cur_fld->icol.num == 0)
+		cur_fld->icol.num = 1;
+	return (pos);
+}
+
+int
+setfield(const char *pos, struct field *cur_fld, int gflag)
+{
+	cur_fld->mask = NULL;
+
+	pos = setcolumn(pos, cur_fld);
+	if (*pos == '\0')			/* key extends to EOL. */
+		cur_fld->tcol.num = 0;
+	else {
+		if (*pos != ',')
+			errx(2, "illegal field descriptor");
+		setcolumn((++pos), cur_fld);
+	}
+	if (!cur_fld->flags)
+		cur_fld->flags = gflag;
+	if (REVERSE)
+		/* A local 'r' doesn't invert the global one */
+		cur_fld->flags &= ~R;
+
+	/* Assign appropriate mask table and weight table. */
+	cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
+	if (cur_fld->flags & I)
+		cur_fld->mask = itable;
+	else if (cur_fld->flags & D)
+		cur_fld->mask = dtable;
+
+	cur_fld->flags |= (gflag & (BI | BT));
+	if (!cur_fld->tcol.indent)	/* BT has no meaning at end of field */
+		cur_fld->flags &= ~BT;
+
+	if (cur_fld->tcol.num
+	    && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
+	    && (cur_fld->tcol.num <= cur_fld->icol.num
+		    /* indent if 0 -> end of field, i.e. okay */
+		    && cur_fld->tcol.indent != 0
+		    && cur_fld->tcol.indent < cur_fld->icol.indent))
+		errx(2, "fields out of order");
+
+	insertcol(cur_fld);
+	return (cur_fld->tcol.num);
+}
+
+int
+optval(int desc, int tcolflag)
+{
+	switch(desc) {
+	case 'b':
+		if (!tcolflag)
+			return BI;
+		else
+			return BT;
+	case 'd': return D;
+	case 'f': return F;
+	case 'i': return I;
+	case 'l': return L;
+	case 'n': return N;
+	case 'r': return R;
+	default:  return 0;
+	}
+}
+
+/*
+ * Return true if the options found in ARG, according to the getopt
+ * spec in OPTS, require an additional argv word as an option
+ * argument.
+ */
+static int
+options_need_argument(const char *arg, const char *opts)
+{
+	size_t pos;
+	const char *s;
+
+	/*assert(arg[0] == '-');*/
+
+	pos = 1;
+	while (arg[pos]) {
+		s = strchr(opts, arg[pos]);
+		if (s == NULL) {
+			/* invalid option */
+			return 0;
+		}
+		if (s[1] == ':') {
+			/* option requires argument */
+			if (arg[pos+1] == '\0') {
+				/* no argument in this arg */
+				return 1;
+			}
+			else {
+				/* argument is in this arg; no more options */
+				return 0;
+			}
+		}
+		pos++;
+	}
+	return 0;
+}
+
+/*
+ * Replace historic +SPEC arguments with appropriate -kSPEC.
+ *
+ * The form can be either a single +SPEC or a pair +SPEC -SPEC.
+ * The following -SPEC is not recognized unless it follows
+ * immediately.
+ */ 
+void
+fixit(int *argc, char **argv, const char *opts)
+{
+	int i, j, sawplus;
+	char *vpos, *tpos, spec[20];
+	int col, indent;
+
+	sawplus = 0;
+	for (i = 1; i < *argc; i++) {
+		/*
+		 * This loop must stop exactly where getopt will stop.
+		 * Otherwise it turns e.g. "sort x +3" into "sort x
+		 * -k4.1", which will croak if +3 was in fact really a
+		 * file name. In order to do this reliably we need to
+		 * be able to identify argv words that are option
+		 * arguments.
+		 */
+
+		if (!strcmp(argv[i], "--")) {
+			/* End of options; stop. */
+			break;
+		}
+
+		if (argv[i][0] == '+') {
+			/* +POS argument */
+			sawplus = 1;
+		} else if (argv[i][0] == '-' && sawplus &&
+			   isdigit((unsigned char)argv[i][1])) {
+			/* -POS argument */
+			sawplus = 0;
+		} else if (argv[i][0] == '-') {
+			/* other option */
+			sawplus = 0;
+			if (options_need_argument(argv[i], opts)) {
+				/* skip over the argument */
+				i++;
+			}
+			continue;
+		} else {
+			/* not an option at all; stop */
+			sawplus = 0;
+			break;
+		}
+
+		/*
+		 * At this point argv[i] is an old-style spec. The
+		 * sawplus flag used by the above loop logic also
+		 * tells us if it's a +SPEC or -SPEC.
+		 */
+		
+		/* parse spec */
+		tpos = argv[i]+1;
+		col = (int)strtol(tpos, &tpos, 10);
+		if (*tpos == '.') {
+			++tpos;
+			indent = (int) strtol(tpos, &tpos, 10);
+		} else
+			indent = 0;
+		/* tpos now points to the optional flags */
+
+		/*
+		 * In the traditional form, x.0 means beginning of line;
+		 * in the new form, x.0 means end of line. Adjust the
+		 * value of INDENT accordingly.
+		 */
+		if (sawplus) {
+			/* +POS */
+			col += 1;
+			indent += 1;
+		} else {
+			/* -POS */
+			if (indent > 0)
+				col += 1;
+		}
+
+		/* make the new style spec */
+		(void)snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
+		    tpos);
+
+		if (sawplus) {
+			/* Replace the +POS argument with new-style -kSPEC */
+			asprintf(&vpos, "-k%s", spec);
+			argv[i] = vpos;
+		} else {
+			/*
+			 * Append the spec to the one from the
+			 * preceding +POS argument, and remove the
+			 * current argv element entirely.
+			 */
+			asprintf(&vpos, "%s,%s", argv[i-1], spec);
+			free(argv[i-1]);
+			argv[i-1] = vpos;
+			for (j=i; j < *argc; j++)
+				argv[j] = argv[j+1];
+			*argc -= 1;
+			i--;
+		}
+	}
+}
+
+/*
+ * ascii, Rascii, Ftable, and RFtable map
+ *
+ * Sorting 'weight' tables.
+ * Convert 'ascii' characters into their sort order.
+ * The 'F' variants fold lower case to upper equivalent
+ * The 'R' variants are for reverse sorting.
+ *
+ * The record separator (REC_D) never needs a weight, this frees one
+ * byte value as an 'end of key' marker. This must be 0 for normal
+ * weight tables, and 0xff for reverse weight tables - and is used
+ * to terminate keys so that short keys sort before (after if reverse)
+ * longer keys.
+ *
+ * The field separator has a normal weight - although it cannot occur
+ * within a key unless it is the default (space+tab).
+ *
+ * All other bytes map to the appropriate value for the sort order.
+ * Numeric sorts don't need any tables, they are reversed by negation.
+ *
+ * Global reverse sorts are done by writing the sorted keys in reverse
+ * order - the sort itself is stil forwards.
+ * This means that weights are only ever used when generating keys, any
+ * sort of the original data bytes is always forwards and unweighted.
+ *
+ * Note: this is only good for ASCII sorting.  For different LC 's,
+ * all bets are off.
+ *
+ * itable[] and dtable[] are the masks for -i (ignore non-printables)
+ * and -d (only sort blank and alphanumerics).
+ */
+void
+settables(void)
+{
+	int i;
+	int next_weight = 1;
+	int rev_weight = 254;
+
+	ascii[REC_D] = 0;
+	Rascii[REC_D] = 255;
+	Ftable[REC_D] = 0;
+	RFtable[REC_D] = 255;
+
+	for (i = 0; i < 256; i++) {
+		if (i == REC_D)
+			continue;
+		ascii[i] = next_weight;
+		Rascii[i] = rev_weight;
+		if (Ftable[i] == 0) {
+			Ftable[i] = next_weight;
+			RFtable[i] = rev_weight;
+			Ftable[tolower(i)] = next_weight;
+			RFtable[tolower(i)] = rev_weight;
+		}
+		next_weight++;
+		rev_weight--;
+
+		if (i == '\n' || isprint(i))
+			itable[i] = 1;
+
+		if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
+			dtable[i] = 1;
+	}
+}
author	Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com>	2019-02-01 22:55:37 +0000
committer	Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com>	2019-02-03 18:22:05 -0600
commit	5b57d28ffb6e1ef86b50f7d05d977826eae89bfe (patch)
tree	154a22fe556b49e6927197336f8bf91b12eacd5e /usr.bin/sort/init.c
download	userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.gz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.bz2 userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.xz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.zip