summaryrefslogblamecommitdiff
path: root/usr.bin/sort/init.c
blob: 279deb22ad7611f7117d5a14a0dc0d4cff0a8d05 (plain) (tree)
































































                                                                              








                                                           
                                                  

















































































































































































































































































































































































                                                                               
/*	$NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $	*/

/*-
 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Ben Harris and Jaromir Dolecek.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Peter McIlroy.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "sort.h"

#include <ctype.h>
#include <string.h>

static void insertcol(struct field *);
static const char *setcolumn(const char *, struct field *);

/*
 * masks of ignored characters.
 */
static unsigned char dtable[NBINS], itable[NBINS];

/*
 * parsed key options
 */
struct coldesc *clist = NULL;
int ncols = 0;

/*
 * clist (list of columns which correspond to one or more icol or tcol)
 * is in increasing order of columns.
 * Fields are kept in increasing order of fields.
 */

/* 
 * keep clist in order--inserts a column in a sorted array
 */
static void
insertcol(struct field *field)
{
	int i;
	struct coldesc *p;

	/* Make space for new item */
	p = realloc(clist, (ncols + 2) * sizeof(*clist));
	if (!p)
		err(1, "realloc");
	clist = p;
	memset(&clist[ncols], 0, sizeof(clist[ncols]));

	for (i = 0; i < ncols; i++)
		if (field->icol.num <= clist[i].num)
			break;
	if (field->icol.num != clist[i].num) {
		memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
		clist[i].num = field->icol.num;
		ncols++;
	}
	if (field->tcol.num && field->tcol.num != field->icol.num) {
		for (i = 0; i < ncols; i++)
			if (field->tcol.num <= clist[i].num)
				break;
		if (field->tcol.num != clist[i].num) {
			memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
			clist[i].num = field->tcol.num;
			ncols++;
		}
	}
}

/*
 * matches fields with the appropriate columns--n^2 but who cares?
 */
void
fldreset(struct field *fldtab)
{
	int i;

	fldtab[0].tcol.p = clist + ncols - 1;
	for (++fldtab; fldtab->icol.num; ++fldtab) {
		for (i = 0; fldtab->icol.num != clist[i].num; i++)
			;
		fldtab->icol.p = clist + i;
		if (!fldtab->tcol.num)
			continue;
		for (i = 0; fldtab->tcol.num != clist[i].num; i++)
			;
		fldtab->tcol.p = clist + i;
	}
}

/*
 * interprets a column in a -k field
 */
static const char *
setcolumn(const char *pos, struct field *cur_fld)
{
	struct column *col;
	char *npos;
	int tmp;
	col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
	col->num = (int) strtol(pos, &npos, 10);
	pos = npos;
	if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
		errx(2, "field numbers must be positive");
	if (*pos == '.') {
		if (!col->num)
			errx(2, "cannot indent end of line");
		++pos;
		col->indent = (int) strtol(pos, &npos, 10);
		pos = npos;
		if (&cur_fld->icol == col)
			col->indent--;
		if (col->indent < 0)
			errx(2, "illegal offset");
	}
	for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
		cur_fld->flags |= tmp;
	if (cur_fld->icol.num == 0)
		cur_fld->icol.num = 1;
	return (pos);
}

int
setfield(const char *pos, struct field *cur_fld, int gflag)
{
	cur_fld->mask = NULL;

	pos = setcolumn(pos, cur_fld);
	if (*pos == '\0')			/* key extends to EOL. */
		cur_fld->tcol.num = 0;
	else {
		if (*pos != ',')
			errx(2, "illegal field descriptor");
		setcolumn((++pos), cur_fld);
	}
	if (!cur_fld->flags)
		cur_fld->flags = gflag;
	if (REVERSE)
		/* A local 'r' doesn't invert the global one */
		cur_fld->flags &= ~R;

	/* Assign appropriate mask table and weight table. */
	cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
	if (cur_fld->flags & I)
		cur_fld->mask = itable;
	else if (cur_fld->flags & D)
		cur_fld->mask = dtable;

	cur_fld->flags |= (gflag & (BI | BT));
	if (!cur_fld->tcol.indent)	/* BT has no meaning at end of field */
		cur_fld->flags &= ~BT;

	if (cur_fld->tcol.num
	    && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
	    && (cur_fld->tcol.num <= cur_fld->icol.num
		    /* indent if 0 -> end of field, i.e. okay */
		    && cur_fld->tcol.indent != 0
		    && cur_fld->tcol.indent < cur_fld->icol.indent))
		errx(2, "fields out of order");

	insertcol(cur_fld);
	return (cur_fld->tcol.num);
}

int
optval(int desc, int tcolflag)
{
	switch(desc) {
	case 'b':
		if (!tcolflag)
			return BI;
		else
			return BT;
	case 'd': return D;
	case 'f': return F;
	case 'i': return I;
	case 'l': return L;
	case 'n': return N;
	case 'r': return R;
	default:  return 0;
	}
}

/*
 * Return true if the options found in ARG, according to the getopt
 * spec in OPTS, require an additional argv word as an option
 * argument.
 */
static int
options_need_argument(const char *arg, const char *opts)
{
	size_t pos;
	const char *s;

	/*assert(arg[0] == '-');*/

	pos = 1;
	while (arg[pos]) {
		s = strchr(opts, arg[pos]);
		if (s == NULL) {
			/* invalid option */
			return 0;
		}
		if (s[1] == ':') {
			/* option requires argument */
			if (arg[pos+1] == '\0') {
				/* no argument in this arg */
				return 1;
			}
			else {
				/* argument is in this arg; no more options */
				return 0;
			}
		}
		pos++;
	}
	return 0;
}

/*
 * Replace historic +SPEC arguments with appropriate -kSPEC.
 *
 * The form can be either a single +SPEC or a pair +SPEC -SPEC.
 * The following -SPEC is not recognized unless it follows
 * immediately.
 */ 
void
fixit(int *argc, char **argv, const char *opts)
{
	int i, j, sawplus;
	char *vpos, *tpos, spec[20];
	int col, indent;

	sawplus = 0;
	for (i = 1; i < *argc; i++) {
		/*
		 * This loop must stop exactly where getopt will stop.
		 * Otherwise it turns e.g. "sort x +3" into "sort x
		 * -k4.1", which will croak if +3 was in fact really a
		 * file name. In order to do this reliably we need to
		 * be able to identify argv words that are option
		 * arguments.
		 */

		if (!strcmp(argv[i], "--")) {
			/* End of options; stop. */
			break;
		}

		if (argv[i][0] == '+') {
			/* +POS argument */
			sawplus = 1;
		} else if (argv[i][0] == '-' && sawplus &&
			   isdigit((unsigned char)argv[i][1])) {
			/* -POS argument */
			sawplus = 0;
		} else if (argv[i][0] == '-') {
			/* other option */
			sawplus = 0;
			if (options_need_argument(argv[i], opts)) {
				/* skip over the argument */
				i++;
			}
			continue;
		} else {
			/* not an option at all; stop */
			sawplus = 0;
			break;
		}

		/*
		 * At this point argv[i] is an old-style spec. The
		 * sawplus flag used by the above loop logic also
		 * tells us if it's a +SPEC or -SPEC.
		 */
		
		/* parse spec */
		tpos = argv[i]+1;
		col = (int)strtol(tpos, &tpos, 10);
		if (*tpos == '.') {
			++tpos;
			indent = (int) strtol(tpos, &tpos, 10);
		} else
			indent = 0;
		/* tpos now points to the optional flags */

		/*
		 * In the traditional form, x.0 means beginning of line;
		 * in the new form, x.0 means end of line. Adjust the
		 * value of INDENT accordingly.
		 */
		if (sawplus) {
			/* +POS */
			col += 1;
			indent += 1;
		} else {
			/* -POS */
			if (indent > 0)
				col += 1;
		}

		/* make the new style spec */
		(void)snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
		    tpos);

		if (sawplus) {
			/* Replace the +POS argument with new-style -kSPEC */
			asprintf(&vpos, "-k%s", spec);
			argv[i] = vpos;
		} else {
			/*
			 * Append the spec to the one from the
			 * preceding +POS argument, and remove the
			 * current argv element entirely.
			 */
			asprintf(&vpos, "%s,%s", argv[i-1], spec);
			free(argv[i-1]);
			argv[i-1] = vpos;
			for (j=i; j < *argc; j++)
				argv[j] = argv[j+1];
			*argc -= 1;
			i--;
		}
	}
}

/*
 * ascii, Rascii, Ftable, and RFtable map
 *
 * Sorting 'weight' tables.
 * Convert 'ascii' characters into their sort order.
 * The 'F' variants fold lower case to upper equivalent
 * The 'R' variants are for reverse sorting.
 *
 * The record separator (REC_D) never needs a weight, this frees one
 * byte value as an 'end of key' marker. This must be 0 for normal
 * weight tables, and 0xff for reverse weight tables - and is used
 * to terminate keys so that short keys sort before (after if reverse)
 * longer keys.
 *
 * The field separator has a normal weight - although it cannot occur
 * within a key unless it is the default (space+tab).
 *
 * All other bytes map to the appropriate value for the sort order.
 * Numeric sorts don't need any tables, they are reversed by negation.
 *
 * Global reverse sorts are done by writing the sorted keys in reverse
 * order - the sort itself is stil forwards.
 * This means that weights are only ever used when generating keys, any
 * sort of the original data bytes is always forwards and unweighted.
 *
 * Note: this is only good for ASCII sorting.  For different LC 's,
 * all bets are off.
 *
 * itable[] and dtable[] are the masks for -i (ignore non-printables)
 * and -d (only sort blank and alphanumerics).
 */
void
settables(void)
{
	int i;
	int next_weight = 1;
	int rev_weight = 254;

	ascii[REC_D] = 0;
	Rascii[REC_D] = 255;
	Ftable[REC_D] = 0;
	RFtable[REC_D] = 255;

	for (i = 0; i < 256; i++) {
		if (i == REC_D)
			continue;
		ascii[i] = next_weight;
		Rascii[i] = rev_weight;
		if (Ftable[i] == 0) {
			Ftable[i] = next_weight;
			RFtable[i] = rev_weight;
			Ftable[tolower(i)] = next_weight;
			RFtable[tolower(i)] = rev_weight;
		}
		next_weight++;
		rev_weight--;

		if (i == '\n' || isprint(i))
			itable[i] = 1;

		if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
			dtable[i] = 1;
	}
}