diff options
Diffstat (limited to 'usr.bin/sort/init.c')
-rw-r--r-- | usr.bin/sort/init.c | 447 |
1 files changed, 447 insertions, 0 deletions
diff --git a/usr.bin/sort/init.c b/usr.bin/sort/init.c new file mode 100644 index 0000000..ff1799b --- /dev/null +++ b/usr.bin/sort/init.c @@ -0,0 +1,447 @@ +/* $NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "sort.h" + +__RCSID("$NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $"); + +#include <ctype.h> +#include <string.h> + +static void insertcol(struct field *); +static const char *setcolumn(const char *, struct field *); + +/* + * masks of ignored characters. + */ +static u_char dtable[NBINS], itable[NBINS]; + +/* + * parsed key options + */ +struct coldesc *clist = NULL; +int ncols = 0; + +/* + * clist (list of columns which correspond to one or more icol or tcol) + * is in increasing order of columns. + * Fields are kept in increasing order of fields. + */ + +/* + * keep clist in order--inserts a column in a sorted array + */ +static void +insertcol(struct field *field) +{ + int i; + struct coldesc *p; + + /* Make space for new item */ + p = realloc(clist, (ncols + 2) * sizeof(*clist)); + if (!p) + err(1, "realloc"); + clist = p; + memset(&clist[ncols], 0, sizeof(clist[ncols])); + + for (i = 0; i < ncols; i++) + if (field->icol.num <= clist[i].num) + break; + if (field->icol.num != clist[i].num) { + memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i)); + clist[i].num = field->icol.num; + ncols++; + } + if (field->tcol.num && field->tcol.num != field->icol.num) { + for (i = 0; i < ncols; i++) + if (field->tcol.num <= clist[i].num) + break; + if (field->tcol.num != clist[i].num) { + memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i)); + clist[i].num = field->tcol.num; + ncols++; + } + } +} + +/* + * matches fields with the appropriate columns--n^2 but who cares? + */ +void +fldreset(struct field *fldtab) +{ + int i; + + fldtab[0].tcol.p = clist + ncols - 1; + for (++fldtab; fldtab->icol.num; ++fldtab) { + for (i = 0; fldtab->icol.num != clist[i].num; i++) + ; + fldtab->icol.p = clist + i; + if (!fldtab->tcol.num) + continue; + for (i = 0; fldtab->tcol.num != clist[i].num; i++) + ; + fldtab->tcol.p = clist + i; + } +} + +/* + * interprets a column in a -k field + */ +static const char * +setcolumn(const char *pos, struct field *cur_fld) +{ + struct column *col; + char *npos; + int tmp; + col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol); + col->num = (int) strtol(pos, &npos, 10); + pos = npos; + if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol))) + errx(2, "field numbers must be positive"); + if (*pos == '.') { + if (!col->num) + errx(2, "cannot indent end of line"); + ++pos; + col->indent = (int) strtol(pos, &npos, 10); + pos = npos; + if (&cur_fld->icol == col) + col->indent--; + if (col->indent < 0) + errx(2, "illegal offset"); + } + for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++) + cur_fld->flags |= tmp; + if (cur_fld->icol.num == 0) + cur_fld->icol.num = 1; + return (pos); +} + +int +setfield(const char *pos, struct field *cur_fld, int gflag) +{ + cur_fld->mask = NULL; + + pos = setcolumn(pos, cur_fld); + if (*pos == '\0') /* key extends to EOL. */ + cur_fld->tcol.num = 0; + else { + if (*pos != ',') + errx(2, "illegal field descriptor"); + setcolumn((++pos), cur_fld); + } + if (!cur_fld->flags) + cur_fld->flags = gflag; + if (REVERSE) + /* A local 'r' doesn't invert the global one */ + cur_fld->flags &= ~R; + + /* Assign appropriate mask table and weight table. */ + cur_fld->weights = weight_tables[cur_fld->flags & (R | F)]; + if (cur_fld->flags & I) + cur_fld->mask = itable; + else if (cur_fld->flags & D) + cur_fld->mask = dtable; + + cur_fld->flags |= (gflag & (BI | BT)); + if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */ + cur_fld->flags &= ~BT; + + if (cur_fld->tcol.num + && !(!(cur_fld->flags & BI) && cur_fld->flags & BT) + && (cur_fld->tcol.num <= cur_fld->icol.num + /* indent if 0 -> end of field, i.e. okay */ + && cur_fld->tcol.indent != 0 + && cur_fld->tcol.indent < cur_fld->icol.indent)) + errx(2, "fields out of order"); + + insertcol(cur_fld); + return (cur_fld->tcol.num); +} + +int +optval(int desc, int tcolflag) +{ + switch(desc) { + case 'b': + if (!tcolflag) + return BI; + else + return BT; + case 'd': return D; + case 'f': return F; + case 'i': return I; + case 'l': return L; + case 'n': return N; + case 'r': return R; + default: return 0; + } +} + +/* + * Return true if the options found in ARG, according to the getopt + * spec in OPTS, require an additional argv word as an option + * argument. + */ +static int +options_need_argument(const char *arg, const char *opts) +{ + size_t pos; + const char *s; + + /*assert(arg[0] == '-');*/ + + pos = 1; + while (arg[pos]) { + s = strchr(opts, arg[pos]); + if (s == NULL) { + /* invalid option */ + return 0; + } + if (s[1] == ':') { + /* option requires argument */ + if (arg[pos+1] == '\0') { + /* no argument in this arg */ + return 1; + } + else { + /* argument is in this arg; no more options */ + return 0; + } + } + pos++; + } + return 0; +} + +/* + * Replace historic +SPEC arguments with appropriate -kSPEC. + * + * The form can be either a single +SPEC or a pair +SPEC -SPEC. + * The following -SPEC is not recognized unless it follows + * immediately. + */ +void +fixit(int *argc, char **argv, const char *opts) +{ + int i, j, sawplus; + char *vpos, *tpos, spec[20]; + int col, indent; + + sawplus = 0; + for (i = 1; i < *argc; i++) { + /* + * This loop must stop exactly where getopt will stop. + * Otherwise it turns e.g. "sort x +3" into "sort x + * -k4.1", which will croak if +3 was in fact really a + * file name. In order to do this reliably we need to + * be able to identify argv words that are option + * arguments. + */ + + if (!strcmp(argv[i], "--")) { + /* End of options; stop. */ + break; + } + + if (argv[i][0] == '+') { + /* +POS argument */ + sawplus = 1; + } else if (argv[i][0] == '-' && sawplus && + isdigit((unsigned char)argv[i][1])) { + /* -POS argument */ + sawplus = 0; + } else if (argv[i][0] == '-') { + /* other option */ + sawplus = 0; + if (options_need_argument(argv[i], opts)) { + /* skip over the argument */ + i++; + } + continue; + } else { + /* not an option at all; stop */ + sawplus = 0; + break; + } + + /* + * At this point argv[i] is an old-style spec. The + * sawplus flag used by the above loop logic also + * tells us if it's a +SPEC or -SPEC. + */ + + /* parse spec */ + tpos = argv[i]+1; + col = (int)strtol(tpos, &tpos, 10); + if (*tpos == '.') { + ++tpos; + indent = (int) strtol(tpos, &tpos, 10); + } else + indent = 0; + /* tpos now points to the optional flags */ + + /* + * In the traditional form, x.0 means beginning of line; + * in the new form, x.0 means end of line. Adjust the + * value of INDENT accordingly. + */ + if (sawplus) { + /* +POS */ + col += 1; + indent += 1; + } else { + /* -POS */ + if (indent > 0) + col += 1; + } + + /* make the new style spec */ + (void)snprintf(spec, sizeof(spec), "%d.%d%s", col, indent, + tpos); + + if (sawplus) { + /* Replace the +POS argument with new-style -kSPEC */ + asprintf(&vpos, "-k%s", spec); + argv[i] = vpos; + } else { + /* + * Append the spec to the one from the + * preceding +POS argument, and remove the + * current argv element entirely. + */ + asprintf(&vpos, "%s,%s", argv[i-1], spec); + free(argv[i-1]); + argv[i-1] = vpos; + for (j=i; j < *argc; j++) + argv[j] = argv[j+1]; + *argc -= 1; + i--; + } + } +} + +/* + * ascii, Rascii, Ftable, and RFtable map + * + * Sorting 'weight' tables. + * Convert 'ascii' characters into their sort order. + * The 'F' variants fold lower case to upper equivalent + * The 'R' variants are for reverse sorting. + * + * The record separator (REC_D) never needs a weight, this frees one + * byte value as an 'end of key' marker. This must be 0 for normal + * weight tables, and 0xff for reverse weight tables - and is used + * to terminate keys so that short keys sort before (after if reverse) + * longer keys. + * + * The field separator has a normal weight - although it cannot occur + * within a key unless it is the default (space+tab). + * + * All other bytes map to the appropriate value for the sort order. + * Numeric sorts don't need any tables, they are reversed by negation. + * + * Global reverse sorts are done by writing the sorted keys in reverse + * order - the sort itself is stil forwards. + * This means that weights are only ever used when generating keys, any + * sort of the original data bytes is always forwards and unweighted. + * + * Note: this is only good for ASCII sorting. For different LC 's, + * all bets are off. + * + * itable[] and dtable[] are the masks for -i (ignore non-printables) + * and -d (only sort blank and alphanumerics). + */ +void +settables(void) +{ + int i; + int next_weight = 1; + int rev_weight = 254; + + ascii[REC_D] = 0; + Rascii[REC_D] = 255; + Ftable[REC_D] = 0; + RFtable[REC_D] = 255; + + for (i = 0; i < 256; i++) { + if (i == REC_D) + continue; + ascii[i] = next_weight; + Rascii[i] = rev_weight; + if (Ftable[i] == 0) { + Ftable[i] = next_weight; + RFtable[i] = rev_weight; + Ftable[tolower(i)] = next_weight; + RFtable[tolower(i)] = rev_weight; + } + next_weight++; + rev_weight--; + + if (i == '\n' || isprint(i)) + itable[i] = 1; + + if (i == '\n' || i == '\t' || i == ' ' || isalnum(i)) + dtable[i] = 1; + } +} |