diff options
author | Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> | 2019-02-01 22:55:37 +0000 |
---|---|---|
committer | Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> | 2019-02-03 18:22:05 -0600 |
commit | 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe (patch) | |
tree | 154a22fe556b49e6927197336f8bf91b12eacd5e /usr.bin/sort/fields.c | |
download | userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.gz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.bz2 userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.xz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.zip |
initial population
Diffstat (limited to 'usr.bin/sort/fields.c')
-rw-r--r-- | usr.bin/sort/fields.c | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/usr.bin/sort/fields.c b/usr.bin/sort/fields.c new file mode 100644 index 0000000..6208a78 --- /dev/null +++ b/usr.bin/sort/fields.c @@ -0,0 +1,380 @@ +/* $NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Subroutines to generate sort keys. */ + +#include "sort.h" + +__RCSID("$NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $"); + +#define SKIP_BLANKS(ptr) { \ + if (BLANK & d_mask[*(ptr)]) \ + while (BLANK & d_mask[*(++(ptr))]); \ +} + +#define NEXTCOL(pos) { \ + if (!SEP_FLAG) \ + while (BLANK & l_d_mask[*(++pos)]); \ + while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\ +} + +static u_char *enterfield(u_char *, const u_char *, struct field *, int); +static u_char *number(u_char *, const u_char *, u_char *, u_char *, int); +static u_char *length(u_char *, const u_char *, u_char *, u_char *, int); + +#define DECIMAL_POINT '.' + +/* + * constructs sort key with leading recheader, followed by the key, + * followed by the original line. + */ +length_t +enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data, + size_t line_size, struct field fieldtable[]) + /* keybuf: pointer to start of key */ +{ + int i; + u_char *l_d_mask; + u_char *lineend, *pos; + const u_char *endkey; + u_char *keypos; + struct coldesc *clpos; + int col = 1; + struct field *ftpos; + + l_d_mask = d_mask; + pos = line_data - 1; + lineend = line_data + line_size-1; + /* don't include rec_delimiter */ + + for (i = 0; i < ncols; i++) { + clpos = clist + i; + for (; (col < clpos->num) && (pos < lineend); col++) { + NEXTCOL(pos); + } + if (pos >= lineend) + break; + clpos->start = SEP_FLAG ? pos + 1 : pos; + NEXTCOL(pos); + clpos->end = pos; + col++; + if (pos >= lineend) { + clpos->end = lineend; + i++; + break; + } + } + for (; i <= ncols; i++) + clist[i].start = clist[i].end = lineend; + if (clist[0].start < line_data) + clist[0].start++; + + /* + * We write the sort keys (concatenated) followed by the + * original line data (for output) as the 'keybuf' data. + * keybuf->length is the number of key bytes + data bytes. + * keybuf->offset is the number of key bytes. + * We add a record separator weight after the key in case + * (as is usual) we need to preserve the order of equal lines, + * and for 'sort -u'. + * The key itself will have had the correct weight applied. + */ + keypos = keybuf->data; + endkey = keybuf_end - line_size - 1; + if (endkey <= keypos) + /* No room for any key bytes */ + return 1; + + for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) { + if ((keypos = enterfield(keypos, endkey, ftpos, + fieldtable->flags)) == NULL) + return (1); + } + + keybuf->offset = keypos - keybuf->data; + keybuf->length = keybuf->offset + line_size; + + /* + * Posix requires that equal keys be further sorted by the + * entire original record. + * NetBSD has (at least for some time) kept equal keys in + * their original order. + * For 'sort -u' posix_sort is unset. + */ + keybuf->keylen = posix_sort ? keybuf->length : keybuf->offset; + + memcpy(keypos, line_data, line_size); + return (0); +} + +/* + * constructs a field (as defined by -k) within a key + */ +static u_char * +enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld, + int gflags) +{ + u_char *start, *end, *lineend, *mask, *lweight; + struct column icol, tcol; + u_int flags; + + icol = cur_fld->icol; + tcol = cur_fld->tcol; + flags = cur_fld->flags; + start = icol.p->start; + lineend = clist[ncols].end; + if (flags & BI) + SKIP_BLANKS(start); + start += icol.indent; + start = min(start, lineend); + + if (!tcol.num) + end = lineend; + else { + if (tcol.indent) { + end = tcol.p->start; + if (flags & BT) + SKIP_BLANKS(end); + end += tcol.indent; + end = min(end, lineend); + } else + end = tcol.p->end; + } + + if (flags & L) + return length(tablepos, endkey, start, end, flags); + if (flags & N) + return number(tablepos, endkey, start, end, flags); + + /* Bound check space - assuming nothing is skipped */ + if (tablepos + (end - start) + 1 >= endkey) + return NULL; + + mask = cur_fld->mask; + lweight = cur_fld->weights; + for (; start < end; start++) { + if (!mask || mask[*start]) { + *tablepos++ = lweight[*start]; + } + } + /* Add extra byte (absent from lweight) to sort short keys correctly */ + *tablepos++ = lweight[REC_D]; + return tablepos; +} + +/* + * Numbers are converted to a floating point format (exponent & mantissa) + * so that they compare correctly as sequence of unsigned bytes. + * Bytes 0x00 and 0xff are used to terminate positive and negative numbers + * to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12. + * + * The first byte contain the overall sign, exponent sign and some of the + * exponent. These have to be ordered (-ve value, decreasing exponent), + * zero, (+ve value, increasing exponent). + * + * The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0. + * -ve values are the 1's compliments (so 0x7f isn't used!). + * + * This only leaves 63 byte values for +ve exponents - which isn't enough. + * The largest 4 exponent values are used to hold a byte count of the + * number of following bytes that contain 8 exponent bits per byte, + * This lets us sort exponents from -2^31 to +2^31. + * + * The mantissa is stored 2 digits per byte offset by 0x40, for negative + * numbers the order must be reversed (they are bit inverted). + * + * Reverse sorts are done by inverting the sign of the number. + */ +#define MAX_EXP_ENC ((int)sizeof(int)) + +static u_char * +number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, + int reverse) +{ + int exponent = -1; + int had_dp = 0; + u_char *tline; + char ch; + unsigned int val; + u_char *last_nz_pos; + u_char negate; + + if (reverse & R) + negate = 0xff; + else + negate = 0; + + /* Give ourselves space for the key terminator */ + bufend--; + + /* Ensure we have enough space for the exponent */ + if (pos + 1 + MAX_EXP_ENC > bufend) + return (NULL); + + SKIP_BLANKS(line); + if (*line == '-') { /* set the sign */ + negate ^= 0xff; + line++; + } else if (*line == '+') { + line++; + } + + /* eat initial zeroes */ + for (; *line == '0' && line < lineend; line++) + continue; + + /* calculate exponents */ + if (*line == DECIMAL_POINT) { + /* Decimal fraction */ + had_dp = 1; + while (*++line == '0' && line < lineend) + exponent--; + } else { + /* Large (absolute) value, count digits */ + for (tline = line; *tline >= '0' && + *tline <= '9' && tline < lineend; tline++) + exponent++; + } + + /* If the first/next character isn't a digit, value is zero */ + if (*line < '1' || *line > '9' || line >= lineend) { + /* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */ + /* XXX what about NaN, NAN, inf and INF */ + *pos++ = 0x80; + return pos; + } + + /* Maybe here we should allow for e+12 (etc) */ + + if (exponent < 0x40 - MAX_EXP_ENC && -exponent < 0x40 - MAX_EXP_ENC) { + /* Value ok for simple encoding */ + /* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */ + exponent += 0xc0; + *pos++ = negate ^ exponent; + } else { + /* Out or range for a single byte */ + int c, t; + t = exponent > 0 ? exponent : -exponent; + /* Count how many 8-bit bytes are needed */ + for (c = 0; ; c++) { + t >>= 8; + if (t == 0) + break; + } + /* 'c' better be 0..3 here - but probably 0..1 */ + /* Offset just outside valid range */ + t = c + 0x40 - MAX_EXP_ENC; + if (exponent < 0) + t = -t; + *pos++ = negate ^ (t + 0xc0); + /* now add each byte, most significant first */ + for (; c >= 0; c--) + *pos++ = negate ^ (exponent >> (c * 8)); + } + + /* Finally add mantissa, 2 digits per byte */ + for (last_nz_pos = pos; line < lineend; ) { + if (pos >= bufend) + return NULL; + ch = *line++; + val = (ch - '0') * 10; + if (val > 90) { + if (ch == DECIMAL_POINT && !had_dp) { + had_dp = 1; + continue; + } + break; + } + while (line < lineend) { + ch = *line++; + if (ch == DECIMAL_POINT && !had_dp) { + had_dp = 1; + continue; + } + if (ch < '0' || ch > '9') + line = lineend; + else + val += ch - '0'; + break; + } + *pos++ = negate ^ (val + 0x40); + if (val != 0) + last_nz_pos = pos; + } + + /* Add key terminator, deleting any trailing "00" */ + *last_nz_pos++ = negate; + + return (last_nz_pos); +} + +static u_char * +length(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, + int flag) +{ + u_char buf[32]; + int l; + SKIP_BLANKS(line); + l = snprintf((char *)buf, sizeof(buf), "%td", lineend - line); + return number(pos, bufend, buf, buf + l, flag); +} |