initial population

author: Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> 2019-02-01 22:55:37 +0000
committer: Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> 2019-02-03 18:22:05 -0600
commit: 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe (patch)
tree: 154a22fe556b49e6927197336f8bf91b12eacd5e /usr.bin/sort/fields.c
download: userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.gz
userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.bz2
userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.xz
userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.zip
1 files changed, 380 insertions, 0 deletions
diff --git a/usr.bin/sort/fields.c b/usr.bin/sort/fields.c
new file mode 100644
index 0000000..6208a78
--- /dev/null
+++ b/usr.bin/sort/fields.c
@@ -0,0 +1,380 @@
+/*	$NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $	*/
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Subroutines to generate sort keys. */
+
+#include "sort.h"
+
+__RCSID("$NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $");
+
+#define SKIP_BLANKS(ptr) {					\
+	if (BLANK & d_mask[*(ptr)])				\
+		while (BLANK & d_mask[*(++(ptr))]);		\
+}
+
+#define NEXTCOL(pos) {						\
+	if (!SEP_FLAG)						\
+		while (BLANK & l_d_mask[*(++pos)]);		\
+	while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\
+}
+		
+static u_char *enterfield(u_char *, const u_char *, struct field *, int);
+static u_char *number(u_char *, const u_char *, u_char *, u_char *, int);
+static u_char *length(u_char *, const u_char *, u_char *, u_char *, int);
+
+#define DECIMAL_POINT '.'
+
+/*
+ * constructs sort key with leading recheader, followed by the key,
+ * followed by the original line.
+ */
+length_t
+enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data,
+    size_t line_size, struct field fieldtable[])
+	/* keybuf:	 pointer to start of key */
+{
+	int i;
+	u_char *l_d_mask;
+	u_char *lineend, *pos;
+	const u_char *endkey;
+	u_char *keypos;
+	struct coldesc *clpos;
+	int col = 1;
+	struct field *ftpos;
+
+	l_d_mask = d_mask;
+	pos = line_data - 1;
+	lineend = line_data + line_size-1;
+				/* don't include rec_delimiter */
+
+	for (i = 0; i < ncols; i++) {
+		clpos = clist + i;
+		for (; (col < clpos->num) && (pos < lineend); col++) {
+			NEXTCOL(pos);
+		}
+		if (pos >= lineend)
+			break;
+		clpos->start = SEP_FLAG ? pos + 1 : pos;
+		NEXTCOL(pos);
+		clpos->end = pos;
+		col++;
+		if (pos >= lineend) {
+			clpos->end = lineend;
+			i++;
+			break;
+		}
+	}
+	for (; i <= ncols; i++)
+		clist[i].start = clist[i].end = lineend;
+	if (clist[0].start < line_data)
+		clist[0].start++;
+
+	/*
+	 * We write the sort keys (concatenated) followed by the
+	 * original line data (for output) as the 'keybuf' data.
+	 * keybuf->length is the number of key bytes + data bytes.
+	 * keybuf->offset is the number of key bytes.
+	 * We add a record separator weight after the key in case
+	 * (as is usual) we need to preserve the order of equal lines,
+	 * and for 'sort -u'.
+	 * The key itself will have had the correct weight applied.
+	 */
+	keypos = keybuf->data;
+	endkey = keybuf_end - line_size - 1;
+	if (endkey <= keypos)
+		/* No room for any key bytes */
+		return 1;
+
+	for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) {
+		if ((keypos = enterfield(keypos, endkey, ftpos,
+		    fieldtable->flags)) == NULL)
+			return (1);
+	}
+
+	keybuf->offset = keypos - keybuf->data;
+	keybuf->length = keybuf->offset + line_size;
+
+	/*
+	 * Posix requires that equal keys be further sorted by the
+	 * entire original record.
+	 * NetBSD has (at least for some time) kept equal keys in
+	 * their original order.
+	 * For 'sort -u' posix_sort is unset.
+	 */
+	keybuf->keylen = posix_sort ? keybuf->length : keybuf->offset;
+
+	memcpy(keypos, line_data, line_size);
+	return (0);
+}
+
+/*
+ * constructs a field (as defined by -k) within a key
+ */
+static u_char *
+enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld,
+    int gflags)
+{
+	u_char *start, *end, *lineend, *mask, *lweight;
+	struct column icol, tcol;
+	u_int flags;
+
+	icol = cur_fld->icol;
+	tcol = cur_fld->tcol;
+	flags = cur_fld->flags;
+	start = icol.p->start;
+	lineend = clist[ncols].end;
+	if (flags & BI)
+		SKIP_BLANKS(start);
+	start += icol.indent;
+	start = min(start, lineend);
+
+	if (!tcol.num)
+		end = lineend;
+	else {
+		if (tcol.indent) {
+			end = tcol.p->start;
+			if (flags & BT)
+				SKIP_BLANKS(end);
+			end += tcol.indent;
+			end = min(end, lineend);
+		} else
+			end = tcol.p->end;
+	}
+
+	if (flags & L)
+		return length(tablepos, endkey, start, end, flags);
+	if (flags & N)
+		return number(tablepos, endkey, start, end, flags);
+
+	/* Bound check space - assuming nothing is skipped */
+	if (tablepos + (end - start) + 1 >= endkey)
+		return NULL;
+
+	mask = cur_fld->mask;
+	lweight = cur_fld->weights;	
+	for (; start < end; start++) {
+		if (!mask || mask[*start]) {
+			*tablepos++ = lweight[*start];
+		}
+	}
+	/* Add extra byte (absent from lweight) to sort short keys correctly */
+	*tablepos++ = lweight[REC_D];
+	return tablepos;
+}
+
+/*
+ * Numbers are converted to a floating point format (exponent & mantissa)
+ * so that they compare correctly as sequence of unsigned bytes.
+ * Bytes 0x00 and 0xff are used to terminate positive and negative numbers
+ * to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12.
+ *
+ * The first byte contain the overall sign, exponent sign and some of the
+ * exponent. These have to be ordered (-ve value, decreasing exponent),
+ * zero, (+ve value, increasing exponent).
+ *
+ * The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0.
+ * -ve values are the 1's compliments (so 0x7f isn't used!).
+ *
+ * This only leaves 63 byte values for +ve exponents - which isn't enough.
+ * The largest 4 exponent values are used to hold a byte count of the
+ * number of following bytes that contain 8 exponent bits per byte,
+ * This lets us sort exponents from -2^31 to +2^31.
+ *
+ * The mantissa is stored 2 digits per byte offset by 0x40, for negative
+ * numbers the order must be reversed (they are bit inverted).
+ *
+ * Reverse sorts are done by inverting the sign of the number.
+ */
+#define MAX_EXP_ENC  ((int)sizeof(int))
+
+static u_char *
+number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
+    int reverse)
+{
+	int exponent = -1;
+	int had_dp = 0;
+	u_char *tline;
+	char ch;
+	unsigned int val;
+	u_char *last_nz_pos;
+	u_char negate;
+
+	if (reverse & R)
+		negate = 0xff;
+	else
+		negate = 0;
+
+	/* Give ourselves space for the key terminator */
+	bufend--;
+
+	/* Ensure we have enough space for the exponent */
+	if (pos + 1 + MAX_EXP_ENC > bufend)
+		return (NULL);
+
+	SKIP_BLANKS(line);
+	if (*line == '-') {	/* set the sign */
+		negate ^= 0xff;
+		line++;
+	} else if (*line == '+') {
+		line++;
+	}
+
+	/* eat initial zeroes */
+	for (; *line == '0' && line < lineend; line++)
+		continue;
+
+	/* calculate exponents */
+	if (*line == DECIMAL_POINT) {
+		/* Decimal fraction */
+		had_dp = 1;
+		while (*++line == '0' && line < lineend)
+			exponent--;
+	} else {
+		/* Large (absolute) value, count digits */
+		for (tline = line; *tline >= '0' && 
+		    *tline <= '9' && tline < lineend; tline++)
+			exponent++;
+	}
+
+	/* If the first/next character isn't a digit, value is zero */
+	if (*line < '1' || *line > '9' || line >= lineend) {
+		/* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */
+		/* XXX what about NaN, NAN, inf and INF */
+		*pos++ = 0x80;
+		return pos;
+	}
+
+	/* Maybe here we should allow for e+12 (etc) */
+
+	if (exponent < 0x40 - MAX_EXP_ENC && -exponent < 0x40 - MAX_EXP_ENC) {
+		/* Value ok for simple encoding */
+		/* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */
+		exponent += 0xc0;
+		*pos++ = negate ^ exponent;
+	} else {
+		/* Out or range for a single byte */
+		int c, t;
+		t = exponent > 0 ? exponent : -exponent;
+		/* Count how many 8-bit bytes are needed */
+		for (c = 0; ; c++) {
+			t >>= 8;
+			if (t == 0)
+				break;
+		}
+		/* 'c' better be 0..3 here - but probably 0..1 */
+		/* Offset just outside valid range */
+		t = c + 0x40 - MAX_EXP_ENC;
+		if (exponent < 0)
+			t = -t;
+		*pos++ = negate ^ (t + 0xc0);
+		/* now add each byte, most significant first */
+		for (; c >= 0; c--)
+			*pos++ = negate ^ (exponent >> (c * 8));
+	}
+
+	/* Finally add mantissa, 2 digits per byte */
+	for (last_nz_pos = pos; line < lineend; ) {
+		if (pos >= bufend)
+			return NULL;
+		ch = *line++;
+		val = (ch - '0') * 10;
+		if (val > 90) {
+			if (ch == DECIMAL_POINT && !had_dp) {
+				had_dp = 1;
+				continue;
+			}
+			break;
+		}
+		while (line < lineend) {
+			ch = *line++;
+			if (ch == DECIMAL_POINT && !had_dp) {
+				had_dp = 1;
+				continue;
+			}
+			if (ch < '0' || ch > '9')
+				line = lineend;
+			else
+				val += ch - '0';
+			break;
+		}
+		*pos++ = negate ^ (val + 0x40);
+		if (val != 0)
+			last_nz_pos = pos;
+	}
+
+	/* Add key terminator, deleting any trailing "00" */
+	*last_nz_pos++ = negate;
+
+	return (last_nz_pos);
+}
+
+static u_char *
+length(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
+    int flag)
+{
+	u_char buf[32];
+	int l;
+	SKIP_BLANKS(line);
+	l = snprintf((char *)buf, sizeof(buf), "%td", lineend - line);
+	return number(pos, bufend, buf, buf + l, flag);
+}
author	Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com>	2019-02-01 22:55:37 +0000
committer	Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com>	2019-02-03 18:22:05 -0600
commit	5b57d28ffb6e1ef86b50f7d05d977826eae89bfe (patch)
tree	154a22fe556b49e6927197336f8bf91b12eacd5e /usr.bin/sort/fields.c
download	userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.gz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.bz2 userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.xz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.zip