summaryrefslogtreecommitdiff
path: root/usr.bin/sort
diff options
context:
space:
mode:
authorKiyoshi Aman <kiyoshi.aman+adelie@gmail.com>2019-02-01 22:55:37 +0000
committerKiyoshi Aman <kiyoshi.aman+adelie@gmail.com>2019-02-03 18:22:05 -0600
commit5b57d28ffb6e1ef86b50f7d05d977826eae89bfe (patch)
tree154a22fe556b49e6927197336f8bf91b12eacd5e /usr.bin/sort
downloaduserland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.gz
userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.bz2
userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.xz
userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.zip
initial population
Diffstat (limited to 'usr.bin/sort')
-rw-r--r--usr.bin/sort/append.c94
-rw-r--r--usr.bin/sort/fields.c380
-rw-r--r--usr.bin/sort/files.c279
-rw-r--r--usr.bin/sort/fsort.c202
-rw-r--r--usr.bin/sort/fsort.h78
-rw-r--r--usr.bin/sort/init.c447
-rw-r--r--usr.bin/sort/msort.c431
-rw-r--r--usr.bin/sort/pathnames.h66
-rw-r--r--usr.bin/sort/radix_sort.c217
-rw-r--r--usr.bin/sort/sort.1525
-rw-r--r--usr.bin/sort/sort.c419
-rw-r--r--usr.bin/sort/sort.h201
-rw-r--r--usr.bin/sort/tmp.c106
13 files changed, 3445 insertions, 0 deletions
diff --git a/usr.bin/sort/append.c b/usr.bin/sort/append.c
new file mode 100644
index 0000000..07e15e1
--- /dev/null
+++ b/usr.bin/sort/append.c
@@ -0,0 +1,94 @@
+/* $NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "sort.h"
+
+__RCSID("$NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $");
+
+#include <stdlib.h>
+
+/*
+ * copy sorted lines to output
+ * Ignore duplicates (marked with -ve keylen)
+ */
+void
+append(RECHEADER **keylist, int nelem, FILE *fp, put_func_t put)
+{
+ RECHEADER **cpos, **lastkey;
+ RECHEADER *crec;
+
+ lastkey = keylist + nelem;
+ if (REVERSE) {
+ for (cpos = lastkey; cpos-- > keylist;) {
+ crec = *cpos;
+ if (crec->keylen >= 0)
+ put(crec, fp);
+ }
+ } else {
+ for (cpos = keylist; cpos < lastkey; cpos++) {
+ crec = *cpos;
+ if (crec->keylen >= 0)
+ put(crec, fp);
+ }
+ }
+}
diff --git a/usr.bin/sort/fields.c b/usr.bin/sort/fields.c
new file mode 100644
index 0000000..6208a78
--- /dev/null
+++ b/usr.bin/sort/fields.c
@@ -0,0 +1,380 @@
+/* $NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Subroutines to generate sort keys. */
+
+#include "sort.h"
+
+__RCSID("$NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $");
+
+#define SKIP_BLANKS(ptr) { \
+ if (BLANK & d_mask[*(ptr)]) \
+ while (BLANK & d_mask[*(++(ptr))]); \
+}
+
+#define NEXTCOL(pos) { \
+ if (!SEP_FLAG) \
+ while (BLANK & l_d_mask[*(++pos)]); \
+ while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\
+}
+
+static u_char *enterfield(u_char *, const u_char *, struct field *, int);
+static u_char *number(u_char *, const u_char *, u_char *, u_char *, int);
+static u_char *length(u_char *, const u_char *, u_char *, u_char *, int);
+
+#define DECIMAL_POINT '.'
+
+/*
+ * constructs sort key with leading recheader, followed by the key,
+ * followed by the original line.
+ */
+length_t
+enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data,
+ size_t line_size, struct field fieldtable[])
+ /* keybuf: pointer to start of key */
+{
+ int i;
+ u_char *l_d_mask;
+ u_char *lineend, *pos;
+ const u_char *endkey;
+ u_char *keypos;
+ struct coldesc *clpos;
+ int col = 1;
+ struct field *ftpos;
+
+ l_d_mask = d_mask;
+ pos = line_data - 1;
+ lineend = line_data + line_size-1;
+ /* don't include rec_delimiter */
+
+ for (i = 0; i < ncols; i++) {
+ clpos = clist + i;
+ for (; (col < clpos->num) && (pos < lineend); col++) {
+ NEXTCOL(pos);
+ }
+ if (pos >= lineend)
+ break;
+ clpos->start = SEP_FLAG ? pos + 1 : pos;
+ NEXTCOL(pos);
+ clpos->end = pos;
+ col++;
+ if (pos >= lineend) {
+ clpos->end = lineend;
+ i++;
+ break;
+ }
+ }
+ for (; i <= ncols; i++)
+ clist[i].start = clist[i].end = lineend;
+ if (clist[0].start < line_data)
+ clist[0].start++;
+
+ /*
+ * We write the sort keys (concatenated) followed by the
+ * original line data (for output) as the 'keybuf' data.
+ * keybuf->length is the number of key bytes + data bytes.
+ * keybuf->offset is the number of key bytes.
+ * We add a record separator weight after the key in case
+ * (as is usual) we need to preserve the order of equal lines,
+ * and for 'sort -u'.
+ * The key itself will have had the correct weight applied.
+ */
+ keypos = keybuf->data;
+ endkey = keybuf_end - line_size - 1;
+ if (endkey <= keypos)
+ /* No room for any key bytes */
+ return 1;
+
+ for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) {
+ if ((keypos = enterfield(keypos, endkey, ftpos,
+ fieldtable->flags)) == NULL)
+ return (1);
+ }
+
+ keybuf->offset = keypos - keybuf->data;
+ keybuf->length = keybuf->offset + line_size;
+
+ /*
+ * Posix requires that equal keys be further sorted by the
+ * entire original record.
+ * NetBSD has (at least for some time) kept equal keys in
+ * their original order.
+ * For 'sort -u' posix_sort is unset.
+ */
+ keybuf->keylen = posix_sort ? keybuf->length : keybuf->offset;
+
+ memcpy(keypos, line_data, line_size);
+ return (0);
+}
+
+/*
+ * constructs a field (as defined by -k) within a key
+ */
+static u_char *
+enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld,
+ int gflags)
+{
+ u_char *start, *end, *lineend, *mask, *lweight;
+ struct column icol, tcol;
+ u_int flags;
+
+ icol = cur_fld->icol;
+ tcol = cur_fld->tcol;
+ flags = cur_fld->flags;
+ start = icol.p->start;
+ lineend = clist[ncols].end;
+ if (flags & BI)
+ SKIP_BLANKS(start);
+ start += icol.indent;
+ start = min(start, lineend);
+
+ if (!tcol.num)
+ end = lineend;
+ else {
+ if (tcol.indent) {
+ end = tcol.p->start;
+ if (flags & BT)
+ SKIP_BLANKS(end);
+ end += tcol.indent;
+ end = min(end, lineend);
+ } else
+ end = tcol.p->end;
+ }
+
+ if (flags & L)
+ return length(tablepos, endkey, start, end, flags);
+ if (flags & N)
+ return number(tablepos, endkey, start, end, flags);
+
+ /* Bound check space - assuming nothing is skipped */
+ if (tablepos + (end - start) + 1 >= endkey)
+ return NULL;
+
+ mask = cur_fld->mask;
+ lweight = cur_fld->weights;
+ for (; start < end; start++) {
+ if (!mask || mask[*start]) {
+ *tablepos++ = lweight[*start];
+ }
+ }
+ /* Add extra byte (absent from lweight) to sort short keys correctly */
+ *tablepos++ = lweight[REC_D];
+ return tablepos;
+}
+
+/*
+ * Numbers are converted to a floating point format (exponent & mantissa)
+ * so that they compare correctly as sequence of unsigned bytes.
+ * Bytes 0x00 and 0xff are used to terminate positive and negative numbers
+ * to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12.
+ *
+ * The first byte contain the overall sign, exponent sign and some of the
+ * exponent. These have to be ordered (-ve value, decreasing exponent),
+ * zero, (+ve value, increasing exponent).
+ *
+ * The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0.
+ * -ve values are the 1's compliments (so 0x7f isn't used!).
+ *
+ * This only leaves 63 byte values for +ve exponents - which isn't enough.
+ * The largest 4 exponent values are used to hold a byte count of the
+ * number of following bytes that contain 8 exponent bits per byte,
+ * This lets us sort exponents from -2^31 to +2^31.
+ *
+ * The mantissa is stored 2 digits per byte offset by 0x40, for negative
+ * numbers the order must be reversed (they are bit inverted).
+ *
+ * Reverse sorts are done by inverting the sign of the number.
+ */
+#define MAX_EXP_ENC ((int)sizeof(int))
+
+static u_char *
+number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
+ int reverse)
+{
+ int exponent = -1;
+ int had_dp = 0;
+ u_char *tline;
+ char ch;
+ unsigned int val;
+ u_char *last_nz_pos;
+ u_char negate;
+
+ if (reverse & R)
+ negate = 0xff;
+ else
+ negate = 0;
+
+ /* Give ourselves space for the key terminator */
+ bufend--;
+
+ /* Ensure we have enough space for the exponent */
+ if (pos + 1 + MAX_EXP_ENC > bufend)
+ return (NULL);
+
+ SKIP_BLANKS(line);
+ if (*line == '-') { /* set the sign */
+ negate ^= 0xff;
+ line++;
+ } else if (*line == '+') {
+ line++;
+ }
+
+ /* eat initial zeroes */
+ for (; *line == '0' && line < lineend; line++)
+ continue;
+
+ /* calculate exponents */
+ if (*line == DECIMAL_POINT) {
+ /* Decimal fraction */
+ had_dp = 1;
+ while (*++line == '0' && line < lineend)
+ exponent--;
+ } else {
+ /* Large (absolute) value, count digits */
+ for (tline = line; *tline >= '0' &&
+ *tline <= '9' && tline < lineend; tline++)
+ exponent++;
+ }
+
+ /* If the first/next character isn't a digit, value is zero */
+ if (*line < '1' || *line > '9' || line >= lineend) {
+ /* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */
+ /* XXX what about NaN, NAN, inf and INF */
+ *pos++ = 0x80;
+ return pos;
+ }
+
+ /* Maybe here we should allow for e+12 (etc) */
+
+ if (exponent < 0x40 - MAX_EXP_ENC && -exponent < 0x40 - MAX_EXP_ENC) {
+ /* Value ok for simple encoding */
+ /* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */
+ exponent += 0xc0;
+ *pos++ = negate ^ exponent;
+ } else {
+ /* Out or range for a single byte */
+ int c, t;
+ t = exponent > 0 ? exponent : -exponent;
+ /* Count how many 8-bit bytes are needed */
+ for (c = 0; ; c++) {
+ t >>= 8;
+ if (t == 0)
+ break;
+ }
+ /* 'c' better be 0..3 here - but probably 0..1 */
+ /* Offset just outside valid range */
+ t = c + 0x40 - MAX_EXP_ENC;
+ if (exponent < 0)
+ t = -t;
+ *pos++ = negate ^ (t + 0xc0);
+ /* now add each byte, most significant first */
+ for (; c >= 0; c--)
+ *pos++ = negate ^ (exponent >> (c * 8));
+ }
+
+ /* Finally add mantissa, 2 digits per byte */
+ for (last_nz_pos = pos; line < lineend; ) {
+ if (pos >= bufend)
+ return NULL;
+ ch = *line++;
+ val = (ch - '0') * 10;
+ if (val > 90) {
+ if (ch == DECIMAL_POINT && !had_dp) {
+ had_dp = 1;
+ continue;
+ }
+ break;
+ }
+ while (line < lineend) {
+ ch = *line++;
+ if (ch == DECIMAL_POINT && !had_dp) {
+ had_dp = 1;
+ continue;
+ }
+ if (ch < '0' || ch > '9')
+ line = lineend;
+ else
+ val += ch - '0';
+ break;
+ }
+ *pos++ = negate ^ (val + 0x40);
+ if (val != 0)
+ last_nz_pos = pos;
+ }
+
+ /* Add key terminator, deleting any trailing "00" */
+ *last_nz_pos++ = negate;
+
+ return (last_nz_pos);
+}
+
+static u_char *
+length(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
+ int flag)
+{
+ u_char buf[32];
+ int l;
+ SKIP_BLANKS(line);
+ l = snprintf((char *)buf, sizeof(buf), "%td", lineend - line);
+ return number(pos, bufend, buf, buf + l, flag);
+}
diff --git a/usr.bin/sort/files.c b/usr.bin/sort/files.c
new file mode 100644
index 0000000..7cb27c5
--- /dev/null
+++ b/usr.bin/sort/files.c
@@ -0,0 +1,279 @@
+/* $NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "sort.h"
+#include "fsort.h"
+
+__RCSID("$NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $");
+
+#include <string.h>
+
+/* Align records in temporary files to avoid misaligned copies */
+#define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1))
+
+static ssize_t seq(FILE *, u_char **);
+
+/*
+ * this is called when there is no special key. It's only called
+ * in the first fsort pass.
+ */
+
+static u_char *opos;
+static size_t osz;
+
+void
+makeline_copydown(RECHEADER *recbuf)
+{
+ memmove(recbuf->data, opos, osz);
+}
+
+int
+makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
+{
+ u_char *pos;
+ int c;
+
+ pos = recbuf->data;
+ if (osz != 0) {
+ /*
+ * Buffer shortage is solved by either of two ways:
+ * o flush previous buffered data and start using the
+ * buffer from start.
+ * makeline_copydown() above must be called.
+ * o realloc buffer
+ *
+ * This code has relied on realloc changing 'bufend',
+ * but that isn't necessarily true.
+ */
+ pos += osz;
+ osz = 0;
+ }
+
+ while (pos < bufend) {
+ c = getc(fp);
+ if (c == EOF) {
+ if (pos == recbuf->data) {
+ FCLOSE(fp);
+ return EOF;
+ }
+ /* Add terminator to partial line */
+ c = REC_D;
+ }
+ *pos++ = c;
+ if (c == REC_D) {
+ recbuf->offset = 0;
+ recbuf->length = pos - recbuf->data;
+ recbuf->keylen = recbuf->length - 1;
+ return (0);
+ }
+ }
+
+ /* Ran out of buffer space... */
+ if (recbuf->data < bufend) {
+ /* Remember where the partial record is */
+ osz = pos - recbuf->data;
+ opos = recbuf->data;
+ }
+ return (BUFFEND);
+}
+
+/*
+ * This generates keys. It's only called in the first fsort pass
+ */
+int
+makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
+{
+ static u_char *line_data;
+ static ssize_t line_size;
+ static int overflow = 0;
+
+ /* We get re-entered after returning BUFFEND - save old data */
+ if (overflow) {
+ overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
+ return overflow ? BUFFEND : 0;
+ }
+
+ line_size = seq(fp, &line_data);
+ if (line_size == 0) {
+ FCLOSE(fp);
+ return EOF;
+ }
+
+ if (line_size > bufend - recbuf->data) {
+ overflow = 1;
+ } else {
+ overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
+ }
+ return overflow ? BUFFEND : 0;
+}
+
+/*
+ * get a line of input from fp
+ */
+static ssize_t
+seq(FILE *fp, u_char **line)
+{
+ static u_char *buf;
+ static size_t buf_size = DEFLLEN;
+ u_char *end, *pos;
+ int c;
+ u_char *new_buf;
+
+ if (!buf) {
+ /* one-time initialization */
+ buf = malloc(buf_size);
+ if (!buf)
+ err(2, "malloc of linebuf for %zu bytes failed",
+ buf_size);
+ }
+
+ end = buf + buf_size;
+ pos = buf;
+ while ((c = getc(fp)) != EOF) {
+ *pos++ = c;
+ if (c == REC_D) {
+ *line = buf;
+ return pos - buf;
+ }
+ if (pos == end) {
+ /* Long line - double size of buffer */
+ /* XXX: Check here for stupidly long lines */
+ buf_size *= 2;
+ new_buf = realloc(buf, buf_size);
+ if (!new_buf)
+ err(2, "realloc of linebuf to %zu bytes failed",
+ buf_size);
+
+ end = new_buf + buf_size;
+ pos = new_buf + (pos - buf);
+ buf = new_buf;
+ }
+ }
+
+ if (pos != buf) {
+ /* EOF part way through line - add line terminator */
+ *pos++ = REC_D;
+ *line = buf;
+ return pos - buf;
+ }
+
+ return 0;
+}
+
+/*
+ * write a key/line pair to a temporary file
+ */
+void
+putrec(const RECHEADER *rec, FILE *fp)
+{
+ EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp,
+ "failed to write temp file");
+}
+
+/*
+ * write a line to output
+ */
+void
+putline(const RECHEADER *rec, FILE *fp)
+{
+ EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp,
+ "failed to write");
+}
+
+/*
+ * write dump of key to output (for -Dk)
+ */
+void
+putkeydump(const RECHEADER *rec, FILE *fp)
+{
+ EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp,
+ "failed to write debug key");
+}
+
+/*
+ * get a record from a temporary file. (Used by merge sort.)
+ */
+int
+geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
+{
+ length_t file_len;
+ int i;
+
+ (void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]);
+
+ if ((u_char *)(rec + 1) > end)
+ return (BUFFEND);
+ if (!fread(&rec->length, 1, sizeof rec->length, fp)) {
+ fclose(fp);
+ return (EOF);
+ }
+ file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length);
+ if (end - rec->data < (ptrdiff_t)file_len) {
+ for (i = sizeof rec->length - 1; i >= 0; i--)
+ ungetc(*((char *) rec + i), fp);
+ return (BUFFEND);
+ }
+
+ fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp);
+ return (0);
+}
diff --git a/usr.bin/sort/fsort.c b/usr.bin/sort/fsort.c
new file mode 100644
index 0000000..c229ae2
--- /dev/null
+++ b/usr.bin/sort/fsort.c
@@ -0,0 +1,202 @@
+/* $NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Read in a block of records (until 'enough').
+ * sort, write to temp file.
+ * Merge sort temp files into output file
+ * Small files miss out the temp file stage.
+ * Large files might get multiple merges.
+ */
+#include "sort.h"
+#include "fsort.h"
+
+__RCSID("$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $");
+
+#include <stdlib.h>
+#include <string.h>
+
+#define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
+
+void
+fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
+{
+ RECHEADER **keylist;
+ RECHEADER **keypos, **keyp;
+ RECHEADER *buffer;
+ size_t bufsize = DEFBUFSIZE;
+ u_char *bufend;
+ int mfct = 0;
+ int c, nelem;
+ get_func_t get;
+ RECHEADER *crec;
+ RECHEADER *nbuffer;
+ FILE *fp, *tmp_fp;
+ int file_no;
+ int max_recs = DEBUG('m') ? 16 : MAXNUM;
+
+ buffer = allocrec(NULL, bufsize);
+ bufend = (u_char *)buffer + bufsize;
+ /* Allocate double length keymap for radix_sort */
+ keylist = malloc(2 * max_recs * sizeof(*keylist));
+ if (buffer == NULL || keylist == NULL)
+ err(2, "failed to malloc initial buffer or keylist");
+
+ if (SINGL_FLD)
+ /* Key and data are one! */
+ get = makeline;
+ else
+ /* Key (merged key fields) added before data */
+ get = makekey;
+
+ file_no = 0;
+ fp = fopen(filelist->names[0], "r");
+ if (fp == NULL)
+ err(2, "%s", filelist->names[0]);
+
+ /* Loop through reads of chunk of input files that get sorted
+ * and then merged together. */
+ for (;;) {
+ keypos = keylist;
+ nelem = 0;
+ crec = buffer;
+ makeline_copydown(crec);
+
+ /* Loop reading records */
+ for (;;) {
+ c = get(fp, crec, bufend, ftbl);
+ /* 'c' is 0, EOF or BUFFEND */
+ if (c == 0) {
+ /* Save start of key in input buffer */
+ *keypos++ = crec;
+ if (++nelem == max_recs) {
+ c = BUFFEND;
+ break;
+ }
+ crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
+ continue;
+ }
+ if (c == EOF) {
+ /* try next file */
+ if (++file_no >= nfiles)
+ /* no more files */
+ break;
+ fp = fopen(filelist->names[file_no], "r");
+ if (fp == NULL)
+ err(2, "%s", filelist->names[file_no]);
+ continue;
+ }
+ if (nelem >= max_recs
+ || (bufsize >= MAXBUFSIZE && nelem > 8))
+ /* Need to sort and save this lot of data */
+ break;
+
+ /* c == BUFFEND, and we can process more data */
+ /* Allocate a larger buffer for this lot of data */
+ bufsize *= 2;
+ nbuffer = allocrec(buffer, bufsize);
+ if (!nbuffer) {
+ err(2, "failed to realloc buffer to %zu bytes",
+ bufsize);
+ }
+
+ /* patch up keylist[] */
+ for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
+ *keyp = nbuffer + (*keyp - buffer);
+
+ crec = nbuffer + (crec - buffer);
+ buffer = nbuffer;
+ bufend = (u_char *)buffer + bufsize;
+ }
+
+ /* Sort this set of records */
+ radix_sort(keylist, keylist + max_recs, nelem);
+
+ if (c == EOF && mfct == 0) {
+ /* all the data is (sorted) in the buffer */
+ append(keylist, nelem, outfp,
+ DEBUG('k') ? putkeydump : putline);
+ break;
+ }
+
+ /* Save current data to a temporary file for a later merge */
+ if (nelem != 0) {
+ tmp_fp = ftmp();
+ append(keylist, nelem, tmp_fp, putrec);
+ save_for_merge(tmp_fp, geteasy, ftbl);
+ }
+ mfct = 1;
+
+ if (c == EOF) {
+ /* merge to output file */
+ merge_sort(outfp,
+ DEBUG('k') ? putkeydump : putline, ftbl);
+ break;
+ }
+ }
+
+ free(keylist);
+ keylist = NULL;
+ free(buffer);
+ buffer = NULL;
+}
diff --git a/usr.bin/sort/fsort.h b/usr.bin/sort/fsort.h
new file mode 100644
index 0000000..3d8ef4c
--- /dev/null
+++ b/usr.bin/sort/fsort.h
@@ -0,0 +1,78 @@
+/* $NetBSD: fsort.h,v 1.17 2009/09/26 21:16:55 dsl Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fsort.h 8.1 (Berkeley) 6/6/93
+ */
+
+#define BUFSIZE (1<<20)
+#define MAXNUM 131072 /* low guess at average record count */
+#define BUFFEND (EOF-2)
+#define MAXFCT 1000
+#define DEFLLEN 65536
+
+/*
+ * Default (initial) and maximum size of record buffer for fsort().
+ * Note that no more than MAXNUM records are stored in the buffer,
+ * even if the buffer is not full yet.
+ */
+#define DEFBUFSIZE (1 << 20) /* 1MB */
+#define MAXBUFSIZE (8 << 20) /* 10 MB */
diff --git a/usr.bin/sort/init.c b/usr.bin/sort/init.c
new file mode 100644
index 0000000..ff1799b
--- /dev/null
+++ b/usr.bin/sort/init.c
@@ -0,0 +1,447 @@
+/* $NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "sort.h"
+
+__RCSID("$NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $");
+
+#include <ctype.h>
+#include <string.h>
+
+static void insertcol(struct field *);
+static const char *setcolumn(const char *, struct field *);
+
+/*
+ * masks of ignored characters.
+ */
+static u_char dtable[NBINS], itable[NBINS];
+
+/*
+ * parsed key options
+ */
+struct coldesc *clist = NULL;
+int ncols = 0;
+
+/*
+ * clist (list of columns which correspond to one or more icol or tcol)
+ * is in increasing order of columns.
+ * Fields are kept in increasing order of fields.
+ */
+
+/*
+ * keep clist in order--inserts a column in a sorted array
+ */
+static void
+insertcol(struct field *field)
+{
+ int i;
+ struct coldesc *p;
+
+ /* Make space for new item */
+ p = realloc(clist, (ncols + 2) * sizeof(*clist));
+ if (!p)
+ err(1, "realloc");
+ clist = p;
+ memset(&clist[ncols], 0, sizeof(clist[ncols]));
+
+ for (i = 0; i < ncols; i++)
+ if (field->icol.num <= clist[i].num)
+ break;
+ if (field->icol.num != clist[i].num) {
+ memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
+ clist[i].num = field->icol.num;
+ ncols++;
+ }
+ if (field->tcol.num && field->tcol.num != field->icol.num) {
+ for (i = 0; i < ncols; i++)
+ if (field->tcol.num <= clist[i].num)
+ break;
+ if (field->tcol.num != clist[i].num) {
+ memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
+ clist[i].num = field->tcol.num;
+ ncols++;
+ }
+ }
+}
+
+/*
+ * matches fields with the appropriate columns--n^2 but who cares?
+ */
+void
+fldreset(struct field *fldtab)
+{
+ int i;
+
+ fldtab[0].tcol.p = clist + ncols - 1;
+ for (++fldtab; fldtab->icol.num; ++fldtab) {
+ for (i = 0; fldtab->icol.num != clist[i].num; i++)
+ ;
+ fldtab->icol.p = clist + i;
+ if (!fldtab->tcol.num)
+ continue;
+ for (i = 0; fldtab->tcol.num != clist[i].num; i++)
+ ;
+ fldtab->tcol.p = clist + i;
+ }
+}
+
+/*
+ * interprets a column in a -k field
+ */
+static const char *
+setcolumn(const char *pos, struct field *cur_fld)
+{
+ struct column *col;
+ char *npos;
+ int tmp;
+ col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
+ col->num = (int) strtol(pos, &npos, 10);
+ pos = npos;
+ if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
+ errx(2, "field numbers must be positive");
+ if (*pos == '.') {
+ if (!col->num)
+ errx(2, "cannot indent end of line");
+ ++pos;
+ col->indent = (int) strtol(pos, &npos, 10);
+ pos = npos;
+ if (&cur_fld->icol == col)
+ col->indent--;
+ if (col->indent < 0)
+ errx(2, "illegal offset");
+ }
+ for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
+ cur_fld->flags |= tmp;
+ if (cur_fld->icol.num == 0)
+ cur_fld->icol.num = 1;
+ return (pos);
+}
+
+int
+setfield(const char *pos, struct field *cur_fld, int gflag)
+{
+ cur_fld->mask = NULL;
+
+ pos = setcolumn(pos, cur_fld);
+ if (*pos == '\0') /* key extends to EOL. */
+ cur_fld->tcol.num = 0;
+ else {
+ if (*pos != ',')
+ errx(2, "illegal field descriptor");
+ setcolumn((++pos), cur_fld);
+ }
+ if (!cur_fld->flags)
+ cur_fld->flags = gflag;
+ if (REVERSE)
+ /* A local 'r' doesn't invert the global one */
+ cur_fld->flags &= ~R;
+
+ /* Assign appropriate mask table and weight table. */
+ cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
+ if (cur_fld->flags & I)
+ cur_fld->mask = itable;
+ else if (cur_fld->flags & D)
+ cur_fld->mask = dtable;
+
+ cur_fld->flags |= (gflag & (BI | BT));
+ if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
+ cur_fld->flags &= ~BT;
+
+ if (cur_fld->tcol.num
+ && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
+ && (cur_fld->tcol.num <= cur_fld->icol.num
+ /* indent if 0 -> end of field, i.e. okay */
+ && cur_fld->tcol.indent != 0
+ && cur_fld->tcol.indent < cur_fld->icol.indent))
+ errx(2, "fields out of order");
+
+ insertcol(cur_fld);
+ return (cur_fld->tcol.num);
+}
+
+int
+optval(int desc, int tcolflag)
+{
+ switch(desc) {
+ case 'b':
+ if (!tcolflag)
+ return BI;
+ else
+ return BT;
+ case 'd': return D;
+ case 'f': return F;
+ case 'i': return I;
+ case 'l': return L;
+ case 'n': return N;
+ case 'r': return R;
+ default: return 0;
+ }
+}
+
+/*
+ * Return true if the options found in ARG, according to the getopt
+ * spec in OPTS, require an additional argv word as an option
+ * argument.
+ */
+static int
+options_need_argument(const char *arg, const char *opts)
+{
+ size_t pos;
+ const char *s;
+
+ /*assert(arg[0] == '-');*/
+
+ pos = 1;
+ while (arg[pos]) {
+ s = strchr(opts, arg[pos]);
+ if (s == NULL) {
+ /* invalid option */
+ return 0;
+ }
+ if (s[1] == ':') {
+ /* option requires argument */
+ if (arg[pos+1] == '\0') {
+ /* no argument in this arg */
+ return 1;
+ }
+ else {
+ /* argument is in this arg; no more options */
+ return 0;
+ }
+ }
+ pos++;
+ }
+ return 0;
+}
+
+/*
+ * Replace historic +SPEC arguments with appropriate -kSPEC.
+ *
+ * The form can be either a single +SPEC or a pair +SPEC -SPEC.
+ * The following -SPEC is not recognized unless it follows
+ * immediately.
+ */
+void
+fixit(int *argc, char **argv, const char *opts)
+{
+ int i, j, sawplus;
+ char *vpos, *tpos, spec[20];
+ int col, indent;
+
+ sawplus = 0;
+ for (i = 1; i < *argc; i++) {
+ /*
+ * This loop must stop exactly where getopt will stop.
+ * Otherwise it turns e.g. "sort x +3" into "sort x
+ * -k4.1", which will croak if +3 was in fact really a
+ * file name. In order to do this reliably we need to
+ * be able to identify argv words that are option
+ * arguments.
+ */
+
+ if (!strcmp(argv[i], "--")) {
+ /* End of options; stop. */
+ break;
+ }
+
+ if (argv[i][0] == '+') {
+ /* +POS argument */
+ sawplus = 1;
+ } else if (argv[i][0] == '-' && sawplus &&
+ isdigit((unsigned char)argv[i][1])) {
+ /* -POS argument */
+ sawplus = 0;
+ } else if (argv[i][0] == '-') {
+ /* other option */
+ sawplus = 0;
+ if (options_need_argument(argv[i], opts)) {
+ /* skip over the argument */
+ i++;
+ }
+ continue;
+ } else {
+ /* not an option at all; stop */
+ sawplus = 0;
+ break;
+ }
+
+ /*
+ * At this point argv[i] is an old-style spec. The
+ * sawplus flag used by the above loop logic also
+ * tells us if it's a +SPEC or -SPEC.
+ */
+
+ /* parse spec */
+ tpos = argv[i]+1;
+ col = (int)strtol(tpos, &tpos, 10);
+ if (*tpos == '.') {
+ ++tpos;
+ indent = (int) strtol(tpos, &tpos, 10);
+ } else
+ indent = 0;
+ /* tpos now points to the optional flags */
+
+ /*
+ * In the traditional form, x.0 means beginning of line;
+ * in the new form, x.0 means end of line. Adjust the
+ * value of INDENT accordingly.
+ */
+ if (sawplus) {
+ /* +POS */
+ col += 1;
+ indent += 1;
+ } else {
+ /* -POS */
+ if (indent > 0)
+ col += 1;
+ }
+
+ /* make the new style spec */
+ (void)snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
+ tpos);
+
+ if (sawplus) {
+ /* Replace the +POS argument with new-style -kSPEC */
+ asprintf(&vpos, "-k%s", spec);
+ argv[i] = vpos;
+ } else {
+ /*
+ * Append the spec to the one from the
+ * preceding +POS argument, and remove the
+ * current argv element entirely.
+ */
+ asprintf(&vpos, "%s,%s", argv[i-1], spec);
+ free(argv[i-1]);
+ argv[i-1] = vpos;
+ for (j=i; j < *argc; j++)
+ argv[j] = argv[j+1];
+ *argc -= 1;
+ i--;
+ }
+ }
+}
+
+/*
+ * ascii, Rascii, Ftable, and RFtable map
+ *
+ * Sorting 'weight' tables.
+ * Convert 'ascii' characters into their sort order.
+ * The 'F' variants fold lower case to upper equivalent
+ * The 'R' variants are for reverse sorting.
+ *
+ * The record separator (REC_D) never needs a weight, this frees one
+ * byte value as an 'end of key' marker. This must be 0 for normal
+ * weight tables, and 0xff for reverse weight tables - and is used
+ * to terminate keys so that short keys sort before (after if reverse)
+ * longer keys.
+ *
+ * The field separator has a normal weight - although it cannot occur
+ * within a key unless it is the default (space+tab).
+ *
+ * All other bytes map to the appropriate value for the sort order.
+ * Numeric sorts don't need any tables, they are reversed by negation.
+ *
+ * Global reverse sorts are done by writing the sorted keys in reverse
+ * order - the sort itself is stil forwards.
+ * This means that weights are only ever used when generating keys, any
+ * sort of the original data bytes is always forwards and unweighted.
+ *
+ * Note: this is only good for ASCII sorting. For different LC 's,
+ * all bets are off.
+ *
+ * itable[] and dtable[] are the masks for -i (ignore non-printables)
+ * and -d (only sort blank and alphanumerics).
+ */
+void
+settables(void)
+{
+ int i;
+ int next_weight = 1;
+ int rev_weight = 254;
+
+ ascii[REC_D] = 0;
+ Rascii[REC_D] = 255;
+ Ftable[REC_D] = 0;
+ RFtable[REC_D] = 255;
+
+ for (i = 0; i < 256; i++) {
+ if (i == REC_D)
+ continue;
+ ascii[i] = next_weight;
+ Rascii[i] = rev_weight;
+ if (Ftable[i] == 0) {
+ Ftable[i] = next_weight;
+ RFtable[i] = rev_weight;
+ Ftable[tolower(i)] = next_weight;
+ RFtable[tolower(i)] = rev_weight;
+ }
+ next_weight++;
+ rev_weight--;
+
+ if (i == '\n' || isprint(i))
+ itable[i] = 1;
+
+ if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
+ dtable[i] = 1;
+ }
+}
diff --git a/usr.bin/sort/msort.c b/usr.bin/sort/msort.c
new file mode 100644
index 0000000..6cbcf90
--- /dev/null
+++ b/usr.bin/sort/msort.c
@@ -0,0 +1,431 @@
+/* $NetBSD: msort.c,v 1.31 2016/06/01 02:37:55 kre Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "sort.h"
+#include "fsort.h"
+
+__RCSID("$NetBSD: msort.c,v 1.31 2016/06/01 02:37:55 kre Exp $");
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <util.h>
+
+/* Subroutines using comparisons: merge sort and check order */
+#define DELETE (1)
+
+typedef struct mfile {
+ FILE *fp;
+ get_func_t get;
+ RECHEADER *rec;
+ u_char *end;
+} MFILE;
+
+static int cmp(RECHEADER *, RECHEADER *);
+static int insert(struct mfile **, struct mfile *, int, int);
+static void merge_sort_fstack(FILE *, put_func_t, struct field *);
+
+/*
+ * Number of files merge() can merge in one pass.
+ */
+#define MERGE_FNUM 16
+
+static struct mfile fstack[MERGE_FNUM];
+static struct mfile fstack_1[MERGE_FNUM];
+static struct mfile fstack_2[MERGE_FNUM];
+static int fstack_count, fstack_1_count, fstack_2_count;
+
+void
+save_for_merge(FILE *fp, get_func_t get, struct field *ftbl)
+{
+ FILE *mfp, *mfp1, *mfp2;
+
+ if (fstack_count == MERGE_FNUM) {
+ /* Must reduce the number of temporary files */
+ mfp = ftmp();
+ merge_sort_fstack(mfp, putrec, ftbl);
+ /* Save output in next layer */
+ if (fstack_1_count == MERGE_FNUM) {
+ mfp1 = ftmp();
+ memcpy(fstack, fstack_1, sizeof fstack);
+ merge_sort_fstack(mfp1, putrec, ftbl);
+ if (fstack_2_count == MERGE_FNUM) {
+ /* More than 4096 files! */
+ mfp2 = ftmp();
+ memcpy(fstack, fstack_2, sizeof fstack);
+ merge_sort_fstack(mfp2, putrec, ftbl);
+ fstack_2[0].fp = mfp2;
+ fstack_2_count = 1;
+ }
+ fstack_2[fstack_2_count].fp = mfp1;
+ fstack_2[fstack_2_count].get = geteasy;
+ fstack_2_count++;
+ fstack_1_count = 0;
+ }
+ fstack_1[fstack_1_count].fp = mfp;
+ fstack_1[fstack_1_count].get = geteasy;
+ fstack_1_count++;
+ fstack_count = 0;
+ }
+
+ fstack[fstack_count].fp = fp;
+ fstack[fstack_count++].get = get;
+}
+
+void
+fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
+{
+ get_func_t get = SINGL_FLD ? makeline : makekey;
+ FILE *fp;
+ int i;
+
+ for (i = 0; i < nfiles; i++) {
+ fp = fopen(filelist->names[i], "r");
+ if (fp == NULL)
+ err(2, "%s", filelist->names[i]);
+ save_for_merge(fp, get, ftbl);
+ }
+
+ merge_sort(outfp, putline, ftbl);
+}
+
+void
+merge_sort(FILE *outfp, put_func_t put, struct field *ftbl)
+{
+ int count = fstack_1_count + fstack_2_count;
+ FILE *mfp;
+ int i;
+
+ if (count == 0) {
+ /* All files in initial array */
+ merge_sort_fstack(outfp, put, ftbl);
+ return;
+ }
+
+ count += fstack_count;
+
+ /* Too many files for one merge sort */
+ for (;;) {
+ /* Sort latest 16 files */
+ i = count;
+ if (i > MERGE_FNUM)
+ i = MERGE_FNUM;
+ while (fstack_count > 0)
+ fstack[--i] = fstack[--fstack_count];
+ while (i > 0 && fstack_1_count > 0)
+ fstack[--i] = fstack_1[--fstack_1_count];
+ while (i > 0)
+ fstack[--i] = fstack_2[--fstack_2_count];
+ if (count <= MERGE_FNUM) {
+ /* Got all the data */
+ fstack_count = count;
+ merge_sort_fstack(outfp, put, ftbl);
+ return;
+ }
+ mfp = ftmp();
+ fstack_count = count > MERGE_FNUM ? MERGE_FNUM : count;
+ merge_sort_fstack(mfp, putrec, ftbl);
+ fstack[0].fp = mfp;
+ fstack[0].get = geteasy;
+ fstack_count = 1;
+ count -= MERGE_FNUM - 1;
+ }
+}
+
+static void
+merge_sort_fstack(FILE *outfp, put_func_t put, struct field *ftbl)
+{
+ struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
+ RECHEADER *new_rec;
+ u_char *new_end;
+ void *tmp;
+ int c, i, nfiles;
+ size_t sz;
+
+ /* Read one record from each file (read again if a duplicate) */
+ for (nfiles = i = 0; i < fstack_count; i++) {
+ cfile = &fstack[i];
+ if (cfile->rec == NULL) {
+ cfile->rec = allocrec(NULL, DEFLLEN);
+ cfile->end = (u_char *)cfile->rec + DEFLLEN;
+ }
+ rewind(cfile->fp);
+
+ for (;;) {
+ c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl);
+ if (c == EOF)
+ break;
+
+ if (c == BUFFEND) {
+ /* Double buffer size */
+ sz = (cfile->end - (u_char *)cfile->rec) * 2;
+ cfile->rec = allocrec(cfile->rec, sz);
+ cfile->end = (u_char *)cfile->rec + sz;
+ continue;
+ }
+
+ if (nfiles != 0) {
+ if (insert(flist, cfile, nfiles, !DELETE))
+ /* Duplicate removed */
+ continue;
+ } else
+ flist[0] = cfile;
+ nfiles++;
+ break;
+ }
+ }
+
+ if (nfiles == 0)
+ return;
+
+ /*
+ * We now loop reading a new record from the file with the
+ * 'sorted first' existing record.
+ * As each record is added, the 'first' record is written to the
+ * output file - maintaining one record from each file in the sorted
+ * list.
+ */
+ new_rec = allocrec(NULL, DEFLLEN);
+ new_end = (u_char *)new_rec + DEFLLEN;
+ for (;;) {
+ cfile = flist[0];
+ c = cfile->get(cfile->fp, new_rec, new_end, ftbl);
+ if (c == EOF) {
+ /* Write out last record from now-empty input */
+ put(cfile->rec, outfp);
+ if (--nfiles == 0)
+ break;
+ /* Replace from file with now-first sorted record. */
+ /* (Moving base 'flist' saves copying everything!) */
+ flist++;
+ continue;
+ }
+ if (c == BUFFEND) {
+ /* Buffer not large enough - double in size */
+ sz = (new_end - (u_char *)new_rec) * 2;
+ new_rec = allocrec(new_rec, sz);
+ new_end = (u_char *)new_rec +sz;
+ continue;
+ }
+
+ /* Swap in new buffer, saving old */
+ tmp = cfile->rec;
+ cfile->rec = new_rec;
+ new_rec = tmp;
+ tmp = cfile->end;
+ cfile->end = new_end;
+ new_end = tmp;
+
+ /* Add into sort, removing the original first entry */
+ c = insert(flist, cfile, nfiles, DELETE);
+ if (c != 0 || (UNIQUE && cfile == flist[0]
+ && cmp(new_rec, cfile->rec) == 0)) {
+ /* Was an unwanted duplicate, restore buffer */
+ tmp = cfile->rec;
+ cfile->rec = new_rec;
+ new_rec = tmp;
+ tmp = cfile->end;
+ cfile->end = new_end;
+ new_end = tmp;
+ continue;
+ }
+
+ /* Write out 'old' record */
+ put(new_rec, outfp);
+ }
+
+ free(new_rec);
+}
+
+/*
+ * if delete: inserts rec in flist, deletes flist[0];
+ * otherwise just inserts *rec in flist.
+ * Returns 1 if record is a duplicate to be ignored.
+ */
+static int
+insert(struct mfile **flist, struct mfile *rec, int ttop, int delete)
+{
+ int mid, top = ttop, bot = 0, cmpv = 1;
+
+ for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
+ cmpv = cmp(rec->rec, flist[mid]->rec);
+ if (cmpv == 0 ) {
+ if (UNIQUE)
+ /* Duplicate key, read another record */
+ /* NB: This doesn't guarantee to keep any
+ * particular record. */
+ return 1;
+ /*
+ * Apply sort by input file order.
+ * We could truncate the sort is the fileno are
+ * adjacent - but that is all too hard!
+ * The fileno cannot be equal, since we only have one
+ * record from each file (+ flist[0] which never
+ * comes here).
+ */
+ cmpv = rec < flist[mid] ? -1 : 1;
+ if (REVERSE)
+ cmpv = -cmpv;
+ }
+ if (cmpv < 0)
+ top = mid;
+ else
+ bot = mid;
+ }
+
+ /* At this point we haven't yet compared against flist[0] */
+
+ if (delete) {
+ /* flist[0] is ourselves, only the caller knows the old data */
+ if (bot != 0) {
+ memmove(flist, flist + 1, bot * sizeof(MFILE *));
+ flist[bot] = rec;
+ }
+ return 0;
+ }
+
+ /* Inserting original set of records */
+
+ if (bot == 0 && cmpv != 0) {
+ /* Doesn't match flist[1], must compare with flist[0] */
+ cmpv = cmp(rec->rec, flist[0]->rec);
+ if (cmpv == 0 && UNIQUE)
+ return 1;
+ /* Add matching keys in file order (ie new is later) */
+ if (cmpv < 0)
+ bot = -1;
+ }
+ bot++;
+ memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *));
+ flist[bot] = rec;
+ return 0;
+}
+
+/*
+ * check order on one file
+ */
+void
+order(struct filelist *filelist, struct field *ftbl, int quiet)
+{
+ get_func_t get = SINGL_FLD ? makeline : makekey;
+ RECHEADER *crec, *prec, *trec;
+ u_char *crec_end, *prec_end, *trec_end;
+ FILE *fp;
+ int c;
+
+ fp = fopen(filelist->names[0], "r");
+ if (fp == NULL)
+ err(2, "%s", filelist->names[0]);
+
+ crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
+ crec_end = crec->data + DEFLLEN;
+ prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
+ prec_end = prec->data + DEFLLEN;
+
+ /* XXX this does exit(0) for overlong lines */
+ if (get(fp, prec, prec_end, ftbl) != 0)
+ exit(0);
+ while (get(fp, crec, crec_end, ftbl) == 0) {
+ if (0 < (c = cmp(prec, crec))) {
+ if (quiet)
+ exit(1);
+ crec->data[crec->length-1] = 0;
+ errx(1, "found disorder: %s", crec->data+crec->offset);
+ }
+ if (UNIQUE && !c) {
+ if (quiet)
+ exit(1);
+ crec->data[crec->length-1] = 0;
+ errx(1, "found non-uniqueness: %s",
+ crec->data+crec->offset);
+ }
+ /*
+ * Swap pointers so that this record is on place pointed
+ * to by prec and new record is read to place pointed to by
+ * crec.
+ */
+ trec = prec;
+ prec = crec;
+ crec = trec;
+ trec_end = prec_end;
+ prec_end = crec_end;
+ crec_end = trec_end;
+ }
+ exit(0);
+}
+
+static int
+cmp(RECHEADER *rec1, RECHEADER *rec2)
+{
+ int len;
+ int r;
+
+ /* key is weights */
+ len = min(rec1->keylen, rec2->keylen);
+ r = memcmp(rec1->data, rec2->data, len);
+ if (r == 0)
+ r = rec1->keylen - rec2->keylen;
+ if (REVERSE)
+ r = -r;
+ return r;
+}
diff --git a/usr.bin/sort/pathnames.h b/usr.bin/sort/pathnames.h
new file mode 100644
index 0000000..534671e
--- /dev/null
+++ b/usr.bin/sort/pathnames.h
@@ -0,0 +1,66 @@
+/* $NetBSD: pathnames.h,v 1.6 2008/04/28 20:24:15 martin Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pathnames.h 8.1 (Berkeley) 6/6/93
+ */
+
+#define _PATH_STDIN "/dev/stdin"
diff --git a/usr.bin/sort/radix_sort.c b/usr.bin/sort/radix_sort.c
new file mode 100644
index 0000000..4ec5ff8
--- /dev/null
+++ b/usr.bin/sort/radix_sort.c
@@ -0,0 +1,217 @@
+/* $NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $ */
+
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy and by Dan Bernstein at New York University,
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95";
+#else
+__RCSID("$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $");
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * 'stable' radix sort initially from libc/stdlib/radixsort.c
+ */
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <util.h>
+#include "sort.h"
+
+typedef struct {
+ RECHEADER **sa; /* Base of saved area */
+ int sn; /* Number of entries */
+ int si; /* index into data for compare */
+} stack;
+
+static void simplesort(RECHEADER **, int, int);
+
+#define THRESHOLD 20 /* Divert to simplesort(). */
+
+#define empty(s) (s >= sp)
+#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
+#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
+#define swap(a, b, t) t = a, a = b, b = t
+
+void
+radix_sort(RECHEADER **a, RECHEADER **ta, int n)
+{
+ u_int count[256], nc, bmin;
+ u_int c;
+ RECHEADER **ak, **tai, **lim;
+ RECHEADER *hdr;
+ int stack_size = 512;
+ stack *s, *sp, *sp0, *sp1, temp;
+ RECHEADER **top[256];
+ u_int *cp, bigc;
+ int data_index = 0;
+
+ if (n < THRESHOLD && !DEBUG('r')) {
+ simplesort(a, n, 0);
+ return;
+ }
+
+ s = emalloc(stack_size * sizeof *s);
+ memset(&count, 0, sizeof count);
+ /* Technically 'top' doesn't need zeroing */
+ memset(&top, 0, sizeof top);
+
+ sp = s;
+ push(a, n, data_index);
+ while (!empty(s)) {
+ pop(a, n, data_index);
+ if (n < THRESHOLD && !DEBUG('r')) {
+ simplesort(a, n, data_index);
+ continue;
+ }
+
+ /* Count number of times each 'next byte' occurs */
+ nc = 0;
+ bmin = 255;
+ lim = a + n;
+ for (ak = a, tai = ta; ak < lim; ak++) {
+ hdr = *ak;
+ if (data_index >= hdr->keylen) {
+ /* Short key, copy to start of output */
+ if (UNIQUE && a != sp->sa)
+ /* Stop duplicate being written out */
+ hdr->keylen = -1;
+ *a++ = hdr;
+ n--;
+ continue;
+ }
+ /* Save in temp buffer for distribute */
+ *tai++ = hdr;
+ c = hdr->data[data_index];
+ if (++count[c] == 1) {
+ if (c < bmin)
+ bmin = c;
+ nc++;
+ }
+ }
+ /*
+ * We need save the bounds for each 'next byte' that
+ * occurs more so we can sort each block.
+ */
+ if (sp + nc > s + stack_size) {
+ stack_size *= 2;
+ sp1 = erealloc(s, stack_size * sizeof *s);
+ sp = sp1 + (sp - s);
+ s = sp1;
+ }
+
+ /* Minor optimisation to do the largest set last */
+ sp0 = sp1 = sp;
+ bigc = 2;
+ /* Convert 'counts' positions, saving bounds for later sorts */
+ ak = a;
+ for (cp = count + bmin; nc > 0; cp++) {
+ while (*cp == 0)
+ cp++;
+ if ((c = *cp) > 1) {
+ if (c > bigc) {
+ bigc = c;
+ sp1 = sp;
+ }
+ push(ak, c, data_index+1);
+ }
+ ak += c;
+ top[cp-count] = ak;
+ *cp = 0; /* Reset count[]. */
+ nc--;
+ }
+ swap(*sp0, *sp1, temp);
+
+ for (ak = ta+n; --ak >= ta;) /* Deal to piles. */
+ *--top[(*ak)->data[data_index]] = *ak;
+ }
+
+ free(s);
+}
+
+/* insertion sort, short records are sorted before long ones */
+static void
+simplesort(RECHEADER **a, int n, int data_index)
+{
+ RECHEADER **ak, **ai;
+ RECHEADER *akh;
+ RECHEADER **lim = a + n;
+ const u_char *s, *t;
+ int s_len, t_len;
+ int i;
+ int r;
+
+ if (n <= 1)
+ return;
+
+ for (ak = a+1; ak < lim; ak++) {
+ akh = *ak;
+ s = akh->data;
+ s_len = akh->keylen;
+ for (ai = ak; ;) {
+ ai--;
+ t_len = (*ai)->keylen;
+ if (t_len != -1) {
+ t = (*ai)->data;
+ for (i = data_index; ; i++) {
+ if (i >= s_len || i >= t_len) {
+ r = s_len - t_len;
+ break;
+ }
+ r = s[i] - t[i];
+ if (r != 0)
+ break;
+ }
+ if (r >= 0) {
+ if (r == 0 && UNIQUE) {
+ /* Put record below existing */
+ ai[1] = ai[0];
+ /* Mark as duplicate - ignore */
+ akh->keylen = -1;
+ } else {
+ ai++;
+ }
+ break;
+ }
+ }
+ ai[1] = ai[0];
+ if (ai == a)
+ break;
+ }
+ ai[0] = akh;
+ }
+}
diff --git a/usr.bin/sort/sort.1 b/usr.bin/sort/sort.1
new file mode 100644
index 0000000..548aafd
--- /dev/null
+++ b/usr.bin/sort/sort.1
@@ -0,0 +1,525 @@
+.\" $NetBSD: sort.1,v 1.38 2017/07/03 21:34:21 wiz Exp $
+.\"
+.\" Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Ben Harris and Jaromir Dolecek.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.\" Copyright (c) 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" the Institute of Electrical and Electronics Engineers, Inc.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
+.\"
+.Dd June 1, 2016
+.Dt SORT 1
+.Os
+.Sh NAME
+.Nm sort
+.Nd sort or merge text files
+.Sh SYNOPSIS
+.Nm
+.Op Fl bdfHilmnrSsu
+.Oo
+.Fl k
+.Ar kstart Ns Op Li \&, Ns Ar kend
+.Oc
+.Op Fl o Ar output
+.Op Fl R Ar char
+.Op Fl T Ar dir
+.Op Fl t Ar char
+.Op Ar
+.Nm
+.Fl C Ns | Ns Fl c
+.Op Fl bdfilnru
+.Oo
+.Fl k
+.Ar kstart Ns Op Li \&, Ns Ar kend
+.Op Fl t Ar char
+.Oc
+.Op Fl R Ar char
+.Op Ar file
+.Sh DESCRIPTION
+The
+.Nm
+utility sorts text files by lines.
+Comparisons are based on one or more sort keys extracted
+from each line of input, and are performed lexicographically.
+By default, if keys are not given,
+.Nm
+regards each input line as a single field.
+.Pp
+The following options are available:
+.Bl -tag -width Fl
+.It Fl C
+Identical to
+.Fl c
+without the error messages in the case of unsorted input.
+.It Fl c
+Check that the single input file is sorted.
+If the file is not sorted,
+.Nm
+produces the appropriate error messages and exits with code 1; otherwise,
+.Nm
+returns 0.
+.Nm
+.Fl c
+produces no output.
+See also
+.Fl u .
+.It Fl H
+Ignored for compatibility with earlier versions of
+.Nm .
+.It Fl m
+Merge only; the input files are assumed to be pre-sorted.
+.It Fl o Ar output
+The argument given is the name of an
+.Ar output
+file to be used instead of the standard output.
+This file can be the same as one of the input files.
+.It Fl S
+Don't use stable sort.
+Default is to use stable sort.
+.It Fl s
+Use stable sort, keeps records with equal keys in their original order.
+This is the default.
+Provided for compatibility with other
+.Nm
+implementations only.
+.It Fl T Ar dir
+Use
+.Ar dir
+as the directory for temporary files.
+The default is the value specified in the environment variable
+.Ev TMPDIR or
+.Pa /tmp
+if
+.Ev TMPDIR
+is not defined.
+.It Fl u
+Unique: suppress all but one in each set of lines having equal keys.
+If used with the
+.Fl c
+option, check that there are no lines with duplicate keys.
+.El
+.Pp
+The following options,
+which should be given before any
+.Fl k
+options, override the default ordering rules.
+When ordering options appear independent of,
+and before, key field specifications,
+the requested field ordering rules are
+applied globally to all sort keys.
+When attached to a specific key (see
+.Fl k ) ,
+the ordering options override
+all global ordering options for that key.
+.Bl -tag -width Fl
+.It Fl d
+Only blank space and alphanumeric characters
+.\" according
+.\" to the current setting of LC_CTYPE
+are used
+in making comparisons.
+.It Fl f
+Considers all lowercase characters that have uppercase
+equivalents to be the same for purposes of comparison.
+.It Fl i
+Ignore all non-printable characters.
+.It Fl l
+Sort by the string length of the field, not by the field itself.
+.It Fl n
+An initial numeric string, consisting of optional blank space, optional
+plus or minus sign, and zero or more digits (including decimal point)
+.\" with
+.\" optional radix character and thousands
+.\" separator
+.\" (as defined in the current locale),
+is sorted by arithmetic value.
+(The
+.Fl n
+option no longer implies the
+.Fl b
+option.)
+.It Fl r
+Reverse the sense of comparisons.
+.El
+.Pp
+The treatment of field separators can be altered using these options:
+.Bl -tag -width Fl
+.It Fl b
+Ignores leading blank space when determining the start
+and end of a restricted sort key.
+A
+.Fl b
+option specified before the first
+.Fl k
+option applies globally to all
+.Fl k
+options.
+Otherwise, the
+.Fl b
+option can be attached independently to each
+.Ar field
+argument of the
+.Fl k
+option (see below).
+Note that the
+.Fl b
+option has no effect unless key fields are specified.
+.It Fl k Ar kstart Ns Op Li \&, Ns Ar kend
+Designates the starting position,
+.Ar kstart ,
+and optional ending position,
+.Ar kend ,
+of a key field.
+The
+.Fl k
+option replaces the obsolescent options
+.Cm \(pl Ns Ar pos1
+and
+.Fl Ns Ar pos2 .
+.It Fl R Ar char
+.Ar char
+is used as the record separator character.
+This should be used with discretion;
+.Fl R Aq Ar alphanumeric
+usually produces undesirable results.
+If char is not a single character, then it
+specifies the value of the desired record
+separator as an integer specified in any
+of the normal NNN, 0ooo, or 0xXXX ways,
+or as an octal value preceded by \e.
+Caution: do not attempt to specify Ctl-A
+as
+.Dq -R 1
+which will not do what was intended at all!
+The default record separator is newline.
+.It Fl t Ar char
+.Ar char
+is used as the field separator character.
+The initial
+.Ar char
+is not considered to be part of a field when determining
+key offsets (see below).
+Each occurrence of
+.Ar char
+is significant (for example,
+.Dq Ar charchar
+delimits an empty field).
+If
+.Fl t
+is not specified, the default field separator is a sequence of
+blank-space characters, and consecutive blank spaces do
+.Em not
+delimit an empty field; further, the initial blank space
+.Em is
+considered part of a field when determining key offsets.
+.El
+.Pp
+The following operands are available:
+.Bl -tag -width Ar
+.It Ar file
+The pathname of a file to be sorted, merged, or checked.
+If no
+.Ar file
+operands are specified, or if
+a
+.Ar file
+operand is
+.Fl ,
+the standard input is used.
+.El
+.Pp
+A field is defined as a minimal sequence of characters followed by a
+field separator or a newline character.
+By default, the first
+blank space of a sequence of blank spaces acts as the field separator.
+All blank spaces in a sequence of blank spaces are considered
+as part of the next field; for example, all blank spaces at
+the beginning of a line are considered to be part of the
+first field.
+.Pp
+Fields are specified
+by the
+.Fl k
+.Ar kstart Ns Op \&, Ns Ar kend
+argument.
+A missing
+.Ar kend
+argument defaults to the end of a line.
+.Pp
+The arguments
+.Ar kstart
+and
+.Ar kend
+have the form
+.Ar m Ns Li \&. Ns Ar n
+and can be followed by one or more of the letters
+.Cm b , d , f , i ,
+.Cm l , n ,
+and
+.Cm r ,
+which correspond to the options discussed above.
+A
+.Ar kstart
+position specified by
+.Ar m Ns Li \&. Ns Ar n
+.Pq Ar m , n No > 0
+is interpreted as the
+.Ar n Ns th
+character in the
+.Ar m Ns th
+field.
+A missing
+.Li \&. Ns Ar n
+in
+.Ar kstart
+means
+.Ql \&.1 ,
+indicating the first character of the
+.Ar m Ns th
+field; if the
+.Fl b
+option is in effect,
+.Ar n
+is counted from the first non-blank character in the
+.Ar m Ns th
+field;
+.Ar m Ns Li \&.1b
+refers to the first non-blank character in the
+.Ar m Ns th
+field.
+.Pp
+A
+.Ar kend
+position specified by
+.Ar m Ns Li \&. Ns Ar n
+is interpreted as
+the
+.Ar n Ns th
+character (including separators) of the
+.Ar m Ns th
+field.
+A missing
+.Li \&. Ns Ar n
+indicates the last character of the
+.Ar m Ns th
+field;
+.Ar m
+= \&0
+designates the end of a line.
+Thus the option
+.Fl k
+.Sm off
+.Xo
+.Ar v Li \&. Ar x Li \&,
+.Ar w Li \&. Ar y
+.Xc
+.Sm on
+is synonymous with the obsolescent option
+.Sm off
+.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
+.Fl Ar w-\&1 Li \&. Ar y ;
+.Sm on
+when
+.Ar y
+is omitted,
+.Fl k
+.Sm off
+.Ar v Li \&. Ar x Li \&, Ar w
+.Sm on
+is synonymous with
+.Sm off
+.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1
+.Fl Ar w+1 Li \&.0 .
+.Sm on
+The obsolescent
+.Cm \(pl Ns Ar pos1
+.Fl Ns Ar pos2
+option is still supported, except for
+.Fl Ns Ar w Ns Li \&.0b ,
+which has no
+.Fl k
+equivalent.
+.Pp
+.Nm
+compares records by comparing the key fields selected by
+.Fl k
+arguments,
+from first given to last,
+until discovering a difference.
+If there are no
+.Fl k
+arguments, the whole record is treated as a single key.
+After exhausting the
+.Fl k
+arguments, if no difference has been found,
+then the result depends upon the
+.Fl u
+and
+.Fl S
+option settings.
+With
+.Fl u
+the records are considered identical, and one is supressed.
+Otherwise with
+.Fl s
+set (default) the records are left in their original order,
+or with
+.Fl S
+(posix mode) the whole record is considered as a tie breaker.
+.\"
+.\" If you fail to understand why it doesn't matter which order
+.\" the records are output when they are wholly identical, there
+.\" is nothing that this man page can say that wll help!
+.\"
+.Sh ENVIRONMENT
+If the following environment variable exists, it is used by
+.Nm .
+.Bl -tag -width Ev
+.It Ev TMPDIR
+.Nm
+uses the contents of the
+.Ev TMPDIR
+environment variable as the path in which to store
+temporary files.
+.El
+.Sh FILES
+.Bl -tag -width outputNUMBER+some -compact
+.It Pa /tmp/sort.*
+Default temporary files.
+.It Ar output Ns NUMBER
+Temporary file which is used for output if
+.Ar output
+already exists.
+Once sorting is finished, this file replaces
+.Ar output
+(via
+.Xr link 2
+and
+.Xr unlink 2 ) .
+.El
+.Sh EXIT STATUS
+Sort exits with one of the following values:
+.Bl -tag -width flag -compact
+.It 0
+Normal behavior.
+.It 1
+On disorder (or non-uniqueness) with the
+.Fl c
+(or
+.Fl C )
+option.
+.It 2
+An error occurred.
+.El
+.Sh SEE ALSO
+.Xr comm 1 ,
+.Xr join 1 ,
+.Xr uniq 1 ,
+.Xr qsort 3 ,
+.Xr radixsort 3
+.Sh HISTORY
+A
+.Nm
+command appeared in
+.At v5 .
+This
+.Nm
+implementation appeared in
+.Bx 4.4
+and is used since
+.Nx 1.6 .
+.Sh BUGS
+Posix requires the locale's thousands separator be ignored in numbers.
+It may be faster to sort very large files in pieces and then explicitly
+merge them.
+.Sh NOTES
+This
+.Nm
+has no limits on input line length (other than imposed by available
+memory) or any restrictions on bytes allowed within lines.
+.Pp
+To protect data
+.Nm
+.Fl o
+calls
+.Xr link 2
+and
+.Xr unlink 2 ,
+and thus fails on protected directories.
+.Pp
+Input files should be text files.
+If file doesn't end with record separator (which is typically newline), the
+.Nm
+utility silently supplies one.
+.Pp
+The current
+.Nm
+uses lexicographic radix sorting, which requires
+that sort keys be kept in memory (as opposed to previous versions which used quick
+and merge sorts and did not.)
+Thus performance depends highly on efficient choice of sort keys, and the
+.Fl b
+option and the
+.Ar kend
+argument of the
+.Fl k
+option should be used whenever possible.
+Similarly,
+.Nm
+.Fl k1f
+is equivalent to
+.Nm
+.Fl f
+and may take twice as long.
diff --git a/usr.bin/sort/sort.c b/usr.bin/sort/sort.c
new file mode 100644
index 0000000..ee5ffdb
--- /dev/null
+++ b/usr.bin/sort/sort.c
@@ -0,0 +1,419 @@
+/* $NetBSD: sort.c,v 1.64 2017/01/10 21:13:45 christos Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+#ifndef lint
+__COPYRIGHT("@(#) Copyright (c) 1993\
+ The Regents of the University of California. All rights reserved.");
+#endif /* not lint */
+__RCSID("$NetBSD: sort.c,v 1.64 2017/01/10 21:13:45 christos Exp $");
+
+/* Sort sorts a file using an optional user-defined key.
+ * Sort uses radix sort for internal sorting, and allows
+ * a choice of merge sort and radix sort for external sorting.
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+
+#include <locale.h>
+#include <paths.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <util.h>
+
+#include "sort.h"
+#include "fsort.h"
+#include "pathnames.h"
+
+int REC_D = '\n';
+u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
+
+/*
+ * weight tables. Gweights is one of ascii, Rascii..
+ * modified to weight rec_d = 0 (or 255)
+ */
+u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable };
+u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
+
+int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
+int REVERSE = 0;
+int posix_sort;
+
+unsigned int debug_flags = 0;
+
+static char toutpath[MAXPATHLEN];
+
+const char *tmpdir; /* where temporary files should be put */
+
+static void cleanup(void);
+static void onsignal(int);
+__dead static void usage(const char *);
+
+int
+main(int argc, char *argv[])
+{
+ int ch, i, stdinflag = 0;
+ char mode = 0;
+ char *outfile, *outpath = 0;
+ struct field *fldtab;
+ size_t fldtab_sz, fld_cnt;
+ struct filelist filelist;
+ int num_input_files;
+ FILE *outfp = NULL;
+ struct rlimit rl;
+ struct stat st;
+
+ setlocale(LC_ALL, "");
+
+ /* bump RLIMIT_NOFILE to maximum our hard limit allows */
+ if (getrlimit(RLIMIT_NOFILE, &rl) < 0)
+ err(2, "getrlimit");
+ rl.rlim_cur = rl.rlim_max;
+ if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
+ err(2, "setrlimit");
+
+ d_mask[REC_D = '\n'] = REC_D_F;
+ d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
+
+ /* fldtab[0] is the global options. */
+ fldtab_sz = 3;
+ fld_cnt = 0;
+ fldtab = emalloc(fldtab_sz * sizeof(*fldtab));
+ memset(fldtab, 0, fldtab_sz * sizeof(*fldtab));
+
+#define SORT_OPTS "bcCdD:fHik:lmno:rR:sSt:T:u"
+
+ /* Convert "+field" args to -k format */
+ fixit(&argc, argv, SORT_OPTS);
+
+ if (!(tmpdir = getenv("TMPDIR")))
+ tmpdir = _PATH_TMP;
+
+ while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) {
+ switch (ch) {
+ case 'b':
+ fldtab[0].flags |= BI | BT;
+ break;
+ case 'c': case 'C': case 'm':
+ if (mode)
+ usage("Incompatible operation modes");
+ mode = ch;
+ break;
+ case 'D': /* Debug flags */
+ for (i = 0; optarg[i]; i++)
+ debug_flags |= 1 << (optarg[i] & 31);
+ break;
+ case 'd': case 'f': case 'i': case 'n': case 'l':
+ fldtab[0].flags |= optval(ch, 0);
+ break;
+ case 'H':
+ /* -H was ; use merge sort for blocks of large files' */
+ /* That is now the default. */
+ break;
+ case 'k':
+ fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab));
+ memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0]));
+ fldtab_sz++;
+
+ setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags);
+ break;
+ case 'o':
+ outpath = optarg;
+ break;
+ case 'r':
+ REVERSE = 1;
+ break;
+ case 'R':
+ if (REC_D != '\n')
+ usage("multiple record delimiters");
+ REC_D = *optarg;
+ if (optarg[1] != '\0') {
+ char *ep;
+ int t = 0;
+
+ if (optarg[0] == '\\')
+ optarg++, t = 8;
+ REC_D = (int)strtol(optarg, &ep, t);
+ if (*ep != '\0' || REC_D < 0 ||
+ REC_D >= (int)__arraycount(d_mask))
+ errx(2, "invalid record delimiter %s",
+ optarg);
+ }
+ if (REC_D == '\n')
+ break;
+ d_mask['\n'] = d_mask[' '];
+ d_mask[REC_D] = REC_D_F;
+ break;
+ case 's':
+ /*
+ * Nominally 'stable sort', keep lines with equal keys
+ * in input file order. (Default for NetBSD)
+ * (-s for GNU sort compatibility.)
+ */
+ posix_sort = 0;
+ break;
+ case 'S':
+ /*
+ * Reverse of -s!
+ * This needs to enforce a POSIX sort where records
+ * with equal keys are then sorted by the raw data.
+ * Currently not implemented!
+ * (using libc radixsort() v sradixsort() doesn't
+ * have the desired effect.)
+ */
+ posix_sort = 1;
+ break;
+ case 't':
+ if (SEP_FLAG)
+ usage("multiple field delimiters");
+ SEP_FLAG = 1;
+ d_mask[' '] &= ~FLD_D;
+ d_mask['\t'] &= ~FLD_D;
+ d_mask['\n'] &= ~FLD_D;
+ d_mask[(u_char)*optarg] |= FLD_D;
+ if (d_mask[(u_char)*optarg] & REC_D_F)
+ errx(2, "record/field delimiter clash");
+ break;
+ case 'T':
+ /* -T tmpdir */
+ tmpdir = optarg;
+ break;
+ case 'u':
+ UNIQUE = 1;
+ break;
+ case '?':
+ default:
+ usage(NULL);
+ }
+ }
+
+ if (UNIQUE)
+ /* Don't sort on raw record if keys match */
+ posix_sort = 0;
+
+ if ((mode == 'c' || mode == 'C') && argc > optind+1)
+ errx(2, "too many input files for -c option");
+ if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
+ outpath = argv[argc-1];
+ argc -= 2;
+ }
+ if (mode == 'm' && argc - optind > (MAXFCT - (16+1))*16)
+ errx(2, "too many input files for -m option");
+
+ for (i = optind; i < argc; i++) {
+ /* allow one occurrence of /dev/stdin */
+ if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) {
+ if (stdinflag)
+ warnx("ignoring extra \"%s\" in file list",
+ argv[i]);
+ else
+ stdinflag = 1;
+
+ /* change to /dev/stdin if '-' */
+ if (argv[i][0] == '-') {
+ static char path_stdin[] = _PATH_STDIN;
+ argv[i] = path_stdin;
+ }
+
+ } else if ((ch = access(argv[i], R_OK)))
+ err(2, "%s", argv[i]);
+ }
+
+ if (fldtab[1].icol.num == 0) {
+ /* No sort key specified */
+ if (fldtab[0].flags & (I|D|F|N|L)) {
+ /* Modified - generate a key that covers the line */
+ fldtab[0].flags &= ~(BI|BT);
+ setfield("1", &fldtab[++fld_cnt], fldtab->flags);
+ fldreset(fldtab);
+ } else {
+ /* Unmodified, just compare the line */
+ SINGL_FLD = 1;
+ fldtab[0].icol.num = 1;
+ }
+ } else {
+ fldreset(fldtab);
+ }
+
+ settables();
+
+ if (optind == argc) {
+ static const char * const names[] = { _PATH_STDIN, NULL };
+ filelist.names = names;
+ num_input_files = 1;
+ } else {
+ filelist.names = (const char * const *) &argv[optind];
+ num_input_files = argc - optind;
+ }
+
+ if (mode == 'c' || mode == 'C') {
+ order(&filelist, fldtab, mode == 'C');
+ /* NOT REACHED */
+ }
+
+ if (!outpath) {
+ toutpath[0] = '\0'; /* path not used in this case */
+ outfile = outpath = toutpath;
+ outfp = stdout;
+ } else if (lstat(outpath, &st) == 0
+ && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
+ /* output file exists and isn't character or block device */
+ struct sigaction act;
+ static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE,
+ SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0};
+ int outfd;
+ errno = 0;
+ if (access(outpath, W_OK))
+ err(2, "%s", outpath);
+ (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX",
+ outpath);
+ if ((outfd = mkstemp(toutpath)) == -1)
+ err(2, "Cannot create temporary file `%s'", toutpath);
+ (void)atexit(cleanup);
+ act.sa_handler = onsignal;
+ (void) sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_RESTART | SA_RESETHAND;
+ for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
+ sigaction(sigtable[i], &act, 0);
+ outfile = toutpath;
+ if ((outfp = fdopen(outfd, "w")) == NULL)
+ err(2, "Cannot open temporary file `%s'", toutpath);
+ } else {
+ outfile = outpath;
+
+ if ((outfp = fopen(outfile, "w")) == NULL)
+ err(2, "output file %s", outfile);
+ }
+
+ if (mode == 'm')
+ fmerge(&filelist, num_input_files, outfp, fldtab);
+ else
+ fsort(&filelist, num_input_files, outfp, fldtab);
+
+ if (outfile != outpath) {
+ if (access(outfile, F_OK))
+ err(2, "%s", outfile);
+
+ /*
+ * Copy file permissions bits of the original file.
+ * st is initialized above, when we create the
+ * temporary spool file.
+ */
+ if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) {
+ err(2, "cannot chmod %s: output left in %s",
+ outpath, outfile);
+ }
+
+ (void)unlink(outpath);
+ if (link(outfile, outpath))
+ err(2, "cannot link %s: output left in %s",
+ outpath, outfile);
+ (void)unlink(outfile);
+ toutpath[0] = 0;
+ }
+ exit(0);
+}
+
+static void
+onsignal(int sig)
+{
+ cleanup();
+}
+
+static void
+cleanup(void)
+{
+ if (toutpath[0])
+ (void)unlink(toutpath);
+}
+
+static void
+usage(const char *msg)
+{
+ const char *pn = getprogname();
+
+ if (msg != NULL)
+ (void)fprintf(stderr, "%s: %s\n", getprogname(), msg);
+ (void)fprintf(stderr,
+ "usage: %s [-bdfHilmnrSsu] [-k kstart[,kend]] [-o output]"
+ " [-R char] [-T dir]\n", pn);
+ (void)fprintf(stderr,
+ " [-t char] [file ...]\n");
+ (void)fprintf(stderr,
+ " or: %s -C|-c [-bdfilnru] [-k kstart[,kend]] [-o output]"
+ " [-R char]\n", pn);
+ (void)fprintf(stderr,
+ " [-t char] [file]\n");
+ exit(2);
+}
+
+RECHEADER *
+allocrec(RECHEADER *rec, size_t size)
+{
+
+ return (erealloc(rec, size + sizeof(long) - 1));
+}
diff --git a/usr.bin/sort/sort.h b/usr.bin/sort/sort.h
new file mode 100644
index 0000000..d7a3144
--- /dev/null
+++ b/usr.bin/sort/sort.h
@@ -0,0 +1,201 @@
+/* $NetBSD: sort.h,v 1.36 2016/06/01 02:37:55 kre Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sort.h 8.1 (Berkeley) 6/6/93
+ */
+
+#include <sys/param.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NBINS 256
+
+/* values for masks, weights, and other flags. */
+/* R and F get used to index weight_tables[] */
+#define R 0x01 /* Field is reversed */
+#define F 0x02 /* weight lower and upper case the same */
+#define I 0x04 /* mask out non-printable characters */
+#define D 0x08 /* sort alphanumeric characters only */
+#define N 0x10 /* Field is a number */
+#define BI 0x20 /* ignore blanks in icol */
+#define BT 0x40 /* ignore blanks in tcol */
+#define L 0x80 /* Sort by field length */
+
+/* masks for delimiters: blanks, fields, and termination. */
+#define BLANK 1 /* ' ', '\t'; '\n' if -R is invoked */
+#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
+#define REC_D_F 4 /* '\n' default; from -R otherwise */
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+
+#define FCLOSE(file) { \
+ if (EOF == fclose(file)) \
+ err(2, "%p", file); \
+}
+
+#define EWRITE(ptr, size, n, f, fmt) { \
+ if (!fwrite(ptr, size, n, f)) \
+ err(2, fmt); \
+}
+
+/* Records are limited to MAXBUFSIZE (8MB) and less if you want to sort
+ * in a sane way.
+ * Anyone who wants to sort data records longer than 2GB definitely needs a
+ * different program! */
+typedef unsigned int length_t;
+
+/* A record is a key/line pair starting at rec.data. It has a total length
+ * and an offset to the start of the line half of the pair.
+ */
+typedef struct recheader {
+ length_t length; /* total length of key and line */
+ length_t offset; /* to line */
+ int keylen; /* length of key */
+ u_char data[]; /* key then line */
+} RECHEADER;
+
+/* This is the column as seen by struct field. It is used by enterfield.
+ * They are matched with corresponding coldescs during initialization.
+ */
+struct column {
+ struct coldesc *p;
+ int num;
+ int indent;
+};
+
+/* a coldesc has a number and pointers to the beginning and end of the
+ * corresponding column in the current line. This is determined in enterkey.
+ */
+typedef struct coldesc {
+ u_char *start;
+ u_char *end;
+ int num;
+} COLDESC;
+
+/* A field has an initial and final column; an omitted final column
+ * implies the end of the line. Flags regulate omission of blanks and
+ * numerical sorts; mask determines which characters are ignored (from -i, -d);
+ * weights determines the sort weights of a character (from -f, -r).
+ *
+ * The first field contain the global flags etc.
+ * The list terminates when icol = 0.
+ */
+struct field {
+ struct column icol;
+ struct column tcol;
+ u_int flags;
+ u_char *mask;
+ u_char *weights;
+};
+
+struct filelist {
+ const char * const * names;
+};
+
+typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *);
+typedef void (*put_func_t)(const RECHEADER *, FILE *);
+
+extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
+extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */
+extern u_char d_mask[NBINS];
+extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE;
+extern int posix_sort;
+extern int REC_D;
+extern const char *tmpdir;
+extern struct coldesc *clist;
+extern int ncols;
+
+#define DEBUG(ch) (debug_flags & (1 << ((ch) & 31)))
+extern unsigned int debug_flags;
+
+RECHEADER *allocrec(RECHEADER *, size_t);
+void append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *));
+void concat(FILE *, FILE *);
+length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
+void fixit(int *, char **, const char *);
+void fldreset(struct field *);
+FILE *ftmp(void);
+void fmerge(struct filelist *, int, FILE *, struct field *);
+void save_for_merge(FILE *, get_func_t, struct field *);
+void merge_sort(FILE *, put_func_t, struct field *);
+void fsort(struct filelist *, int, FILE *, struct field *);
+int geteasy(FILE *, RECHEADER *, u_char *, struct field *);
+int makekey(FILE *, RECHEADER *, u_char *, struct field *);
+int makeline(FILE *, RECHEADER *, u_char *, struct field *);
+void makeline_copydown(RECHEADER *);
+int optval(int, int);
+__dead void order(struct filelist *, struct field *, int);
+void putline(const RECHEADER *, FILE *);
+void putrec(const RECHEADER *, FILE *);
+void putkeydump(const RECHEADER *, FILE *);
+void rd_append(int, int, int, FILE *, u_char *, u_char *);
+void radix_sort(RECHEADER **, RECHEADER **, int);
+int setfield(const char *, struct field *, int);
+void settables(void);
diff --git a/usr.bin/sort/tmp.c b/usr.bin/sort/tmp.c
new file mode 100644
index 0000000..155b3f5
--- /dev/null
+++ b/usr.bin/sort/tmp.c
@@ -0,0 +1,106 @@
+/* $NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $ */
+
+/*-
+ * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Ben Harris and Jaromir Dolecek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__RCSID("$NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $");
+
+#include <sys/param.h>
+
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sort.h"
+#include "pathnames.h"
+
+#define _NAME_TMP "sort.XXXXXXXX"
+
+FILE *
+ftmp(void)
+{
+ sigset_t set, oset;
+ FILE *fp;
+ int fd;
+ char path[MAXPATHLEN];
+
+ (void)snprintf(path, sizeof(path), "%s%s%s", tmpdir,
+ (tmpdir[strlen(tmpdir)-1] != '/') ? "/" : "", _NAME_TMP);
+
+ sigfillset(&set);
+ (void)sigprocmask(SIG_BLOCK, &set, &oset);
+ if ((fd = mkstemp(path)) < 0)
+ err(2, "ftmp: mkstemp(\"%s\")", path);
+ if (!(fp = fdopen(fd, "w+")))
+ err(2, "ftmp: fdopen(\"%s\")", path);
+ if (!DEBUG('t'))
+ (void)unlink(path);
+
+ (void)sigprocmask(SIG_SETMASK, &oset, NULL);
+ return (fp);
+}