diff options
Diffstat (limited to 'usr.bin/sort')
-rw-r--r-- | usr.bin/sort/append.c | 94 | ||||
-rw-r--r-- | usr.bin/sort/fields.c | 380 | ||||
-rw-r--r-- | usr.bin/sort/files.c | 279 | ||||
-rw-r--r-- | usr.bin/sort/fsort.c | 202 | ||||
-rw-r--r-- | usr.bin/sort/fsort.h | 78 | ||||
-rw-r--r-- | usr.bin/sort/init.c | 447 | ||||
-rw-r--r-- | usr.bin/sort/msort.c | 431 | ||||
-rw-r--r-- | usr.bin/sort/pathnames.h | 66 | ||||
-rw-r--r-- | usr.bin/sort/radix_sort.c | 217 | ||||
-rw-r--r-- | usr.bin/sort/sort.1 | 525 | ||||
-rw-r--r-- | usr.bin/sort/sort.c | 419 | ||||
-rw-r--r-- | usr.bin/sort/sort.h | 201 | ||||
-rw-r--r-- | usr.bin/sort/tmp.c | 106 |
13 files changed, 3445 insertions, 0 deletions
diff --git a/usr.bin/sort/append.c b/usr.bin/sort/append.c new file mode 100644 index 0000000..07e15e1 --- /dev/null +++ b/usr.bin/sort/append.c @@ -0,0 +1,94 @@ +/* $NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "sort.h" + +__RCSID("$NetBSD: append.c,v 1.23 2009/11/06 18:34:22 joerg Exp $"); + +#include <stdlib.h> + +/* + * copy sorted lines to output + * Ignore duplicates (marked with -ve keylen) + */ +void +append(RECHEADER **keylist, int nelem, FILE *fp, put_func_t put) +{ + RECHEADER **cpos, **lastkey; + RECHEADER *crec; + + lastkey = keylist + nelem; + if (REVERSE) { + for (cpos = lastkey; cpos-- > keylist;) { + crec = *cpos; + if (crec->keylen >= 0) + put(crec, fp); + } + } else { + for (cpos = keylist; cpos < lastkey; cpos++) { + crec = *cpos; + if (crec->keylen >= 0) + put(crec, fp); + } + } +} diff --git a/usr.bin/sort/fields.c b/usr.bin/sort/fields.c new file mode 100644 index 0000000..6208a78 --- /dev/null +++ b/usr.bin/sort/fields.c @@ -0,0 +1,380 @@ +/* $NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Subroutines to generate sort keys. */ + +#include "sort.h" + +__RCSID("$NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $"); + +#define SKIP_BLANKS(ptr) { \ + if (BLANK & d_mask[*(ptr)]) \ + while (BLANK & d_mask[*(++(ptr))]); \ +} + +#define NEXTCOL(pos) { \ + if (!SEP_FLAG) \ + while (BLANK & l_d_mask[*(++pos)]); \ + while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\ +} + +static u_char *enterfield(u_char *, const u_char *, struct field *, int); +static u_char *number(u_char *, const u_char *, u_char *, u_char *, int); +static u_char *length(u_char *, const u_char *, u_char *, u_char *, int); + +#define DECIMAL_POINT '.' + +/* + * constructs sort key with leading recheader, followed by the key, + * followed by the original line. + */ +length_t +enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data, + size_t line_size, struct field fieldtable[]) + /* keybuf: pointer to start of key */ +{ + int i; + u_char *l_d_mask; + u_char *lineend, *pos; + const u_char *endkey; + u_char *keypos; + struct coldesc *clpos; + int col = 1; + struct field *ftpos; + + l_d_mask = d_mask; + pos = line_data - 1; + lineend = line_data + line_size-1; + /* don't include rec_delimiter */ + + for (i = 0; i < ncols; i++) { + clpos = clist + i; + for (; (col < clpos->num) && (pos < lineend); col++) { + NEXTCOL(pos); + } + if (pos >= lineend) + break; + clpos->start = SEP_FLAG ? pos + 1 : pos; + NEXTCOL(pos); + clpos->end = pos; + col++; + if (pos >= lineend) { + clpos->end = lineend; + i++; + break; + } + } + for (; i <= ncols; i++) + clist[i].start = clist[i].end = lineend; + if (clist[0].start < line_data) + clist[0].start++; + + /* + * We write the sort keys (concatenated) followed by the + * original line data (for output) as the 'keybuf' data. + * keybuf->length is the number of key bytes + data bytes. + * keybuf->offset is the number of key bytes. + * We add a record separator weight after the key in case + * (as is usual) we need to preserve the order of equal lines, + * and for 'sort -u'. + * The key itself will have had the correct weight applied. + */ + keypos = keybuf->data; + endkey = keybuf_end - line_size - 1; + if (endkey <= keypos) + /* No room for any key bytes */ + return 1; + + for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) { + if ((keypos = enterfield(keypos, endkey, ftpos, + fieldtable->flags)) == NULL) + return (1); + } + + keybuf->offset = keypos - keybuf->data; + keybuf->length = keybuf->offset + line_size; + + /* + * Posix requires that equal keys be further sorted by the + * entire original record. + * NetBSD has (at least for some time) kept equal keys in + * their original order. + * For 'sort -u' posix_sort is unset. + */ + keybuf->keylen = posix_sort ? keybuf->length : keybuf->offset; + + memcpy(keypos, line_data, line_size); + return (0); +} + +/* + * constructs a field (as defined by -k) within a key + */ +static u_char * +enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld, + int gflags) +{ + u_char *start, *end, *lineend, *mask, *lweight; + struct column icol, tcol; + u_int flags; + + icol = cur_fld->icol; + tcol = cur_fld->tcol; + flags = cur_fld->flags; + start = icol.p->start; + lineend = clist[ncols].end; + if (flags & BI) + SKIP_BLANKS(start); + start += icol.indent; + start = min(start, lineend); + + if (!tcol.num) + end = lineend; + else { + if (tcol.indent) { + end = tcol.p->start; + if (flags & BT) + SKIP_BLANKS(end); + end += tcol.indent; + end = min(end, lineend); + } else + end = tcol.p->end; + } + + if (flags & L) + return length(tablepos, endkey, start, end, flags); + if (flags & N) + return number(tablepos, endkey, start, end, flags); + + /* Bound check space - assuming nothing is skipped */ + if (tablepos + (end - start) + 1 >= endkey) + return NULL; + + mask = cur_fld->mask; + lweight = cur_fld->weights; + for (; start < end; start++) { + if (!mask || mask[*start]) { + *tablepos++ = lweight[*start]; + } + } + /* Add extra byte (absent from lweight) to sort short keys correctly */ + *tablepos++ = lweight[REC_D]; + return tablepos; +} + +/* + * Numbers are converted to a floating point format (exponent & mantissa) + * so that they compare correctly as sequence of unsigned bytes. + * Bytes 0x00 and 0xff are used to terminate positive and negative numbers + * to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12. + * + * The first byte contain the overall sign, exponent sign and some of the + * exponent. These have to be ordered (-ve value, decreasing exponent), + * zero, (+ve value, increasing exponent). + * + * The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0. + * -ve values are the 1's compliments (so 0x7f isn't used!). + * + * This only leaves 63 byte values for +ve exponents - which isn't enough. + * The largest 4 exponent values are used to hold a byte count of the + * number of following bytes that contain 8 exponent bits per byte, + * This lets us sort exponents from -2^31 to +2^31. + * + * The mantissa is stored 2 digits per byte offset by 0x40, for negative + * numbers the order must be reversed (they are bit inverted). + * + * Reverse sorts are done by inverting the sign of the number. + */ +#define MAX_EXP_ENC ((int)sizeof(int)) + +static u_char * +number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, + int reverse) +{ + int exponent = -1; + int had_dp = 0; + u_char *tline; + char ch; + unsigned int val; + u_char *last_nz_pos; + u_char negate; + + if (reverse & R) + negate = 0xff; + else + negate = 0; + + /* Give ourselves space for the key terminator */ + bufend--; + + /* Ensure we have enough space for the exponent */ + if (pos + 1 + MAX_EXP_ENC > bufend) + return (NULL); + + SKIP_BLANKS(line); + if (*line == '-') { /* set the sign */ + negate ^= 0xff; + line++; + } else if (*line == '+') { + line++; + } + + /* eat initial zeroes */ + for (; *line == '0' && line < lineend; line++) + continue; + + /* calculate exponents */ + if (*line == DECIMAL_POINT) { + /* Decimal fraction */ + had_dp = 1; + while (*++line == '0' && line < lineend) + exponent--; + } else { + /* Large (absolute) value, count digits */ + for (tline = line; *tline >= '0' && + *tline <= '9' && tline < lineend; tline++) + exponent++; + } + + /* If the first/next character isn't a digit, value is zero */ + if (*line < '1' || *line > '9' || line >= lineend) { + /* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */ + /* XXX what about NaN, NAN, inf and INF */ + *pos++ = 0x80; + return pos; + } + + /* Maybe here we should allow for e+12 (etc) */ + + if (exponent < 0x40 - MAX_EXP_ENC && -exponent < 0x40 - MAX_EXP_ENC) { + /* Value ok for simple encoding */ + /* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */ + exponent += 0xc0; + *pos++ = negate ^ exponent; + } else { + /* Out or range for a single byte */ + int c, t; + t = exponent > 0 ? exponent : -exponent; + /* Count how many 8-bit bytes are needed */ + for (c = 0; ; c++) { + t >>= 8; + if (t == 0) + break; + } + /* 'c' better be 0..3 here - but probably 0..1 */ + /* Offset just outside valid range */ + t = c + 0x40 - MAX_EXP_ENC; + if (exponent < 0) + t = -t; + *pos++ = negate ^ (t + 0xc0); + /* now add each byte, most significant first */ + for (; c >= 0; c--) + *pos++ = negate ^ (exponent >> (c * 8)); + } + + /* Finally add mantissa, 2 digits per byte */ + for (last_nz_pos = pos; line < lineend; ) { + if (pos >= bufend) + return NULL; + ch = *line++; + val = (ch - '0') * 10; + if (val > 90) { + if (ch == DECIMAL_POINT && !had_dp) { + had_dp = 1; + continue; + } + break; + } + while (line < lineend) { + ch = *line++; + if (ch == DECIMAL_POINT && !had_dp) { + had_dp = 1; + continue; + } + if (ch < '0' || ch > '9') + line = lineend; + else + val += ch - '0'; + break; + } + *pos++ = negate ^ (val + 0x40); + if (val != 0) + last_nz_pos = pos; + } + + /* Add key terminator, deleting any trailing "00" */ + *last_nz_pos++ = negate; + + return (last_nz_pos); +} + +static u_char * +length(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, + int flag) +{ + u_char buf[32]; + int l; + SKIP_BLANKS(line); + l = snprintf((char *)buf, sizeof(buf), "%td", lineend - line); + return number(pos, bufend, buf, buf + l, flag); +} diff --git a/usr.bin/sort/files.c b/usr.bin/sort/files.c new file mode 100644 index 0000000..7cb27c5 --- /dev/null +++ b/usr.bin/sort/files.c @@ -0,0 +1,279 @@ +/* $NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "sort.h" +#include "fsort.h" + +__RCSID("$NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $"); + +#include <string.h> + +/* Align records in temporary files to avoid misaligned copies */ +#define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1)) + +static ssize_t seq(FILE *, u_char **); + +/* + * this is called when there is no special key. It's only called + * in the first fsort pass. + */ + +static u_char *opos; +static size_t osz; + +void +makeline_copydown(RECHEADER *recbuf) +{ + memmove(recbuf->data, opos, osz); +} + +int +makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2) +{ + u_char *pos; + int c; + + pos = recbuf->data; + if (osz != 0) { + /* + * Buffer shortage is solved by either of two ways: + * o flush previous buffered data and start using the + * buffer from start. + * makeline_copydown() above must be called. + * o realloc buffer + * + * This code has relied on realloc changing 'bufend', + * but that isn't necessarily true. + */ + pos += osz; + osz = 0; + } + + while (pos < bufend) { + c = getc(fp); + if (c == EOF) { + if (pos == recbuf->data) { + FCLOSE(fp); + return EOF; + } + /* Add terminator to partial line */ + c = REC_D; + } + *pos++ = c; + if (c == REC_D) { + recbuf->offset = 0; + recbuf->length = pos - recbuf->data; + recbuf->keylen = recbuf->length - 1; + return (0); + } + } + + /* Ran out of buffer space... */ + if (recbuf->data < bufend) { + /* Remember where the partial record is */ + osz = pos - recbuf->data; + opos = recbuf->data; + } + return (BUFFEND); +} + +/* + * This generates keys. It's only called in the first fsort pass + */ +int +makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl) +{ + static u_char *line_data; + static ssize_t line_size; + static int overflow = 0; + + /* We get re-entered after returning BUFFEND - save old data */ + if (overflow) { + overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); + return overflow ? BUFFEND : 0; + } + + line_size = seq(fp, &line_data); + if (line_size == 0) { + FCLOSE(fp); + return EOF; + } + + if (line_size > bufend - recbuf->data) { + overflow = 1; + } else { + overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl); + } + return overflow ? BUFFEND : 0; +} + +/* + * get a line of input from fp + */ +static ssize_t +seq(FILE *fp, u_char **line) +{ + static u_char *buf; + static size_t buf_size = DEFLLEN; + u_char *end, *pos; + int c; + u_char *new_buf; + + if (!buf) { + /* one-time initialization */ + buf = malloc(buf_size); + if (!buf) + err(2, "malloc of linebuf for %zu bytes failed", + buf_size); + } + + end = buf + buf_size; + pos = buf; + while ((c = getc(fp)) != EOF) { + *pos++ = c; + if (c == REC_D) { + *line = buf; + return pos - buf; + } + if (pos == end) { + /* Long line - double size of buffer */ + /* XXX: Check here for stupidly long lines */ + buf_size *= 2; + new_buf = realloc(buf, buf_size); + if (!new_buf) + err(2, "realloc of linebuf to %zu bytes failed", + buf_size); + + end = new_buf + buf_size; + pos = new_buf + (pos - buf); + buf = new_buf; + } + } + + if (pos != buf) { + /* EOF part way through line - add line terminator */ + *pos++ = REC_D; + *line = buf; + return pos - buf; + } + + return 0; +} + +/* + * write a key/line pair to a temporary file + */ +void +putrec(const RECHEADER *rec, FILE *fp) +{ + EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp, + "failed to write temp file"); +} + +/* + * write a line to output + */ +void +putline(const RECHEADER *rec, FILE *fp) +{ + EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp, + "failed to write"); +} + +/* + * write dump of key to output (for -Dk) + */ +void +putkeydump(const RECHEADER *rec, FILE *fp) +{ + EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp, + "failed to write debug key"); +} + +/* + * get a record from a temporary file. (Used by merge sort.) + */ +int +geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2) +{ + length_t file_len; + int i; + + (void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]); + + if ((u_char *)(rec + 1) > end) + return (BUFFEND); + if (!fread(&rec->length, 1, sizeof rec->length, fp)) { + fclose(fp); + return (EOF); + } + file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length); + if (end - rec->data < (ptrdiff_t)file_len) { + for (i = sizeof rec->length - 1; i >= 0; i--) + ungetc(*((char *) rec + i), fp); + return (BUFFEND); + } + + fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp); + return (0); +} diff --git a/usr.bin/sort/fsort.c b/usr.bin/sort/fsort.c new file mode 100644 index 0000000..c229ae2 --- /dev/null +++ b/usr.bin/sort/fsort.c @@ -0,0 +1,202 @@ +/* $NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Read in a block of records (until 'enough'). + * sort, write to temp file. + * Merge sort temp files into output file + * Small files miss out the temp file stage. + * Large files might get multiple merges. + */ +#include "sort.h" +#include "fsort.h" + +__RCSID("$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $"); + +#include <stdlib.h> +#include <string.h> + +#define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1)) + +void +fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl) +{ + RECHEADER **keylist; + RECHEADER **keypos, **keyp; + RECHEADER *buffer; + size_t bufsize = DEFBUFSIZE; + u_char *bufend; + int mfct = 0; + int c, nelem; + get_func_t get; + RECHEADER *crec; + RECHEADER *nbuffer; + FILE *fp, *tmp_fp; + int file_no; + int max_recs = DEBUG('m') ? 16 : MAXNUM; + + buffer = allocrec(NULL, bufsize); + bufend = (u_char *)buffer + bufsize; + /* Allocate double length keymap for radix_sort */ + keylist = malloc(2 * max_recs * sizeof(*keylist)); + if (buffer == NULL || keylist == NULL) + err(2, "failed to malloc initial buffer or keylist"); + + if (SINGL_FLD) + /* Key and data are one! */ + get = makeline; + else + /* Key (merged key fields) added before data */ + get = makekey; + + file_no = 0; + fp = fopen(filelist->names[0], "r"); + if (fp == NULL) + err(2, "%s", filelist->names[0]); + + /* Loop through reads of chunk of input files that get sorted + * and then merged together. */ + for (;;) { + keypos = keylist; + nelem = 0; + crec = buffer; + makeline_copydown(crec); + + /* Loop reading records */ + for (;;) { + c = get(fp, crec, bufend, ftbl); + /* 'c' is 0, EOF or BUFFEND */ + if (c == 0) { + /* Save start of key in input buffer */ + *keypos++ = crec; + if (++nelem == max_recs) { + c = BUFFEND; + break; + } + crec = (RECHEADER *)(crec->data + SALIGN(crec->length)); + continue; + } + if (c == EOF) { + /* try next file */ + if (++file_no >= nfiles) + /* no more files */ + break; + fp = fopen(filelist->names[file_no], "r"); + if (fp == NULL) + err(2, "%s", filelist->names[file_no]); + continue; + } + if (nelem >= max_recs + || (bufsize >= MAXBUFSIZE && nelem > 8)) + /* Need to sort and save this lot of data */ + break; + + /* c == BUFFEND, and we can process more data */ + /* Allocate a larger buffer for this lot of data */ + bufsize *= 2; + nbuffer = allocrec(buffer, bufsize); + if (!nbuffer) { + err(2, "failed to realloc buffer to %zu bytes", + bufsize); + } + + /* patch up keylist[] */ + for (keyp = &keypos[-1]; keyp >= keylist; keyp--) + *keyp = nbuffer + (*keyp - buffer); + + crec = nbuffer + (crec - buffer); + buffer = nbuffer; + bufend = (u_char *)buffer + bufsize; + } + + /* Sort this set of records */ + radix_sort(keylist, keylist + max_recs, nelem); + + if (c == EOF && mfct == 0) { + /* all the data is (sorted) in the buffer */ + append(keylist, nelem, outfp, + DEBUG('k') ? putkeydump : putline); + break; + } + + /* Save current data to a temporary file for a later merge */ + if (nelem != 0) { + tmp_fp = ftmp(); + append(keylist, nelem, tmp_fp, putrec); + save_for_merge(tmp_fp, geteasy, ftbl); + } + mfct = 1; + + if (c == EOF) { + /* merge to output file */ + merge_sort(outfp, + DEBUG('k') ? putkeydump : putline, ftbl); + break; + } + } + + free(keylist); + keylist = NULL; + free(buffer); + buffer = NULL; +} diff --git a/usr.bin/sort/fsort.h b/usr.bin/sort/fsort.h new file mode 100644 index 0000000..3d8ef4c --- /dev/null +++ b/usr.bin/sort/fsort.h @@ -0,0 +1,78 @@ +/* $NetBSD: fsort.h,v 1.17 2009/09/26 21:16:55 dsl Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fsort.h 8.1 (Berkeley) 6/6/93 + */ + +#define BUFSIZE (1<<20) +#define MAXNUM 131072 /* low guess at average record count */ +#define BUFFEND (EOF-2) +#define MAXFCT 1000 +#define DEFLLEN 65536 + +/* + * Default (initial) and maximum size of record buffer for fsort(). + * Note that no more than MAXNUM records are stored in the buffer, + * even if the buffer is not full yet. + */ +#define DEFBUFSIZE (1 << 20) /* 1MB */ +#define MAXBUFSIZE (8 << 20) /* 10 MB */ diff --git a/usr.bin/sort/init.c b/usr.bin/sort/init.c new file mode 100644 index 0000000..ff1799b --- /dev/null +++ b/usr.bin/sort/init.c @@ -0,0 +1,447 @@ +/* $NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "sort.h" + +__RCSID("$NetBSD: init.c,v 1.29 2013/10/18 20:47:06 christos Exp $"); + +#include <ctype.h> +#include <string.h> + +static void insertcol(struct field *); +static const char *setcolumn(const char *, struct field *); + +/* + * masks of ignored characters. + */ +static u_char dtable[NBINS], itable[NBINS]; + +/* + * parsed key options + */ +struct coldesc *clist = NULL; +int ncols = 0; + +/* + * clist (list of columns which correspond to one or more icol or tcol) + * is in increasing order of columns. + * Fields are kept in increasing order of fields. + */ + +/* + * keep clist in order--inserts a column in a sorted array + */ +static void +insertcol(struct field *field) +{ + int i; + struct coldesc *p; + + /* Make space for new item */ + p = realloc(clist, (ncols + 2) * sizeof(*clist)); + if (!p) + err(1, "realloc"); + clist = p; + memset(&clist[ncols], 0, sizeof(clist[ncols])); + + for (i = 0; i < ncols; i++) + if (field->icol.num <= clist[i].num) + break; + if (field->icol.num != clist[i].num) { + memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i)); + clist[i].num = field->icol.num; + ncols++; + } + if (field->tcol.num && field->tcol.num != field->icol.num) { + for (i = 0; i < ncols; i++) + if (field->tcol.num <= clist[i].num) + break; + if (field->tcol.num != clist[i].num) { + memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i)); + clist[i].num = field->tcol.num; + ncols++; + } + } +} + +/* + * matches fields with the appropriate columns--n^2 but who cares? + */ +void +fldreset(struct field *fldtab) +{ + int i; + + fldtab[0].tcol.p = clist + ncols - 1; + for (++fldtab; fldtab->icol.num; ++fldtab) { + for (i = 0; fldtab->icol.num != clist[i].num; i++) + ; + fldtab->icol.p = clist + i; + if (!fldtab->tcol.num) + continue; + for (i = 0; fldtab->tcol.num != clist[i].num; i++) + ; + fldtab->tcol.p = clist + i; + } +} + +/* + * interprets a column in a -k field + */ +static const char * +setcolumn(const char *pos, struct field *cur_fld) +{ + struct column *col; + char *npos; + int tmp; + col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol); + col->num = (int) strtol(pos, &npos, 10); + pos = npos; + if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol))) + errx(2, "field numbers must be positive"); + if (*pos == '.') { + if (!col->num) + errx(2, "cannot indent end of line"); + ++pos; + col->indent = (int) strtol(pos, &npos, 10); + pos = npos; + if (&cur_fld->icol == col) + col->indent--; + if (col->indent < 0) + errx(2, "illegal offset"); + } + for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++) + cur_fld->flags |= tmp; + if (cur_fld->icol.num == 0) + cur_fld->icol.num = 1; + return (pos); +} + +int +setfield(const char *pos, struct field *cur_fld, int gflag) +{ + cur_fld->mask = NULL; + + pos = setcolumn(pos, cur_fld); + if (*pos == '\0') /* key extends to EOL. */ + cur_fld->tcol.num = 0; + else { + if (*pos != ',') + errx(2, "illegal field descriptor"); + setcolumn((++pos), cur_fld); + } + if (!cur_fld->flags) + cur_fld->flags = gflag; + if (REVERSE) + /* A local 'r' doesn't invert the global one */ + cur_fld->flags &= ~R; + + /* Assign appropriate mask table and weight table. */ + cur_fld->weights = weight_tables[cur_fld->flags & (R | F)]; + if (cur_fld->flags & I) + cur_fld->mask = itable; + else if (cur_fld->flags & D) + cur_fld->mask = dtable; + + cur_fld->flags |= (gflag & (BI | BT)); + if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */ + cur_fld->flags &= ~BT; + + if (cur_fld->tcol.num + && !(!(cur_fld->flags & BI) && cur_fld->flags & BT) + && (cur_fld->tcol.num <= cur_fld->icol.num + /* indent if 0 -> end of field, i.e. okay */ + && cur_fld->tcol.indent != 0 + && cur_fld->tcol.indent < cur_fld->icol.indent)) + errx(2, "fields out of order"); + + insertcol(cur_fld); + return (cur_fld->tcol.num); +} + +int +optval(int desc, int tcolflag) +{ + switch(desc) { + case 'b': + if (!tcolflag) + return BI; + else + return BT; + case 'd': return D; + case 'f': return F; + case 'i': return I; + case 'l': return L; + case 'n': return N; + case 'r': return R; + default: return 0; + } +} + +/* + * Return true if the options found in ARG, according to the getopt + * spec in OPTS, require an additional argv word as an option + * argument. + */ +static int +options_need_argument(const char *arg, const char *opts) +{ + size_t pos; + const char *s; + + /*assert(arg[0] == '-');*/ + + pos = 1; + while (arg[pos]) { + s = strchr(opts, arg[pos]); + if (s == NULL) { + /* invalid option */ + return 0; + } + if (s[1] == ':') { + /* option requires argument */ + if (arg[pos+1] == '\0') { + /* no argument in this arg */ + return 1; + } + else { + /* argument is in this arg; no more options */ + return 0; + } + } + pos++; + } + return 0; +} + +/* + * Replace historic +SPEC arguments with appropriate -kSPEC. + * + * The form can be either a single +SPEC or a pair +SPEC -SPEC. + * The following -SPEC is not recognized unless it follows + * immediately. + */ +void +fixit(int *argc, char **argv, const char *opts) +{ + int i, j, sawplus; + char *vpos, *tpos, spec[20]; + int col, indent; + + sawplus = 0; + for (i = 1; i < *argc; i++) { + /* + * This loop must stop exactly where getopt will stop. + * Otherwise it turns e.g. "sort x +3" into "sort x + * -k4.1", which will croak if +3 was in fact really a + * file name. In order to do this reliably we need to + * be able to identify argv words that are option + * arguments. + */ + + if (!strcmp(argv[i], "--")) { + /* End of options; stop. */ + break; + } + + if (argv[i][0] == '+') { + /* +POS argument */ + sawplus = 1; + } else if (argv[i][0] == '-' && sawplus && + isdigit((unsigned char)argv[i][1])) { + /* -POS argument */ + sawplus = 0; + } else if (argv[i][0] == '-') { + /* other option */ + sawplus = 0; + if (options_need_argument(argv[i], opts)) { + /* skip over the argument */ + i++; + } + continue; + } else { + /* not an option at all; stop */ + sawplus = 0; + break; + } + + /* + * At this point argv[i] is an old-style spec. The + * sawplus flag used by the above loop logic also + * tells us if it's a +SPEC or -SPEC. + */ + + /* parse spec */ + tpos = argv[i]+1; + col = (int)strtol(tpos, &tpos, 10); + if (*tpos == '.') { + ++tpos; + indent = (int) strtol(tpos, &tpos, 10); + } else + indent = 0; + /* tpos now points to the optional flags */ + + /* + * In the traditional form, x.0 means beginning of line; + * in the new form, x.0 means end of line. Adjust the + * value of INDENT accordingly. + */ + if (sawplus) { + /* +POS */ + col += 1; + indent += 1; + } else { + /* -POS */ + if (indent > 0) + col += 1; + } + + /* make the new style spec */ + (void)snprintf(spec, sizeof(spec), "%d.%d%s", col, indent, + tpos); + + if (sawplus) { + /* Replace the +POS argument with new-style -kSPEC */ + asprintf(&vpos, "-k%s", spec); + argv[i] = vpos; + } else { + /* + * Append the spec to the one from the + * preceding +POS argument, and remove the + * current argv element entirely. + */ + asprintf(&vpos, "%s,%s", argv[i-1], spec); + free(argv[i-1]); + argv[i-1] = vpos; + for (j=i; j < *argc; j++) + argv[j] = argv[j+1]; + *argc -= 1; + i--; + } + } +} + +/* + * ascii, Rascii, Ftable, and RFtable map + * + * Sorting 'weight' tables. + * Convert 'ascii' characters into their sort order. + * The 'F' variants fold lower case to upper equivalent + * The 'R' variants are for reverse sorting. + * + * The record separator (REC_D) never needs a weight, this frees one + * byte value as an 'end of key' marker. This must be 0 for normal + * weight tables, and 0xff for reverse weight tables - and is used + * to terminate keys so that short keys sort before (after if reverse) + * longer keys. + * + * The field separator has a normal weight - although it cannot occur + * within a key unless it is the default (space+tab). + * + * All other bytes map to the appropriate value for the sort order. + * Numeric sorts don't need any tables, they are reversed by negation. + * + * Global reverse sorts are done by writing the sorted keys in reverse + * order - the sort itself is stil forwards. + * This means that weights are only ever used when generating keys, any + * sort of the original data bytes is always forwards and unweighted. + * + * Note: this is only good for ASCII sorting. For different LC 's, + * all bets are off. + * + * itable[] and dtable[] are the masks for -i (ignore non-printables) + * and -d (only sort blank and alphanumerics). + */ +void +settables(void) +{ + int i; + int next_weight = 1; + int rev_weight = 254; + + ascii[REC_D] = 0; + Rascii[REC_D] = 255; + Ftable[REC_D] = 0; + RFtable[REC_D] = 255; + + for (i = 0; i < 256; i++) { + if (i == REC_D) + continue; + ascii[i] = next_weight; + Rascii[i] = rev_weight; + if (Ftable[i] == 0) { + Ftable[i] = next_weight; + RFtable[i] = rev_weight; + Ftable[tolower(i)] = next_weight; + RFtable[tolower(i)] = rev_weight; + } + next_weight++; + rev_weight--; + + if (i == '\n' || isprint(i)) + itable[i] = 1; + + if (i == '\n' || i == '\t' || i == ' ' || isalnum(i)) + dtable[i] = 1; + } +} diff --git a/usr.bin/sort/msort.c b/usr.bin/sort/msort.c new file mode 100644 index 0000000..6cbcf90 --- /dev/null +++ b/usr.bin/sort/msort.c @@ -0,0 +1,431 @@ +/* $NetBSD: msort.c,v 1.31 2016/06/01 02:37:55 kre Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "sort.h" +#include "fsort.h" + +__RCSID("$NetBSD: msort.c,v 1.31 2016/06/01 02:37:55 kre Exp $"); + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <util.h> + +/* Subroutines using comparisons: merge sort and check order */ +#define DELETE (1) + +typedef struct mfile { + FILE *fp; + get_func_t get; + RECHEADER *rec; + u_char *end; +} MFILE; + +static int cmp(RECHEADER *, RECHEADER *); +static int insert(struct mfile **, struct mfile *, int, int); +static void merge_sort_fstack(FILE *, put_func_t, struct field *); + +/* + * Number of files merge() can merge in one pass. + */ +#define MERGE_FNUM 16 + +static struct mfile fstack[MERGE_FNUM]; +static struct mfile fstack_1[MERGE_FNUM]; +static struct mfile fstack_2[MERGE_FNUM]; +static int fstack_count, fstack_1_count, fstack_2_count; + +void +save_for_merge(FILE *fp, get_func_t get, struct field *ftbl) +{ + FILE *mfp, *mfp1, *mfp2; + + if (fstack_count == MERGE_FNUM) { + /* Must reduce the number of temporary files */ + mfp = ftmp(); + merge_sort_fstack(mfp, putrec, ftbl); + /* Save output in next layer */ + if (fstack_1_count == MERGE_FNUM) { + mfp1 = ftmp(); + memcpy(fstack, fstack_1, sizeof fstack); + merge_sort_fstack(mfp1, putrec, ftbl); + if (fstack_2_count == MERGE_FNUM) { + /* More than 4096 files! */ + mfp2 = ftmp(); + memcpy(fstack, fstack_2, sizeof fstack); + merge_sort_fstack(mfp2, putrec, ftbl); + fstack_2[0].fp = mfp2; + fstack_2_count = 1; + } + fstack_2[fstack_2_count].fp = mfp1; + fstack_2[fstack_2_count].get = geteasy; + fstack_2_count++; + fstack_1_count = 0; + } + fstack_1[fstack_1_count].fp = mfp; + fstack_1[fstack_1_count].get = geteasy; + fstack_1_count++; + fstack_count = 0; + } + + fstack[fstack_count].fp = fp; + fstack[fstack_count++].get = get; +} + +void +fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl) +{ + get_func_t get = SINGL_FLD ? makeline : makekey; + FILE *fp; + int i; + + for (i = 0; i < nfiles; i++) { + fp = fopen(filelist->names[i], "r"); + if (fp == NULL) + err(2, "%s", filelist->names[i]); + save_for_merge(fp, get, ftbl); + } + + merge_sort(outfp, putline, ftbl); +} + +void +merge_sort(FILE *outfp, put_func_t put, struct field *ftbl) +{ + int count = fstack_1_count + fstack_2_count; + FILE *mfp; + int i; + + if (count == 0) { + /* All files in initial array */ + merge_sort_fstack(outfp, put, ftbl); + return; + } + + count += fstack_count; + + /* Too many files for one merge sort */ + for (;;) { + /* Sort latest 16 files */ + i = count; + if (i > MERGE_FNUM) + i = MERGE_FNUM; + while (fstack_count > 0) + fstack[--i] = fstack[--fstack_count]; + while (i > 0 && fstack_1_count > 0) + fstack[--i] = fstack_1[--fstack_1_count]; + while (i > 0) + fstack[--i] = fstack_2[--fstack_2_count]; + if (count <= MERGE_FNUM) { + /* Got all the data */ + fstack_count = count; + merge_sort_fstack(outfp, put, ftbl); + return; + } + mfp = ftmp(); + fstack_count = count > MERGE_FNUM ? MERGE_FNUM : count; + merge_sort_fstack(mfp, putrec, ftbl); + fstack[0].fp = mfp; + fstack[0].get = geteasy; + fstack_count = 1; + count -= MERGE_FNUM - 1; + } +} + +static void +merge_sort_fstack(FILE *outfp, put_func_t put, struct field *ftbl) +{ + struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile; + RECHEADER *new_rec; + u_char *new_end; + void *tmp; + int c, i, nfiles; + size_t sz; + + /* Read one record from each file (read again if a duplicate) */ + for (nfiles = i = 0; i < fstack_count; i++) { + cfile = &fstack[i]; + if (cfile->rec == NULL) { + cfile->rec = allocrec(NULL, DEFLLEN); + cfile->end = (u_char *)cfile->rec + DEFLLEN; + } + rewind(cfile->fp); + + for (;;) { + c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl); + if (c == EOF) + break; + + if (c == BUFFEND) { + /* Double buffer size */ + sz = (cfile->end - (u_char *)cfile->rec) * 2; + cfile->rec = allocrec(cfile->rec, sz); + cfile->end = (u_char *)cfile->rec + sz; + continue; + } + + if (nfiles != 0) { + if (insert(flist, cfile, nfiles, !DELETE)) + /* Duplicate removed */ + continue; + } else + flist[0] = cfile; + nfiles++; + break; + } + } + + if (nfiles == 0) + return; + + /* + * We now loop reading a new record from the file with the + * 'sorted first' existing record. + * As each record is added, the 'first' record is written to the + * output file - maintaining one record from each file in the sorted + * list. + */ + new_rec = allocrec(NULL, DEFLLEN); + new_end = (u_char *)new_rec + DEFLLEN; + for (;;) { + cfile = flist[0]; + c = cfile->get(cfile->fp, new_rec, new_end, ftbl); + if (c == EOF) { + /* Write out last record from now-empty input */ + put(cfile->rec, outfp); + if (--nfiles == 0) + break; + /* Replace from file with now-first sorted record. */ + /* (Moving base 'flist' saves copying everything!) */ + flist++; + continue; + } + if (c == BUFFEND) { + /* Buffer not large enough - double in size */ + sz = (new_end - (u_char *)new_rec) * 2; + new_rec = allocrec(new_rec, sz); + new_end = (u_char *)new_rec +sz; + continue; + } + + /* Swap in new buffer, saving old */ + tmp = cfile->rec; + cfile->rec = new_rec; + new_rec = tmp; + tmp = cfile->end; + cfile->end = new_end; + new_end = tmp; + + /* Add into sort, removing the original first entry */ + c = insert(flist, cfile, nfiles, DELETE); + if (c != 0 || (UNIQUE && cfile == flist[0] + && cmp(new_rec, cfile->rec) == 0)) { + /* Was an unwanted duplicate, restore buffer */ + tmp = cfile->rec; + cfile->rec = new_rec; + new_rec = tmp; + tmp = cfile->end; + cfile->end = new_end; + new_end = tmp; + continue; + } + + /* Write out 'old' record */ + put(new_rec, outfp); + } + + free(new_rec); +} + +/* + * if delete: inserts rec in flist, deletes flist[0]; + * otherwise just inserts *rec in flist. + * Returns 1 if record is a duplicate to be ignored. + */ +static int +insert(struct mfile **flist, struct mfile *rec, int ttop, int delete) +{ + int mid, top = ttop, bot = 0, cmpv = 1; + + for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) { + cmpv = cmp(rec->rec, flist[mid]->rec); + if (cmpv == 0 ) { + if (UNIQUE) + /* Duplicate key, read another record */ + /* NB: This doesn't guarantee to keep any + * particular record. */ + return 1; + /* + * Apply sort by input file order. + * We could truncate the sort is the fileno are + * adjacent - but that is all too hard! + * The fileno cannot be equal, since we only have one + * record from each file (+ flist[0] which never + * comes here). + */ + cmpv = rec < flist[mid] ? -1 : 1; + if (REVERSE) + cmpv = -cmpv; + } + if (cmpv < 0) + top = mid; + else + bot = mid; + } + + /* At this point we haven't yet compared against flist[0] */ + + if (delete) { + /* flist[0] is ourselves, only the caller knows the old data */ + if (bot != 0) { + memmove(flist, flist + 1, bot * sizeof(MFILE *)); + flist[bot] = rec; + } + return 0; + } + + /* Inserting original set of records */ + + if (bot == 0 && cmpv != 0) { + /* Doesn't match flist[1], must compare with flist[0] */ + cmpv = cmp(rec->rec, flist[0]->rec); + if (cmpv == 0 && UNIQUE) + return 1; + /* Add matching keys in file order (ie new is later) */ + if (cmpv < 0) + bot = -1; + } + bot++; + memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *)); + flist[bot] = rec; + return 0; +} + +/* + * check order on one file + */ +void +order(struct filelist *filelist, struct field *ftbl, int quiet) +{ + get_func_t get = SINGL_FLD ? makeline : makekey; + RECHEADER *crec, *prec, *trec; + u_char *crec_end, *prec_end, *trec_end; + FILE *fp; + int c; + + fp = fopen(filelist->names[0], "r"); + if (fp == NULL) + err(2, "%s", filelist->names[0]); + + crec = malloc(offsetof(RECHEADER, data[DEFLLEN])); + crec_end = crec->data + DEFLLEN; + prec = malloc(offsetof(RECHEADER, data[DEFLLEN])); + prec_end = prec->data + DEFLLEN; + + /* XXX this does exit(0) for overlong lines */ + if (get(fp, prec, prec_end, ftbl) != 0) + exit(0); + while (get(fp, crec, crec_end, ftbl) == 0) { + if (0 < (c = cmp(prec, crec))) { + if (quiet) + exit(1); + crec->data[crec->length-1] = 0; + errx(1, "found disorder: %s", crec->data+crec->offset); + } + if (UNIQUE && !c) { + if (quiet) + exit(1); + crec->data[crec->length-1] = 0; + errx(1, "found non-uniqueness: %s", + crec->data+crec->offset); + } + /* + * Swap pointers so that this record is on place pointed + * to by prec and new record is read to place pointed to by + * crec. + */ + trec = prec; + prec = crec; + crec = trec; + trec_end = prec_end; + prec_end = crec_end; + crec_end = trec_end; + } + exit(0); +} + +static int +cmp(RECHEADER *rec1, RECHEADER *rec2) +{ + int len; + int r; + + /* key is weights */ + len = min(rec1->keylen, rec2->keylen); + r = memcmp(rec1->data, rec2->data, len); + if (r == 0) + r = rec1->keylen - rec2->keylen; + if (REVERSE) + r = -r; + return r; +} diff --git a/usr.bin/sort/pathnames.h b/usr.bin/sort/pathnames.h new file mode 100644 index 0000000..534671e --- /dev/null +++ b/usr.bin/sort/pathnames.h @@ -0,0 +1,66 @@ +/* $NetBSD: pathnames.h,v 1.6 2008/04/28 20:24:15 martin Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pathnames.h 8.1 (Berkeley) 6/6/93 + */ + +#define _PATH_STDIN "/dev/stdin" diff --git a/usr.bin/sort/radix_sort.c b/usr.bin/sort/radix_sort.c new file mode 100644 index 0000000..4ec5ff8 --- /dev/null +++ b/usr.bin/sort/radix_sort.c @@ -0,0 +1,217 @@ +/* $NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $ */ + +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy and by Dan Bernstein at New York University, + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95"; +#else +__RCSID("$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $"); +#endif +#endif /* LIBC_SCCS and not lint */ + +/* + * 'stable' radix sort initially from libc/stdlib/radixsort.c + */ + +#include <sys/types.h> + +#include <assert.h> +#include <errno.h> +#include <util.h> +#include "sort.h" + +typedef struct { + RECHEADER **sa; /* Base of saved area */ + int sn; /* Number of entries */ + int si; /* index into data for compare */ +} stack; + +static void simplesort(RECHEADER **, int, int); + +#define THRESHOLD 20 /* Divert to simplesort(). */ + +#define empty(s) (s >= sp) +#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si +#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i +#define swap(a, b, t) t = a, a = b, b = t + +void +radix_sort(RECHEADER **a, RECHEADER **ta, int n) +{ + u_int count[256], nc, bmin; + u_int c; + RECHEADER **ak, **tai, **lim; + RECHEADER *hdr; + int stack_size = 512; + stack *s, *sp, *sp0, *sp1, temp; + RECHEADER **top[256]; + u_int *cp, bigc; + int data_index = 0; + + if (n < THRESHOLD && !DEBUG('r')) { + simplesort(a, n, 0); + return; + } + + s = emalloc(stack_size * sizeof *s); + memset(&count, 0, sizeof count); + /* Technically 'top' doesn't need zeroing */ + memset(&top, 0, sizeof top); + + sp = s; + push(a, n, data_index); + while (!empty(s)) { + pop(a, n, data_index); + if (n < THRESHOLD && !DEBUG('r')) { + simplesort(a, n, data_index); + continue; + } + + /* Count number of times each 'next byte' occurs */ + nc = 0; + bmin = 255; + lim = a + n; + for (ak = a, tai = ta; ak < lim; ak++) { + hdr = *ak; + if (data_index >= hdr->keylen) { + /* Short key, copy to start of output */ + if (UNIQUE && a != sp->sa) + /* Stop duplicate being written out */ + hdr->keylen = -1; + *a++ = hdr; + n--; + continue; + } + /* Save in temp buffer for distribute */ + *tai++ = hdr; + c = hdr->data[data_index]; + if (++count[c] == 1) { + if (c < bmin) + bmin = c; + nc++; + } + } + /* + * We need save the bounds for each 'next byte' that + * occurs more so we can sort each block. + */ + if (sp + nc > s + stack_size) { + stack_size *= 2; + sp1 = erealloc(s, stack_size * sizeof *s); + sp = sp1 + (sp - s); + s = sp1; + } + + /* Minor optimisation to do the largest set last */ + sp0 = sp1 = sp; + bigc = 2; + /* Convert 'counts' positions, saving bounds for later sorts */ + ak = a; + for (cp = count + bmin; nc > 0; cp++) { + while (*cp == 0) + cp++; + if ((c = *cp) > 1) { + if (c > bigc) { + bigc = c; + sp1 = sp; + } + push(ak, c, data_index+1); + } + ak += c; + top[cp-count] = ak; + *cp = 0; /* Reset count[]. */ + nc--; + } + swap(*sp0, *sp1, temp); + + for (ak = ta+n; --ak >= ta;) /* Deal to piles. */ + *--top[(*ak)->data[data_index]] = *ak; + } + + free(s); +} + +/* insertion sort, short records are sorted before long ones */ +static void +simplesort(RECHEADER **a, int n, int data_index) +{ + RECHEADER **ak, **ai; + RECHEADER *akh; + RECHEADER **lim = a + n; + const u_char *s, *t; + int s_len, t_len; + int i; + int r; + + if (n <= 1) + return; + + for (ak = a+1; ak < lim; ak++) { + akh = *ak; + s = akh->data; + s_len = akh->keylen; + for (ai = ak; ;) { + ai--; + t_len = (*ai)->keylen; + if (t_len != -1) { + t = (*ai)->data; + for (i = data_index; ; i++) { + if (i >= s_len || i >= t_len) { + r = s_len - t_len; + break; + } + r = s[i] - t[i]; + if (r != 0) + break; + } + if (r >= 0) { + if (r == 0 && UNIQUE) { + /* Put record below existing */ + ai[1] = ai[0]; + /* Mark as duplicate - ignore */ + akh->keylen = -1; + } else { + ai++; + } + break; + } + } + ai[1] = ai[0]; + if (ai == a) + break; + } + ai[0] = akh; + } +} diff --git a/usr.bin/sort/sort.1 b/usr.bin/sort/sort.1 new file mode 100644 index 0000000..548aafd --- /dev/null +++ b/usr.bin/sort/sort.1 @@ -0,0 +1,525 @@ +.\" $NetBSD: sort.1,v 1.38 2017/07/03 21:34:21 wiz Exp $ +.\" +.\" Copyright (c) 2000-2003 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Ben Harris and Jaromir Dolecek. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)sort.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd June 1, 2016 +.Dt SORT 1 +.Os +.Sh NAME +.Nm sort +.Nd sort or merge text files +.Sh SYNOPSIS +.Nm +.Op Fl bdfHilmnrSsu +.Oo +.Fl k +.Ar kstart Ns Op Li \&, Ns Ar kend +.Oc +.Op Fl o Ar output +.Op Fl R Ar char +.Op Fl T Ar dir +.Op Fl t Ar char +.Op Ar +.Nm +.Fl C Ns | Ns Fl c +.Op Fl bdfilnru +.Oo +.Fl k +.Ar kstart Ns Op Li \&, Ns Ar kend +.Op Fl t Ar char +.Oc +.Op Fl R Ar char +.Op Ar file +.Sh DESCRIPTION +The +.Nm +utility sorts text files by lines. +Comparisons are based on one or more sort keys extracted +from each line of input, and are performed lexicographically. +By default, if keys are not given, +.Nm +regards each input line as a single field. +.Pp +The following options are available: +.Bl -tag -width Fl +.It Fl C +Identical to +.Fl c +without the error messages in the case of unsorted input. +.It Fl c +Check that the single input file is sorted. +If the file is not sorted, +.Nm +produces the appropriate error messages and exits with code 1; otherwise, +.Nm +returns 0. +.Nm +.Fl c +produces no output. +See also +.Fl u . +.It Fl H +Ignored for compatibility with earlier versions of +.Nm . +.It Fl m +Merge only; the input files are assumed to be pre-sorted. +.It Fl o Ar output +The argument given is the name of an +.Ar output +file to be used instead of the standard output. +This file can be the same as one of the input files. +.It Fl S +Don't use stable sort. +Default is to use stable sort. +.It Fl s +Use stable sort, keeps records with equal keys in their original order. +This is the default. +Provided for compatibility with other +.Nm +implementations only. +.It Fl T Ar dir +Use +.Ar dir +as the directory for temporary files. +The default is the value specified in the environment variable +.Ev TMPDIR or +.Pa /tmp +if +.Ev TMPDIR +is not defined. +.It Fl u +Unique: suppress all but one in each set of lines having equal keys. +If used with the +.Fl c +option, check that there are no lines with duplicate keys. +.El +.Pp +The following options, +which should be given before any +.Fl k +options, override the default ordering rules. +When ordering options appear independent of, +and before, key field specifications, +the requested field ordering rules are +applied globally to all sort keys. +When attached to a specific key (see +.Fl k ) , +the ordering options override +all global ordering options for that key. +.Bl -tag -width Fl +.It Fl d +Only blank space and alphanumeric characters +.\" according +.\" to the current setting of LC_CTYPE +are used +in making comparisons. +.It Fl f +Considers all lowercase characters that have uppercase +equivalents to be the same for purposes of comparison. +.It Fl i +Ignore all non-printable characters. +.It Fl l +Sort by the string length of the field, not by the field itself. +.It Fl n +An initial numeric string, consisting of optional blank space, optional +plus or minus sign, and zero or more digits (including decimal point) +.\" with +.\" optional radix character and thousands +.\" separator +.\" (as defined in the current locale), +is sorted by arithmetic value. +(The +.Fl n +option no longer implies the +.Fl b +option.) +.It Fl r +Reverse the sense of comparisons. +.El +.Pp +The treatment of field separators can be altered using these options: +.Bl -tag -width Fl +.It Fl b +Ignores leading blank space when determining the start +and end of a restricted sort key. +A +.Fl b +option specified before the first +.Fl k +option applies globally to all +.Fl k +options. +Otherwise, the +.Fl b +option can be attached independently to each +.Ar field +argument of the +.Fl k +option (see below). +Note that the +.Fl b +option has no effect unless key fields are specified. +.It Fl k Ar kstart Ns Op Li \&, Ns Ar kend +Designates the starting position, +.Ar kstart , +and optional ending position, +.Ar kend , +of a key field. +The +.Fl k +option replaces the obsolescent options +.Cm \(pl Ns Ar pos1 +and +.Fl Ns Ar pos2 . +.It Fl R Ar char +.Ar char +is used as the record separator character. +This should be used with discretion; +.Fl R Aq Ar alphanumeric +usually produces undesirable results. +If char is not a single character, then it +specifies the value of the desired record +separator as an integer specified in any +of the normal NNN, 0ooo, or 0xXXX ways, +or as an octal value preceded by \e. +Caution: do not attempt to specify Ctl-A +as +.Dq -R 1 +which will not do what was intended at all! +The default record separator is newline. +.It Fl t Ar char +.Ar char +is used as the field separator character. +The initial +.Ar char +is not considered to be part of a field when determining +key offsets (see below). +Each occurrence of +.Ar char +is significant (for example, +.Dq Ar charchar +delimits an empty field). +If +.Fl t +is not specified, the default field separator is a sequence of +blank-space characters, and consecutive blank spaces do +.Em not +delimit an empty field; further, the initial blank space +.Em is +considered part of a field when determining key offsets. +.El +.Pp +The following operands are available: +.Bl -tag -width Ar +.It Ar file +The pathname of a file to be sorted, merged, or checked. +If no +.Ar file +operands are specified, or if +a +.Ar file +operand is +.Fl , +the standard input is used. +.El +.Pp +A field is defined as a minimal sequence of characters followed by a +field separator or a newline character. +By default, the first +blank space of a sequence of blank spaces acts as the field separator. +All blank spaces in a sequence of blank spaces are considered +as part of the next field; for example, all blank spaces at +the beginning of a line are considered to be part of the +first field. +.Pp +Fields are specified +by the +.Fl k +.Ar kstart Ns Op \&, Ns Ar kend +argument. +A missing +.Ar kend +argument defaults to the end of a line. +.Pp +The arguments +.Ar kstart +and +.Ar kend +have the form +.Ar m Ns Li \&. Ns Ar n +and can be followed by one or more of the letters +.Cm b , d , f , i , +.Cm l , n , +and +.Cm r , +which correspond to the options discussed above. +A +.Ar kstart +position specified by +.Ar m Ns Li \&. Ns Ar n +.Pq Ar m , n No > 0 +is interpreted as the +.Ar n Ns th +character in the +.Ar m Ns th +field. +A missing +.Li \&. Ns Ar n +in +.Ar kstart +means +.Ql \&.1 , +indicating the first character of the +.Ar m Ns th +field; if the +.Fl b +option is in effect, +.Ar n +is counted from the first non-blank character in the +.Ar m Ns th +field; +.Ar m Ns Li \&.1b +refers to the first non-blank character in the +.Ar m Ns th +field. +.Pp +A +.Ar kend +position specified by +.Ar m Ns Li \&. Ns Ar n +is interpreted as +the +.Ar n Ns th +character (including separators) of the +.Ar m Ns th +field. +A missing +.Li \&. Ns Ar n +indicates the last character of the +.Ar m Ns th +field; +.Ar m += \&0 +designates the end of a line. +Thus the option +.Fl k +.Sm off +.Xo +.Ar v Li \&. Ar x Li \&, +.Ar w Li \&. Ar y +.Xc +.Sm on +is synonymous with the obsolescent option +.Sm off +.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1 +.Fl Ar w-\&1 Li \&. Ar y ; +.Sm on +when +.Ar y +is omitted, +.Fl k +.Sm off +.Ar v Li \&. Ar x Li \&, Ar w +.Sm on +is synonymous with +.Sm off +.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1 +.Fl Ar w+1 Li \&.0 . +.Sm on +The obsolescent +.Cm \(pl Ns Ar pos1 +.Fl Ns Ar pos2 +option is still supported, except for +.Fl Ns Ar w Ns Li \&.0b , +which has no +.Fl k +equivalent. +.Pp +.Nm +compares records by comparing the key fields selected by +.Fl k +arguments, +from first given to last, +until discovering a difference. +If there are no +.Fl k +arguments, the whole record is treated as a single key. +After exhausting the +.Fl k +arguments, if no difference has been found, +then the result depends upon the +.Fl u +and +.Fl S +option settings. +With +.Fl u +the records are considered identical, and one is supressed. +Otherwise with +.Fl s +set (default) the records are left in their original order, +or with +.Fl S +(posix mode) the whole record is considered as a tie breaker. +.\" +.\" If you fail to understand why it doesn't matter which order +.\" the records are output when they are wholly identical, there +.\" is nothing that this man page can say that wll help! +.\" +.Sh ENVIRONMENT +If the following environment variable exists, it is used by +.Nm . +.Bl -tag -width Ev +.It Ev TMPDIR +.Nm +uses the contents of the +.Ev TMPDIR +environment variable as the path in which to store +temporary files. +.El +.Sh FILES +.Bl -tag -width outputNUMBER+some -compact +.It Pa /tmp/sort.* +Default temporary files. +.It Ar output Ns NUMBER +Temporary file which is used for output if +.Ar output +already exists. +Once sorting is finished, this file replaces +.Ar output +(via +.Xr link 2 +and +.Xr unlink 2 ) . +.El +.Sh EXIT STATUS +Sort exits with one of the following values: +.Bl -tag -width flag -compact +.It 0 +Normal behavior. +.It 1 +On disorder (or non-uniqueness) with the +.Fl c +(or +.Fl C ) +option. +.It 2 +An error occurred. +.El +.Sh SEE ALSO +.Xr comm 1 , +.Xr join 1 , +.Xr uniq 1 , +.Xr qsort 3 , +.Xr radixsort 3 +.Sh HISTORY +A +.Nm +command appeared in +.At v5 . +This +.Nm +implementation appeared in +.Bx 4.4 +and is used since +.Nx 1.6 . +.Sh BUGS +Posix requires the locale's thousands separator be ignored in numbers. +It may be faster to sort very large files in pieces and then explicitly +merge them. +.Sh NOTES +This +.Nm +has no limits on input line length (other than imposed by available +memory) or any restrictions on bytes allowed within lines. +.Pp +To protect data +.Nm +.Fl o +calls +.Xr link 2 +and +.Xr unlink 2 , +and thus fails on protected directories. +.Pp +Input files should be text files. +If file doesn't end with record separator (which is typically newline), the +.Nm +utility silently supplies one. +.Pp +The current +.Nm +uses lexicographic radix sorting, which requires +that sort keys be kept in memory (as opposed to previous versions which used quick +and merge sorts and did not.) +Thus performance depends highly on efficient choice of sort keys, and the +.Fl b +option and the +.Ar kend +argument of the +.Fl k +option should be used whenever possible. +Similarly, +.Nm +.Fl k1f +is equivalent to +.Nm +.Fl f +and may take twice as long. diff --git a/usr.bin/sort/sort.c b/usr.bin/sort/sort.c new file mode 100644 index 0000000..ee5ffdb --- /dev/null +++ b/usr.bin/sort/sort.c @@ -0,0 +1,419 @@ +/* $NetBSD: sort.c,v 1.64 2017/01/10 21:13:45 christos Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <sys/cdefs.h> +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1993\ + The Regents of the University of California. All rights reserved."); +#endif /* not lint */ +__RCSID("$NetBSD: sort.c,v 1.64 2017/01/10 21:13:45 christos Exp $"); + +/* Sort sorts a file using an optional user-defined key. + * Sort uses radix sort for internal sorting, and allows + * a choice of merge sort and radix sort for external sorting. + */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/stat.h> +#include <sys/resource.h> + +#include <locale.h> +#include <paths.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <util.h> + +#include "sort.h" +#include "fsort.h" +#include "pathnames.h" + +int REC_D = '\n'; +u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ + +/* + * weight tables. Gweights is one of ascii, Rascii.. + * modified to weight rec_d = 0 (or 255) + */ +u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable }; +u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; + +int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; +int REVERSE = 0; +int posix_sort; + +unsigned int debug_flags = 0; + +static char toutpath[MAXPATHLEN]; + +const char *tmpdir; /* where temporary files should be put */ + +static void cleanup(void); +static void onsignal(int); +__dead static void usage(const char *); + +int +main(int argc, char *argv[]) +{ + int ch, i, stdinflag = 0; + char mode = 0; + char *outfile, *outpath = 0; + struct field *fldtab; + size_t fldtab_sz, fld_cnt; + struct filelist filelist; + int num_input_files; + FILE *outfp = NULL; + struct rlimit rl; + struct stat st; + + setlocale(LC_ALL, ""); + + /* bump RLIMIT_NOFILE to maximum our hard limit allows */ + if (getrlimit(RLIMIT_NOFILE, &rl) < 0) + err(2, "getrlimit"); + rl.rlim_cur = rl.rlim_max; + if (setrlimit(RLIMIT_NOFILE, &rl) < 0) + err(2, "setrlimit"); + + d_mask[REC_D = '\n'] = REC_D_F; + d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; + + /* fldtab[0] is the global options. */ + fldtab_sz = 3; + fld_cnt = 0; + fldtab = emalloc(fldtab_sz * sizeof(*fldtab)); + memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); + +#define SORT_OPTS "bcCdD:fHik:lmno:rR:sSt:T:u" + + /* Convert "+field" args to -k format */ + fixit(&argc, argv, SORT_OPTS); + + if (!(tmpdir = getenv("TMPDIR"))) + tmpdir = _PATH_TMP; + + while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) { + switch (ch) { + case 'b': + fldtab[0].flags |= BI | BT; + break; + case 'c': case 'C': case 'm': + if (mode) + usage("Incompatible operation modes"); + mode = ch; + break; + case 'D': /* Debug flags */ + for (i = 0; optarg[i]; i++) + debug_flags |= 1 << (optarg[i] & 31); + break; + case 'd': case 'f': case 'i': case 'n': case 'l': + fldtab[0].flags |= optval(ch, 0); + break; + case 'H': + /* -H was ; use merge sort for blocks of large files' */ + /* That is now the default. */ + break; + case 'k': + fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); + memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); + fldtab_sz++; + + setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); + break; + case 'o': + outpath = optarg; + break; + case 'r': + REVERSE = 1; + break; + case 'R': + if (REC_D != '\n') + usage("multiple record delimiters"); + REC_D = *optarg; + if (optarg[1] != '\0') { + char *ep; + int t = 0; + + if (optarg[0] == '\\') + optarg++, t = 8; + REC_D = (int)strtol(optarg, &ep, t); + if (*ep != '\0' || REC_D < 0 || + REC_D >= (int)__arraycount(d_mask)) + errx(2, "invalid record delimiter %s", + optarg); + } + if (REC_D == '\n') + break; + d_mask['\n'] = d_mask[' ']; + d_mask[REC_D] = REC_D_F; + break; + case 's': + /* + * Nominally 'stable sort', keep lines with equal keys + * in input file order. (Default for NetBSD) + * (-s for GNU sort compatibility.) + */ + posix_sort = 0; + break; + case 'S': + /* + * Reverse of -s! + * This needs to enforce a POSIX sort where records + * with equal keys are then sorted by the raw data. + * Currently not implemented! + * (using libc radixsort() v sradixsort() doesn't + * have the desired effect.) + */ + posix_sort = 1; + break; + case 't': + if (SEP_FLAG) + usage("multiple field delimiters"); + SEP_FLAG = 1; + d_mask[' '] &= ~FLD_D; + d_mask['\t'] &= ~FLD_D; + d_mask['\n'] &= ~FLD_D; + d_mask[(u_char)*optarg] |= FLD_D; + if (d_mask[(u_char)*optarg] & REC_D_F) + errx(2, "record/field delimiter clash"); + break; + case 'T': + /* -T tmpdir */ + tmpdir = optarg; + break; + case 'u': + UNIQUE = 1; + break; + case '?': + default: + usage(NULL); + } + } + + if (UNIQUE) + /* Don't sort on raw record if keys match */ + posix_sort = 0; + + if ((mode == 'c' || mode == 'C') && argc > optind+1) + errx(2, "too many input files for -c option"); + if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { + outpath = argv[argc-1]; + argc -= 2; + } + if (mode == 'm' && argc - optind > (MAXFCT - (16+1))*16) + errx(2, "too many input files for -m option"); + + for (i = optind; i < argc; i++) { + /* allow one occurrence of /dev/stdin */ + if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { + if (stdinflag) + warnx("ignoring extra \"%s\" in file list", + argv[i]); + else + stdinflag = 1; + + /* change to /dev/stdin if '-' */ + if (argv[i][0] == '-') { + static char path_stdin[] = _PATH_STDIN; + argv[i] = path_stdin; + } + + } else if ((ch = access(argv[i], R_OK))) + err(2, "%s", argv[i]); + } + + if (fldtab[1].icol.num == 0) { + /* No sort key specified */ + if (fldtab[0].flags & (I|D|F|N|L)) { + /* Modified - generate a key that covers the line */ + fldtab[0].flags &= ~(BI|BT); + setfield("1", &fldtab[++fld_cnt], fldtab->flags); + fldreset(fldtab); + } else { + /* Unmodified, just compare the line */ + SINGL_FLD = 1; + fldtab[0].icol.num = 1; + } + } else { + fldreset(fldtab); + } + + settables(); + + if (optind == argc) { + static const char * const names[] = { _PATH_STDIN, NULL }; + filelist.names = names; + num_input_files = 1; + } else { + filelist.names = (const char * const *) &argv[optind]; + num_input_files = argc - optind; + } + + if (mode == 'c' || mode == 'C') { + order(&filelist, fldtab, mode == 'C'); + /* NOT REACHED */ + } + + if (!outpath) { + toutpath[0] = '\0'; /* path not used in this case */ + outfile = outpath = toutpath; + outfp = stdout; + } else if (lstat(outpath, &st) == 0 + && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { + /* output file exists and isn't character or block device */ + struct sigaction act; + static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, + SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; + int outfd; + errno = 0; + if (access(outpath, W_OK)) + err(2, "%s", outpath); + (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", + outpath); + if ((outfd = mkstemp(toutpath)) == -1) + err(2, "Cannot create temporary file `%s'", toutpath); + (void)atexit(cleanup); + act.sa_handler = onsignal; + (void) sigemptyset(&act.sa_mask); + act.sa_flags = SA_RESTART | SA_RESETHAND; + for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ + sigaction(sigtable[i], &act, 0); + outfile = toutpath; + if ((outfp = fdopen(outfd, "w")) == NULL) + err(2, "Cannot open temporary file `%s'", toutpath); + } else { + outfile = outpath; + + if ((outfp = fopen(outfile, "w")) == NULL) + err(2, "output file %s", outfile); + } + + if (mode == 'm') + fmerge(&filelist, num_input_files, outfp, fldtab); + else + fsort(&filelist, num_input_files, outfp, fldtab); + + if (outfile != outpath) { + if (access(outfile, F_OK)) + err(2, "%s", outfile); + + /* + * Copy file permissions bits of the original file. + * st is initialized above, when we create the + * temporary spool file. + */ + if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { + err(2, "cannot chmod %s: output left in %s", + outpath, outfile); + } + + (void)unlink(outpath); + if (link(outfile, outpath)) + err(2, "cannot link %s: output left in %s", + outpath, outfile); + (void)unlink(outfile); + toutpath[0] = 0; + } + exit(0); +} + +static void +onsignal(int sig) +{ + cleanup(); +} + +static void +cleanup(void) +{ + if (toutpath[0]) + (void)unlink(toutpath); +} + +static void +usage(const char *msg) +{ + const char *pn = getprogname(); + + if (msg != NULL) + (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); + (void)fprintf(stderr, + "usage: %s [-bdfHilmnrSsu] [-k kstart[,kend]] [-o output]" + " [-R char] [-T dir]\n", pn); + (void)fprintf(stderr, + " [-t char] [file ...]\n"); + (void)fprintf(stderr, + " or: %s -C|-c [-bdfilnru] [-k kstart[,kend]] [-o output]" + " [-R char]\n", pn); + (void)fprintf(stderr, + " [-t char] [file]\n"); + exit(2); +} + +RECHEADER * +allocrec(RECHEADER *rec, size_t size) +{ + + return (erealloc(rec, size + sizeof(long) - 1)); +} diff --git a/usr.bin/sort/sort.h b/usr.bin/sort/sort.h new file mode 100644 index 0000000..d7a3144 --- /dev/null +++ b/usr.bin/sort/sort.h @@ -0,0 +1,201 @@ +/* $NetBSD: sort.h,v 1.36 2016/06/01 02:37:55 kre Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)sort.h 8.1 (Berkeley) 6/6/93 + */ + +#include <sys/param.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define NBINS 256 + +/* values for masks, weights, and other flags. */ +/* R and F get used to index weight_tables[] */ +#define R 0x01 /* Field is reversed */ +#define F 0x02 /* weight lower and upper case the same */ +#define I 0x04 /* mask out non-printable characters */ +#define D 0x08 /* sort alphanumeric characters only */ +#define N 0x10 /* Field is a number */ +#define BI 0x20 /* ignore blanks in icol */ +#define BT 0x40 /* ignore blanks in tcol */ +#define L 0x80 /* Sort by field length */ + +/* masks for delimiters: blanks, fields, and termination. */ +#define BLANK 1 /* ' ', '\t'; '\n' if -R is invoked */ +#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */ +#define REC_D_F 4 /* '\n' default; from -R otherwise */ + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) + +#define FCLOSE(file) { \ + if (EOF == fclose(file)) \ + err(2, "%p", file); \ +} + +#define EWRITE(ptr, size, n, f, fmt) { \ + if (!fwrite(ptr, size, n, f)) \ + err(2, fmt); \ +} + +/* Records are limited to MAXBUFSIZE (8MB) and less if you want to sort + * in a sane way. + * Anyone who wants to sort data records longer than 2GB definitely needs a + * different program! */ +typedef unsigned int length_t; + +/* A record is a key/line pair starting at rec.data. It has a total length + * and an offset to the start of the line half of the pair. + */ +typedef struct recheader { + length_t length; /* total length of key and line */ + length_t offset; /* to line */ + int keylen; /* length of key */ + u_char data[]; /* key then line */ +} RECHEADER; + +/* This is the column as seen by struct field. It is used by enterfield. + * They are matched with corresponding coldescs during initialization. + */ +struct column { + struct coldesc *p; + int num; + int indent; +}; + +/* a coldesc has a number and pointers to the beginning and end of the + * corresponding column in the current line. This is determined in enterkey. + */ +typedef struct coldesc { + u_char *start; + u_char *end; + int num; +} COLDESC; + +/* A field has an initial and final column; an omitted final column + * implies the end of the line. Flags regulate omission of blanks and + * numerical sorts; mask determines which characters are ignored (from -i, -d); + * weights determines the sort weights of a character (from -f, -r). + * + * The first field contain the global flags etc. + * The list terminates when icol = 0. + */ +struct field { + struct column icol; + struct column tcol; + u_int flags; + u_char *mask; + u_char *weights; +}; + +struct filelist { + const char * const * names; +}; + +typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *); +typedef void (*put_func_t)(const RECHEADER *, FILE *); + +extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS]; +extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */ +extern u_char d_mask[NBINS]; +extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE; +extern int posix_sort; +extern int REC_D; +extern const char *tmpdir; +extern struct coldesc *clist; +extern int ncols; + +#define DEBUG(ch) (debug_flags & (1 << ((ch) & 31))) +extern unsigned int debug_flags; + +RECHEADER *allocrec(RECHEADER *, size_t); +void append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *)); +void concat(FILE *, FILE *); +length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *); +void fixit(int *, char **, const char *); +void fldreset(struct field *); +FILE *ftmp(void); +void fmerge(struct filelist *, int, FILE *, struct field *); +void save_for_merge(FILE *, get_func_t, struct field *); +void merge_sort(FILE *, put_func_t, struct field *); +void fsort(struct filelist *, int, FILE *, struct field *); +int geteasy(FILE *, RECHEADER *, u_char *, struct field *); +int makekey(FILE *, RECHEADER *, u_char *, struct field *); +int makeline(FILE *, RECHEADER *, u_char *, struct field *); +void makeline_copydown(RECHEADER *); +int optval(int, int); +__dead void order(struct filelist *, struct field *, int); +void putline(const RECHEADER *, FILE *); +void putrec(const RECHEADER *, FILE *); +void putkeydump(const RECHEADER *, FILE *); +void rd_append(int, int, int, FILE *, u_char *, u_char *); +void radix_sort(RECHEADER **, RECHEADER **, int); +int setfield(const char *, struct field *, int); +void settables(void); diff --git a/usr.bin/sort/tmp.c b/usr.bin/sort/tmp.c new file mode 100644 index 0000000..155b3f5 --- /dev/null +++ b/usr.bin/sort/tmp.c @@ -0,0 +1,106 @@ +/* $NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $ */ + +/*- + * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Ben Harris and Jaromir Dolecek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +__RCSID("$NetBSD: tmp.c,v 1.16 2009/11/06 18:34:22 joerg Exp $"); + +#include <sys/param.h> + +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "sort.h" +#include "pathnames.h" + +#define _NAME_TMP "sort.XXXXXXXX" + +FILE * +ftmp(void) +{ + sigset_t set, oset; + FILE *fp; + int fd; + char path[MAXPATHLEN]; + + (void)snprintf(path, sizeof(path), "%s%s%s", tmpdir, + (tmpdir[strlen(tmpdir)-1] != '/') ? "/" : "", _NAME_TMP); + + sigfillset(&set); + (void)sigprocmask(SIG_BLOCK, &set, &oset); + if ((fd = mkstemp(path)) < 0) + err(2, "ftmp: mkstemp(\"%s\")", path); + if (!(fp = fdopen(fd, "w+"))) + err(2, "ftmp: fdopen(\"%s\")", path); + if (!DEBUG('t')) + (void)unlink(path); + + (void)sigprocmask(SIG_SETMASK, &oset, NULL); + return (fp); +} |