diff options
author | Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> | 2019-02-01 22:55:37 +0000 |
---|---|---|
committer | Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> | 2019-02-03 18:22:05 -0600 |
commit | 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe (patch) | |
tree | 154a22fe556b49e6927197336f8bf91b12eacd5e /usr.bin/uniq | |
download | userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.gz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.bz2 userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.xz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.zip |
initial population
Diffstat (limited to 'usr.bin/uniq')
-rw-r--r-- | usr.bin/uniq/uniq.1 | 130 | ||||
-rw-r--r-- | usr.bin/uniq/uniq.c | 266 |
2 files changed, 396 insertions, 0 deletions
diff --git a/usr.bin/uniq/uniq.1 b/usr.bin/uniq/uniq.1 new file mode 100644 index 0000000..bf1a78c --- /dev/null +++ b/usr.bin/uniq/uniq.1 @@ -0,0 +1,130 @@ +.\" $NetBSD: uniq.1,v 1.13 2012/08/26 14:13:50 wiz Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)uniq.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd August 26, 2012 +.Dt UNIQ 1 +.Os +.Sh NAME +.Nm uniq +.Nd report or filter out repeated lines in a file +.Sh SYNOPSIS +.Nm +.Op Fl cdu +.Op Fl f Ar fields +.Op Fl s Ar chars +.Oo +.Ar input_file +.Op Ar output_file +.Oc +.Sh DESCRIPTION +The +.Nm +utility reads the standard input comparing adjacent lines, and writes +a copy of each unique input line to the standard output. +The second and succeeding copies of identical adjacent input lines are +not written. +Repeated lines in the input will not be detected if they are not adjacent, +so it may be necessary to sort the files first. +.Pp +The following options are available: +.Bl -tag -width Ds +.It Fl c +Precede each output line with the count of the number of times the line +occurred in the input, followed by a single space. +.It Fl d +Don't output lines that are not repeated in the input. +.It Fl f Ar fields +Ignore the first +.Ar fields +in each input line when doing comparisons. +A field is a string of non-blank characters separated from adjacent fields +by blanks. +Field numbers are one based, i.e., the first field is field one. +.It Fl s Ar chars +Ignore the first +.Ar chars +characters in each input line when doing comparisons. +If specified in conjunction with the +.Fl f +option, the first +.Ar chars +characters after the first +.Ar fields +fields will be ignored. +Character numbers are one based, i.e., the first character is character one. +.It Fl u +Don't output lines that are repeated in the input. +.\".It Fl Ns Ar n +.\"(Deprecated; replaced by +.\".Fl f ) . +.\"Ignore the first n +.\"fields on each input line when doing comparisons, +.\"where n is a number. +.\"A field is a string of non-blank +.\"characters separated from adjacent fields +.\"by blanks. +.\".It Cm \&\(pl Ns Ar n +.\"(Deprecated; replaced by +.\".Fl s ) . +.\"Ignore the first +.\".Ar m +.\"characters when doing comparisons, where +.\".Ar m +.\"is a +.\"number. +.El +.Pp +Additional arguments +.Ar input_file +and +.Ar output_file +may be specified on the command line, +where the former is then used as the name of an input file, +and the latter as the name of an output file. +.Sh EXIT STATUS +.Ex -std +.Sh COMPATIBILITY +The historic +.Cm \&\(pl Ns Ar number +and +.Fl Ns Ar number +options have been deprecated but are still supported in this implementation. +.Sh SEE ALSO +.Xr sort 1 +.Sh STANDARDS +The +.Nm +utility is expected to be +.St -p1003.2 +compatible. diff --git a/usr.bin/uniq/uniq.c b/usr.bin/uniq/uniq.c new file mode 100644 index 0000000..ded7037 --- /dev/null +++ b/usr.bin/uniq/uniq.c @@ -0,0 +1,266 @@ +/* $NetBSD: uniq.c,v 1.20 2016/10/16 06:17:51 abhinav Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Case Larsen. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ + The Regents of the University of California. All rights reserved."); +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; +#endif +__RCSID("$NetBSD: uniq.c,v 1.20 2016/10/16 06:17:51 abhinav Exp $"); +#endif /* not lint */ + +#include <err.h> +#include <errno.h> +#include <stdio.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +static int cflag, dflag, uflag; +static int numchars, numfields, repeats; + +static FILE *file(const char *, const char *); +static void show(FILE *, const char *); +static const char *skip(const char *, size_t *); +static void obsolete(char *[]); +static void usage(void) __dead; + +int +main (int argc, char *argv[]) +{ + const char *prevp, *thisp; + FILE *ifp, *ofp; + int ch; + char *prevline, *thisline, *p; + size_t prevlinesize, thislinesize, psize; + size_t prevlen, thislen; + + setprogname(argv[0]); + ifp = ofp = NULL; + obsolete(argv); + while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1) + switch (ch) { + case '-': + --optind; + goto done; + case 'c': + cflag = 1; + break; + case 'd': + dflag = 1; + break; + case 'f': + numfields = strtol(optarg, &p, 10); + if (numfields < 0 || *p) + errx(1, "illegal field skip value: %s", optarg); + break; + case 's': + numchars = strtol(optarg, &p, 10); + if (numchars < 0 || *p) + errx(1, "illegal character skip value: %s", + optarg); + break; + case 'u': + uflag = 1; + break; + case '?': + default: + usage(); + } + +done: argc -= optind; + argv +=optind; + + switch(argc) { + case 0: + ifp = stdin; + ofp = stdout; + break; + case 1: + ifp = file(argv[0], "r"); + ofp = stdout; + break; + case 2: + ifp = file(argv[0], "r"); + ofp = file(argv[1], "w"); + break; + default: + usage(); + } + + if ((p = fgetln(ifp, &psize)) == NULL) + return 0; + prevlinesize = prevlen = psize; + if ((prevline = malloc(prevlinesize + 1)) == NULL) + err(1, "malloc"); + (void)memcpy(prevline, p, prevlinesize); + prevline[prevlinesize] = '\0'; + + if (numfields || numchars) + prevp = skip(prevline, &prevlen); + else + prevp = prevline; + + thislinesize = psize; + if ((thisline = malloc(thislinesize + 1)) == NULL) + err(1, "malloc"); + + while ((p = fgetln(ifp, &psize)) != NULL) { + if (psize > thislinesize) { + if ((thisline = realloc(thisline, psize + 1)) == NULL) + err(1, "realloc"); + thislinesize = psize; + } + thislen = psize; + (void)memcpy(thisline, p, psize); + thisline[psize] = '\0'; + + /* If requested get the chosen fields + character offsets. */ + if (numfields || numchars) { + thisp = skip(thisline, &thislen); + } else { + thisp = thisline; + } + + /* If different, print; set previous to new value. */ + if (thislen != prevlen || strcmp(thisp, prevp)) { + char *t; + size_t ts; + + show(ofp, prevline); + t = prevline; + prevline = thisline; + thisline = t; + ts = prevlinesize; + prevlinesize = thislinesize; + thislinesize = ts; + prevp = thisp; + prevlen = thislen; + repeats = 0; + } else + ++repeats; + } + show(ofp, prevline); + free(prevline); + free(thisline); + return 0; +} + +/* + * show -- + * Output a line depending on the flags and number of repetitions + * of the line. + */ +static void +show(FILE *ofp, const char *str) +{ + + if ((dflag && repeats == 0) || (uflag && repeats > 0)) + return; + if (cflag) { + (void)fprintf(ofp, "%4d %s", repeats + 1, str); + } else { + (void)fprintf(ofp, "%s", str); + } +} + +static const char * +skip(const char *str, size_t *linesize) +{ + int infield, nchars, nfields; + size_t ls = *linesize; + + for (nfields = numfields, infield = 0; nfields && *str; ++str, --ls) + if (isspace((unsigned char)*str)) { + if (infield) { + infield = 0; + --nfields; + } + } else if (!infield) + infield = 1; + for (nchars = numchars; nchars-- && *str; ++str, --ls) + continue; + *linesize = ls; + return str; +} + +static FILE * +file(const char *name, const char *mode) +{ + FILE *fp; + + if ((fp = fopen(name, mode)) == NULL) + err(1, "%s", name); + return(fp); +} + +static void +obsolete(char *argv[]) +{ + char *ap, *p, *start; + + while ((ap = *++argv) != NULL) { + /* Return if "--" or not an option of any form. */ + if (ap[0] != '-') { + if (ap[0] != '+') + return; + } else if (ap[1] == '-') + return; + if (!isdigit((unsigned char)ap[1])) + continue; + /* + * Digit signifies an old-style option. Malloc space for dash, + * new option and argument. + */ + (void)asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1); + if (!p) + err(1, "malloc"); + start = p; + *argv = start; + } +} + +static void +usage(void) +{ + (void)fprintf(stderr, "usage: %s [-cdu] [-f fields] [-s chars] " + "[input_file [output_file]]\n", getprogname()); + exit(1); +} |