diff options
author | Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> | 2019-02-01 22:55:37 +0000 |
---|---|---|
committer | Kiyoshi Aman <kiyoshi.aman+adelie@gmail.com> | 2019-02-03 18:22:05 -0600 |
commit | 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe (patch) | |
tree | 154a22fe556b49e6927197336f8bf91b12eacd5e /usr.bin/cut | |
download | userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.gz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.bz2 userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.tar.xz userland-5b57d28ffb6e1ef86b50f7d05d977826eae89bfe.zip |
initial population
Diffstat (limited to 'usr.bin/cut')
-rw-r--r-- | usr.bin/cut/cut.1 | 126 | ||||
-rw-r--r-- | usr.bin/cut/cut.c | 306 | ||||
-rw-r--r-- | usr.bin/cut/x_cut.c | 95 |
3 files changed, 527 insertions, 0 deletions
diff --git a/usr.bin/cut/cut.1 b/usr.bin/cut/cut.1 new file mode 100644 index 0000000..a3dbf55 --- /dev/null +++ b/usr.bin/cut/cut.1 @@ -0,0 +1,126 @@ +.\" $NetBSD: cut.1,v 1.18 2012/06/20 17:53:19 wiz Exp $ +.\" +.\" Copyright (c) 1989, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)cut.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd June 12, 2012 +.Dt CUT 1 +.Os +.Sh NAME +.Nm cut +.Nd select portions of each line of a file +.Sh SYNOPSIS +.Nm +.Fl b Ar list +.Op Fl n +.Op Ar +.Nm +.Fl c Ar list +.Op Ar +.Nm +.Fl f Ar list +.Op Fl d Ar string +.Op Fl s +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility selects portions of each line (as specified by +.Ar list ) +from each +.Ar file +and writes them to the +standard output. +If the +.Ar file +argument is a single dash +.Pq Sq - +or no +.Ar file +arguments were specified, lines are read from the standard input. +The items specified by +.Ar list +can be in terms of column position or in terms of fields delimited +by a special character. +Column and field numbering start from 1. +.Pp +.Ar list +is a comma or whitespace separated set of increasing numbers and/or +number ranges. +Number ranges consist of a number, a dash +.Pq Li \- , +and a second number +and select the columns or fields from the first number to the second, +inclusive. +Numbers or number ranges may be preceded by a dash, which selects all +columns or fields from 1 to the first number. +Numbers or number ranges may be followed by a dash, which selects all +columns or fields from the last number to the end of the line. +Numbers and number ranges may be repeated, overlapping, and in any order. +It is not an error to select columns or fields not present in the +input line. +.Pp +The options are as follows: +.Bl -tag -width Fl +.It Fl b Ar list +.Ar list +specifies byte positions. +.It Fl c Ar list +.Ar list +specifies character positions. +.It Fl d Ar string +Use the first character of +.Ar string +as the field delimiter character. +The default is the +.Aq TAB +character. +.It Fl f Ar list +.Ar list +specifies fields, separated by the field delimiter character. +The selected fields are output, +separated by the field delimiter character. +.It Fl n +Do not split multi-byte characters. +.It Fl s +Suppress lines with no field delimiter characters. +Unless specified, lines with no delimiters are passed through unmodified. +.El +.Sh EXIT STATUS +.Ex -std +.Sh SEE ALSO +.Xr paste 1 +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.2-92 . diff --git a/usr.bin/cut/cut.c b/usr.bin/cut/cut.c new file mode 100644 index 0000000..d84b46e --- /dev/null +++ b/usr.bin/cut/cut.c @@ -0,0 +1,306 @@ +/* $NetBSD: cut.c,v 1.29 2014/02/03 20:22:19 wiz Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1989, 1993\ + The Regents of the University of California. All rights reserved."); +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; +#endif +__RCSID("$NetBSD: cut.c,v 1.29 2014/02/03 20:22:19 wiz Exp $"); +#endif /* not lint */ + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <util.h> +#include <wchar.h> +#include <sys/param.h> + +static int bflag; +static int cflag; +static char dchar; +static int dflag; +static int fflag; +static int sflag; + +static void b_cut(FILE *, const char *); +static void c_cut(FILE *, const char *); +static void f_cut(FILE *, const char *); +static void get_list(char *); +static void usage(void) __dead; + +int +main(int argc, char *argv[]) +{ + FILE *fp; + void (*fcn)(FILE *, const char *); + int ch, rval; + + fcn = NULL; + (void)setlocale(LC_ALL, ""); + + dchar = '\t'; /* default delimiter is \t */ + + /* Since we don't support multi-byte characters, the -c and -b + options are equivalent, and the -n option is meaningless. */ + while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) + switch(ch) { + case 'b': + fcn = b_cut; + get_list(optarg); + bflag = 1; + break; + case 'c': + fcn = c_cut; + get_list(optarg); + cflag = 1; + break; + case 'd': + dchar = *optarg; + dflag = 1; + break; + case 'f': + get_list(optarg); + fcn = f_cut; + fflag = 1; + break; + case 's': + sflag = 1; + break; + case 'n': + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + if (fflag) { + if (cflag || bflag) + usage(); + } else if ((!cflag && !bflag) || dflag || sflag) + usage(); + else if (bflag && cflag) + usage(); + + rval = 0; + if (*argv) + for (; *argv; ++argv) { + if (strcmp(*argv, "-") == 0) + fcn(stdin, "stdin"); + else { + if ((fp = fopen(*argv, "r"))) { + fcn(fp, *argv); + (void)fclose(fp); + } else { + rval = 1; + warn("%s", *argv); + } + } + } + else + fcn(stdin, "stdin"); + return(rval); +} + +static size_t autostart, autostop, maxval; + +static char *positions = NULL; +static size_t numpositions = 0; +#define ALLOC_CHUNK _POSIX2_LINE_MAX /* malloc granularity */ + +static void +get_list(char *list) +{ + size_t setautostart, start, stop; + char *pos; + char *p; + + if (positions == NULL) { + numpositions = ALLOC_CHUNK; + positions = ecalloc(numpositions, sizeof(*positions)); + } + + /* + * set a byte in the positions array to indicate if a field or + * column is to be selected; use +1, it's 1-based, not 0-based. + * This parser is less restrictive than the Draft 9 POSIX spec. + * POSIX doesn't allow lists that aren't in increasing order or + * overlapping lists. We also handle "-3-5" although there's no + * real reason to. + */ + for (; (p = strtok(list, ", \t")) != NULL; list = NULL) { + setautostart = start = stop = 0; + if (*p == '-') { + ++p; + setautostart = 1; + } + if (isdigit((unsigned char)*p)) { + start = stop = strtol(p, &p, 10); + if (setautostart && start > autostart) + autostart = start; + } + if (*p == '-') { + if (isdigit((unsigned char)p[1])) + stop = strtol(p + 1, &p, 10); + if (*p == '-') { + ++p; + if (!autostop || autostop > stop) + autostop = stop; + } + } + if (*p) + errx(1, "[-bcf] list: illegal list value"); + if (!stop || !start) + errx(1, "[-bcf] list: values may not include zero"); + if (stop + 1 > numpositions) { + size_t newsize; + newsize = roundup(stop + 1, ALLOC_CHUNK); + positions = erealloc(positions, newsize); + (void)memset(positions + numpositions, 0, + newsize - numpositions); + numpositions = newsize; + } + if (maxval < stop) + maxval = stop; + for (pos = positions + start; start++ <= stop; pos++) + *pos = 1; + } + + /* overlapping ranges */ + if (autostop && maxval > autostop) + maxval = autostop; + + /* set autostart */ + if (autostart) + (void)memset(positions + 1, '1', autostart); +} + +static void +/*ARGSUSED*/ +f_cut(FILE *fp, const char *fname __unused) +{ + int ch, field, isdelim; + char *pos, *p, sep; + int output; + size_t len; + char *lbuf, *tbuf; + + for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len)) != NULL;) { + output = 0; + if (lbuf[len - 1] != '\n') { + /* no newline at the end of the last line so add one */ + if ((tbuf = (char *)malloc(len + 1)) == NULL) + err(1, NULL); + (void)memcpy(tbuf, lbuf, len); + tbuf[len++] = '\n'; + lbuf = tbuf; + } + for (isdelim = 0, p = lbuf;; ++p) { + ch = *p; + /* this should work if newline is delimiter */ + if (ch == sep) + isdelim = 1; + if (ch == '\n') { + if (!isdelim && !sflag) + (void)fwrite(lbuf, len, 1, stdout); + break; + } + } + if (!isdelim) + continue; + + pos = positions + 1; + for (field = maxval, p = lbuf; field; --field, ++pos) { + if (*pos) { + if (output++) + (void)putchar(sep); + while ((ch = *p++) != '\n' && ch != sep) + (void)putchar(ch); + } else { + while ((ch = *p++) != '\n' && ch != sep) + continue; + } + if (ch == '\n') + break; + } + if (ch != '\n') { + if (autostop) { + if (output) + (void)putchar(sep); + for (; (ch = *p) != '\n'; ++p) + (void)putchar(ch); + } else + for (; (ch = *p) != '\n'; ++p); + } + (void)putchar('\n'); + if (tbuf) { + free(tbuf); + tbuf = NULL; + } + } + if (tbuf) + free(tbuf); +} + +static void +usage(void) +{ + (void)fprintf(stderr, "usage:\tcut -b list [-n] [file ...]\n" + "\tcut -c list [file ...]\n" + "\tcut -f list [-d string] [-s] [file ...]\n"); + exit(1); +} + +/* make b_put(): */ +#define CUT_BYTE 1 +#include "x_cut.c" +#undef CUT_BYTE + +/* make c_put(): */ +#define CUT_BYTE 0 +#include "x_cut.c" +#undef CUT_BYTE diff --git a/usr.bin/cut/x_cut.c b/usr.bin/cut/x_cut.c new file mode 100644 index 0000000..006b5af --- /dev/null +++ b/usr.bin/cut/x_cut.c @@ -0,0 +1,95 @@ +/* $NetBSD: x_cut.c,v 1.2 2007/07/02 18:41:04 christos Exp $ */ + +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This file is #include'd twice from cut.c, to generate both + * single- and multibyte versions of the same code. + * + * In cut.c #define: + * CUT_BYTE=0 to define b_cut (singlebyte), and + * CUT_BYTE=1 to define c_cut (multibyte). + * + */ + +#if (CUT_BYTE == 1) +# define CUT_FN b_cut +# define CUT_CH_T int +# define CUT_GETC getc +# define CUT_EOF EOF +# define CUT_PUTCHAR putchar +#else +# define CUT_FN c_cut +# define CUT_CH_T wint_t +# define CUT_GETC getwc +# define CUT_EOF WEOF +# define CUT_PUTCHAR putwchar +#endif + + +/* ARGSUSED */ +void +CUT_FN(FILE *fp, const char *fname __unused) +{ + CUT_CH_T ch; + int col; + char *pos; + + ch = 0; + for (;;) { + pos = positions + 1; + for (col = maxval; col; --col) { + if ((ch = CUT_GETC(fp)) == EOF) + return; + if (ch == '\n') + break; + if (*pos++) + (void)CUT_PUTCHAR(ch); + } + if (ch != '\n') { + if (autostop) + while ((ch = CUT_GETC(fp)) != CUT_EOF && ch != '\n') + (void)CUT_PUTCHAR(ch); + else + while ((ch = CUT_GETC(fp)) != CUT_EOF && ch != '\n'); + } + (void)CUT_PUTCHAR('\n'); + } +} + +#undef CUT_FN +#undef CUT_CH_T +#undef CUT_GETC +#undef CUT_EOF +#undef CUT_PUTCHAR + |