From 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe Mon Sep 17 00:00:00 2001 From: Kiyoshi Aman Date: Fri, 1 Feb 2019 22:55:37 +0000 Subject: initial population --- usr.bin/wc/wc.1 | 146 +++++++++++++++++++++++ usr.bin/wc/wc.c | 354 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 500 insertions(+) create mode 100644 usr.bin/wc/wc.1 create mode 100644 usr.bin/wc/wc.c (limited to 'usr.bin/wc') diff --git a/usr.bin/wc/wc.1 b/usr.bin/wc/wc.1 new file mode 100644 index 0000000..60454a3 --- /dev/null +++ b/usr.bin/wc/wc.1 @@ -0,0 +1,146 @@ +.\" $NetBSD: wc.1,v 1.17 2017/07/03 21:34:22 wiz Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" from: @(#)wc.1 8.2 (Berkeley) 4/19/94 +.\" +.Dd February 18, 2010 +.Dt WC 1 +.Os +.Sh NAME +.Nm wc +.Nd word, line, and byte count +.Sh SYNOPSIS +.Nm +.Op Fl c | Fl m +.Op Fl Llw +.Op Ar file ... +.Sh DESCRIPTION +The +.Nm +utility displays the number of lines, words, bytes and characters contained in each +input +.Ar file +(or standard input, by default) to the standard output. +A line is defined as a string of characters delimited by a +character, +and a word is defined as a string of characters delimited by white space +characters. +White space characters are the set of characters for which the +.Xr iswspace 3 +function returns true. +If more than one input file is specified, a line of cumulative counts +for all the files is displayed on a separate line after the output for +the last file. +.Pp +The following options are available: +.Bl -tag -width Ds +.It Fl c +The number of bytes in each input file +is written to the standard output. +.It Fl L +The number of characters in the longest line of each input file +is written to the standard output. +.It Fl l +The number of lines in each input file +is written to the standard output. +.It Fl m +The number of characters in each input file +is written to the standard output. +.It Fl w +The number of words in each input file +is written to the standard output. +.El +.Pp +When an option is specified, +.Nm +only +reports the +information requested by that option. +The default action is equivalent to all the flags +.Fl clw +having been specified. +.Pp +The following operands are available: +.Bl -tag -width Ds +.It Ar file +A pathname of an input file. +.El +.Pp +If no file names +are specified, the standard input is used and +no file name is displayed. +.Pp +By default, the standard output contains a line for each +input file of the form: +.Bd -literal -offset indent +lines words bytes file_name +.Ed +.Sh EXIT STATUS +.Ex -std wc +.Sh COMPATIBILITY +Historically, the +.Nm +utility was documented to define a word as a ``maximal string of +characters delimited by +.Aq space , +.Aq tab +or +.Aq newline +characters''. +The implementation, however, didn't handle non-printing characters +correctly so that `` ^D^E '' counted as 6 spaces, while ``foo^D^Ebar'' +counted as 8 characters. +.Bx 4 +systems after +.Bx 4.3 +modified the implementation to be consistent +with the documentation. +This implementation defines a ``word'' in terms of the +.Xr iswspace 3 +function, as required by +.St -p1003.2 . +.Pp +The +.Fl L +option is a non-standard extension, compatible with the +.Fl L +option of the GNU and +.Fx +.Nm +utilities. +.Sh SEE ALSO +.Xr iswspace 3 +.Sh STANDARDS +The +.Nm +utility conforms to +.St -p1003.2-92 . diff --git a/usr.bin/wc/wc.c b/usr.bin/wc/wc.c new file mode 100644 index 0000000..602a450 --- /dev/null +++ b/usr.bin/wc/wc.c @@ -0,0 +1,354 @@ +/* $NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $ */ + +/* + * Copyright (c) 1980, 1987, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\ + The Regents of the University of California. All rights reserved."); +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95"; +#else +__RCSID("$NetBSD: wc.c,v 1.35 2011/09/16 15:39:30 joerg Exp $"); +#endif +#endif /* not lint */ + +/* wc line, word, char count and optionally longest line. */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef NO_QUAD +typedef u_long wc_count_t; +# define WCFMT " %7lu" +# define WCCAST unsigned long +#else +typedef u_quad_t wc_count_t; +# define WCFMT " %7llu" +# define WCCAST unsigned long long +#endif + +static wc_count_t tlinect, twordct, tcharct, tlongest; +static bool doline, doword, dobyte, dochar, dolongest; +static int rval = 0; + +static void cnt(const char *); +static void print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t, + const char *); +__dead static void usage(void); +static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *, + size_t *, const char *); + +int +main(int argc, char *argv[]) +{ + int ch; + + setlocale(LC_ALL, ""); + + while ((ch = getopt(argc, argv, "lwcmL")) != -1) + switch (ch) { + case 'l': + doline = true; + break; + case 'w': + doword = true; + break; + case 'm': + dochar = true; + dobyte = 0; + break; + case 'c': + dochar = 0; + dobyte = true; + break; + case 'L': + dolongest = true; + break; + case '?': + default: + usage(); + } + argv += optind; + argc -= optind; + + /* Wc's flags are on by default. */ + if (!(doline || doword || dobyte || dochar || dolongest)) + doline = doword = dobyte = true; + + if (*argv == NULL) { + cnt(NULL); + } else { + bool dototal = (argc > 1); + + do { + cnt(*argv); + } while(*++argv); + + if (dototal) { + print_counts(tlinect, twordct, tcharct, tlongest, + "total"); + } + } + + exit(rval); +} + +static size_t +do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st, + size_t *retcnt, const char *file) +{ + size_t r; + size_t c = 0; + + do { + r = mbrtowc(wc, p, len, st); + if (r == (size_t)-1) { + warnx("%s: invalid byte sequence", file); + rval = 1; + + /* XXX skip 1 byte */ + len--; + p++; + memset(st, 0, sizeof(*st)); + continue; + } else if (r == (size_t)-2) + break; + else if (r == 0) + r = 1; + c++; + if (wc) + wc++; + len -= r; + p += r; + } while (len > 0); + + *retcnt = c; + + return (r); +} + +static void +cnt(const char *file) +{ + u_char buf[MAXBSIZE]; + wchar_t wbuf[MAXBSIZE]; + struct stat sb; + wc_count_t charct, linect, wordct, longest; + mbstate_t st; + u_char *C; + wchar_t *WC; + const char *name; /* filename or */ + size_t r = 0; + int fd, len = 0; + + linect = wordct = charct = longest = 0; + if (file != NULL) { + if ((fd = open(file, O_RDONLY, 0)) < 0) { + warn("%s", file); + rval = 1; + return; + } + name = file; + } else { + fd = STDIN_FILENO; + name = ""; + } + + if (dochar || doword || dolongest) + (void)memset(&st, 0, sizeof(st)); + + if (!(doword || dolongest)) { + /* + * line counting is split out because it's a lot + * faster to get lines than to get words, since + * the word count requires some logic. + */ + if (doline || dochar) { + while ((len = read(fd, buf, MAXBSIZE)) > 0) { + if (dochar) { + size_t wlen; + + r = do_mb(0, (char *)buf, (size_t)len, + &st, &wlen, name); + charct += wlen; + } else if (dobyte) + charct += len; + if (doline) { + for (C = buf; len--; ++C) { + if (*C == '\n') + ++linect; + } + } + } + } + + /* + * if all we need is the number of characters and + * it's a directory or a regular or linked file, just + * stat the puppy. We avoid testing for it not being + * a special device in case someone adds a new type + * of inode. + */ + else if (dobyte) { + if (fstat(fd, &sb)) { + warn("%s", name); + rval = 1; + } else { + if (S_ISREG(sb.st_mode) || + S_ISLNK(sb.st_mode) || + S_ISDIR(sb.st_mode)) { + charct = sb.st_size; + } else { + while ((len = + read(fd, buf, MAXBSIZE)) > 0) + charct += len; + } + } + } + } else { + /* do it the hard way... */ + wc_count_t linelen; + bool gotsp; + + linelen = 0; + gotsp = true; + while ((len = read(fd, buf, MAXBSIZE)) > 0) { + size_t wlen; + + r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen, + name); + if (dochar) { + charct += wlen; + } else if (dobyte) { + charct += len; + } + for (WC = wbuf; wlen--; ++WC) { + if (iswspace(*WC)) { + gotsp = true; + if (*WC == L'\n') { + ++linect; + if (linelen > longest) + longest = linelen; + linelen = 0; + } else { + linelen++; + } + } else { + /* + * This line implements the POSIX + * spec, i.e. a word is a "maximal + * string of characters delimited by + * whitespace." Notice nothing was + * said about a character being + * printing or non-printing. + */ + if (gotsp) { + gotsp = false; + ++wordct; + } + + linelen++; + } + } + } + } + + if (len == -1) { + warn("%s", name); + rval = 1; + } + if (dochar && r == (size_t)-2) { + warnx("%s: incomplete multibyte character", name); + rval = 1; + } + + print_counts(linect, wordct, charct, longest, file); + + /* + * don't bother checkint doline, doword, or dobyte --- speeds + * up the common case + */ + tlinect += linect; + twordct += wordct; + tcharct += charct; + if (dolongest && longest > tlongest) + tlongest = longest; + + if (close(fd)) { + warn("%s", name); + rval = 1; + } +} + +static void +print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars, + wc_count_t longest, const char *name) +{ + + if (doline) + (void)printf(WCFMT, (WCCAST)lines); + if (doword) + (void)printf(WCFMT, (WCCAST)words); + if (dobyte || dochar) + (void)printf(WCFMT, (WCCAST)chars); + if (dolongest) + (void)printf(WCFMT, (WCCAST)longest); + + if (name != NULL) + (void)printf(" %s\n", name); + else + (void)putchar('\n'); +} + +static void +usage(void) +{ + + (void)fprintf(stderr, "usage: wc [-c | -m] [-Llw] [file ...]\n"); + exit(1); +} -- cgit v1.2.3-60-g2f50