From 5b57d28ffb6e1ef86b50f7d05d977826eae89bfe Mon Sep 17 00:00:00 2001 From: Kiyoshi Aman Date: Fri, 1 Feb 2019 22:55:37 +0000 Subject: initial population --- usr.bin/grep/fastgrep.c | 336 ++++++++++++++++ usr.bin/grep/file.c | 276 +++++++++++++ usr.bin/grep/grep.1 | 486 +++++++++++++++++++++++ usr.bin/grep/grep.c | 722 +++++++++++++++++++++++++++++++++++ usr.bin/grep/grep.h | 162 ++++++++ usr.bin/grep/nls/C.msg | 13 + usr.bin/grep/nls/es_ES.ISO8859-1.msg | 14 + usr.bin/grep/nls/gl_ES.ISO8859-1.msg | 14 + usr.bin/grep/nls/hu_HU.ISO8859-2.msg | 14 + usr.bin/grep/nls/ja_JP.SJIS.msg | 14 + usr.bin/grep/nls/ja_JP.UTF-8.msg | 14 + usr.bin/grep/nls/ja_JP.eucJP.msg | 14 + usr.bin/grep/nls/pt_BR.ISO8859-1.msg | 14 + usr.bin/grep/nls/ru_RU.KOI8-R.msg | 14 + usr.bin/grep/nls/uk_UA.UTF-8.msg | 13 + usr.bin/grep/nls/zh_CN.UTF-8.msg | 14 + usr.bin/grep/queue.c | 116 ++++++ usr.bin/grep/util.c | 500 ++++++++++++++++++++++++ 18 files changed, 2750 insertions(+) create mode 100644 usr.bin/grep/fastgrep.c create mode 100644 usr.bin/grep/file.c create mode 100644 usr.bin/grep/grep.1 create mode 100644 usr.bin/grep/grep.c create mode 100644 usr.bin/grep/grep.h create mode 100644 usr.bin/grep/nls/C.msg create mode 100644 usr.bin/grep/nls/es_ES.ISO8859-1.msg create mode 100644 usr.bin/grep/nls/gl_ES.ISO8859-1.msg create mode 100644 usr.bin/grep/nls/hu_HU.ISO8859-2.msg create mode 100644 usr.bin/grep/nls/ja_JP.SJIS.msg create mode 100644 usr.bin/grep/nls/ja_JP.UTF-8.msg create mode 100644 usr.bin/grep/nls/ja_JP.eucJP.msg create mode 100644 usr.bin/grep/nls/pt_BR.ISO8859-1.msg create mode 100644 usr.bin/grep/nls/ru_RU.KOI8-R.msg create mode 100644 usr.bin/grep/nls/uk_UA.UTF-8.msg create mode 100644 usr.bin/grep/nls/zh_CN.UTF-8.msg create mode 100644 usr.bin/grep/queue.c create mode 100644 usr.bin/grep/util.c (limited to 'usr.bin/grep') diff --git a/usr.bin/grep/fastgrep.c b/usr.bin/grep/fastgrep.c new file mode 100644 index 0000000..2fcd864 --- /dev/null +++ b/usr.bin/grep/fastgrep.c @@ -0,0 +1,336 @@ +/* $OpenBSD: util.c,v 1.36 2007/10/02 17:59:18 otto Exp $ */ +/* $FreeBSD: head/usr.bin/grep/fastgrep.c 211496 2010-08-19 09:28:59Z des $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * Copyright (C) 2008 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * XXX: This file is a speed up for grep to cover the defects of the + * regex library. These optimizations should practically be implemented + * there keeping this code clean. This is a future TODO, but for the + * meantime, we need to use this workaround. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: fastgrep.c,v 1.5 2011/04/18 03:27:40 joerg Exp $"); + +#include +#include +#include +#include +#include +#include + +#include "grep.h" + +static inline int grep_cmp(const unsigned char *, const unsigned char *, size_t); +static inline void grep_revstr(unsigned char *, int); + +void +fgrepcomp(fastgrep_t *fg, const char *pat) +{ + unsigned int i; + + /* Initialize. */ + fg->len = strlen(pat); + fg->bol = false; + fg->eol = false; + fg->reversed = false; + + fg->pattern = (unsigned char *)grep_strdup(pat); + + /* Preprocess pattern. */ + for (i = 0; i <= UCHAR_MAX; i++) + fg->qsBc[i] = fg->len; + for (i = 1; i < fg->len; i++) + fg->qsBc[fg->pattern[i]] = fg->len - i; +} + +/* + * Returns: -1 on failure, 0 on success + */ +int +fastcomp(fastgrep_t *fg, const char *pat) +{ + unsigned int i; + int firstHalfDot = -1; + int firstLastHalfDot = -1; + int hasDot = 0; + int lastHalfDot = 0; + int shiftPatternLen; + + /* Initialize. */ + fg->len = strlen(pat); + fg->bol = false; + fg->eol = false; + fg->reversed = false; + fg->word = wflag; + + /* Remove end-of-line character ('$'). */ + if (fg->len > 0 && pat[fg->len - 1] == '$') { + fg->eol = true; + fg->len--; + } + + /* Remove beginning-of-line character ('^'). */ + if (pat[0] == '^') { + fg->bol = true; + fg->len--; + pat++; + } + + if (fg->len >= 14 && + memcmp(pat, "[[:<:]]", 7) == 0 && + memcmp(pat + fg->len - 7, "[[:>:]]", 7) == 0) { + fg->len -= 14; + pat += 7; + /* Word boundary is handled separately in util.c */ + fg->word = true; + } + + /* + * pat has been adjusted earlier to not include '^', '$' or + * the word match character classes at the beginning and ending + * of the string respectively. + */ + fg->pattern = grep_malloc(fg->len + 1); + memcpy(fg->pattern, pat, fg->len); + fg->pattern[fg->len] = '\0'; + + /* Look for ways to cheat...er...avoid the full regex engine. */ + for (i = 0; i < fg->len; i++) { + /* Can still cheat? */ + if (fg->pattern[i] == '.') { + hasDot = i; + if (i < fg->len / 2) { + if (firstHalfDot < 0) + /* Closest dot to the beginning */ + firstHalfDot = i; + } else { + /* Closest dot to the end of the pattern. */ + lastHalfDot = i; + if (firstLastHalfDot < 0) + firstLastHalfDot = i; + } + } else { + /* Free memory and let others know this is empty. */ + free(fg->pattern); + fg->pattern = NULL; + return (-1); + } + } + + /* + * Determine if a reverse search would be faster based on the placement + * of the dots. + */ + if ((!(lflag || cflag)) && ((!(fg->bol || fg->eol)) && + ((lastHalfDot) && ((firstHalfDot < 0) || + ((fg->len - (lastHalfDot + 1)) < (size_t)firstHalfDot)))) && + !oflag && !color) { + fg->reversed = true; + hasDot = fg->len - (firstHalfDot < 0 ? + firstLastHalfDot : firstHalfDot) - 1; + grep_revstr(fg->pattern, fg->len); + } + + /* + * Normal Quick Search would require a shift based on the position the + * next character after the comparison is within the pattern. With + * wildcards, the position of the last dot effects the maximum shift + * distance. + * The closer to the end the wild card is the slower the search. A + * reverse version of this algorithm would be useful for wildcards near + * the end of the string. + * + * Examples: + * Pattern Max shift + * ------- --------- + * this 5 + * .his 4 + * t.is 3 + * th.s 2 + * thi. 1 + */ + + /* Adjust the shift based on location of the last dot ('.'). */ + shiftPatternLen = fg->len - hasDot; + + /* Preprocess pattern. */ + for (i = 0; i <= (signed)UCHAR_MAX; i++) + fg->qsBc[i] = shiftPatternLen; + for (i = hasDot + 1; i < fg->len; i++) { + fg->qsBc[fg->pattern[i]] = fg->len - i; + } + + /* + * Put pattern back to normal after pre-processing to allow for easy + * comparisons later. + */ + if (fg->reversed) + grep_revstr(fg->pattern, fg->len); + + return (0); +} + +int +grep_search(fastgrep_t *fg, const unsigned char *data, size_t len, regmatch_t *pmatch) +{ + unsigned int j; + int ret = REG_NOMATCH; + + if (pmatch->rm_so == (ssize_t)len) + return (ret); + + if (fg->bol && pmatch->rm_so != 0) { + pmatch->rm_so = len; + pmatch->rm_eo = len; + return (ret); + } + + /* No point in going farther if we do not have enough data. */ + if (len < fg->len) + return (ret); + + /* Only try once at the beginning or ending of the line. */ + if (fg->bol || fg->eol) { + /* Simple text comparison. */ + /* Verify data is >= pattern length before searching on it. */ + if (len >= fg->len) { + /* Determine where in data to start search at. */ + j = fg->eol ? len - fg->len : 0; + if (!((fg->bol && fg->eol) && (len != fg->len))) + if (grep_cmp(fg->pattern, data + j, + fg->len) == -1) { + pmatch->rm_so = j; + pmatch->rm_eo = j + fg->len; + ret = 0; + } + } + } else if (fg->reversed) { + /* Quick Search algorithm. */ + j = len; + do { + if (grep_cmp(fg->pattern, data + j - fg->len, + fg->len) == -1) { + pmatch->rm_so = j - fg->len; + pmatch->rm_eo = j; + ret = 0; + break; + } + /* Shift if within bounds, otherwise, we are done. */ + if (j == fg->len) + break; + j -= fg->qsBc[data[j - fg->len - 1]]; + } while (j >= fg->len); + } else { + /* Quick Search algorithm. */ + j = pmatch->rm_so; + do { + if (grep_cmp(fg->pattern, data + j, fg->len) == -1) { + pmatch->rm_so = j; + pmatch->rm_eo = j + fg->len; + ret = 0; + break; + } + + /* Shift if within bounds, otherwise, we are done. */ + if (j + fg->len == len) + break; + else + j += fg->qsBc[data[j + fg->len]]; + } while (j <= (len - fg->len)); + } + + return (ret); +} + +/* + * Returns: i >= 0 on failure (position that it failed) + * -1 on success + */ +static inline int +grep_cmp(const unsigned char *pat, const unsigned char *data, size_t len) +{ + size_t size; + wchar_t *wdata, *wpat; + unsigned int i; + + if (iflag) { + if ((size = mbstowcs(NULL, (const char *)data, 0)) == + ((size_t) - 1)) + return (-1); + + wdata = grep_malloc(size * sizeof(wint_t)); + + if (mbstowcs(wdata, (const char *)data, size) == + ((size_t) - 1)) + return (-1); + + if ((size = mbstowcs(NULL, (const char *)pat, 0)) == + ((size_t) - 1)) + return (-1); + + wpat = grep_malloc(size * sizeof(wint_t)); + + if (mbstowcs(wpat, (const char *)pat, size) == ((size_t) - 1)) + return (-1); + for (i = 0; i < len; i++) { + if ((towlower(wpat[i]) == towlower(wdata[i])) || + ((grepbehave != GREP_FIXED) && wpat[i] == L'.')) + continue; + free(wpat); + free(wdata); + return (i); + } + } else { + for (i = 0; i < len; i++) { + if ((pat[i] == data[i]) || ((grepbehave != GREP_FIXED) && + pat[i] == '.')) + continue; + return (i); + } + } + return (-1); +} + +static inline void +grep_revstr(unsigned char *str, int len) +{ + int i; + char c; + + for (i = 0; i < len / 2; i++) { + c = str[i]; + str[i] = str[len - i - 1]; + str[len - i - 1] = c; + } +} diff --git a/usr.bin/grep/file.c b/usr.bin/grep/file.c new file mode 100644 index 0000000..ef057ba --- /dev/null +++ b/usr.bin/grep/file.c @@ -0,0 +1,276 @@ +/* $NetBSD: file.c,v 1.10 2018/08/12 09:03:21 christos Exp $ */ +/* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */ +/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * Copyright (C) 2008-2010 Gabor Kovesdan + * Copyright (C) 2010 Dimitry Andric + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: file.c,v 1.10 2018/08/12 09:03:21 christos Exp $"); + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "grep.h" + +#define MAXBUFSIZ (32 * 1024) +#define LNBUFBUMP 80 + +#ifndef WITHOUT_GZIP +static gzFile gzbufdesc; +#endif +#ifndef WITHOUT_BZ2 +static BZFILE* bzbufdesc; +#endif + +static unsigned char buffer[MAXBUFSIZ]; +static unsigned char *bufpos; +static size_t bufrem; + +static unsigned char *lnbuf; +static size_t lnbuflen; + +static inline int +grep_refill(struct file *f) +{ + ssize_t nr = -1; + int bzerr; + + bufpos = buffer; + bufrem = 0; + +#ifndef WITHOUT_GZIP + if (filebehave == FILE_GZIP) { + nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); + if (nr == -1) + return -1; + } +#endif +#ifndef WITHOUT_BZ2 + if (filebehave == FILE_BZIP && bzbufdesc != NULL) { + nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); + switch (bzerr) { + case BZ_OK: + case BZ_STREAM_END: + /* No problem, nr will be okay */ + break; + case BZ_DATA_ERROR_MAGIC: + /* + * As opposed to gzread(), which simply returns the + * plain file data, if it is not in the correct + * compressed format, BZ2_bzRead() instead aborts. + * + * So, just restart at the beginning of the file again, + * and use plain reads from now on. + */ + BZ2_bzReadClose(&bzerr, bzbufdesc); + bzbufdesc = NULL; + if (lseek(f->fd, 0, SEEK_SET) == -1) + return (-1); + nr = read(f->fd, buffer, MAXBUFSIZ); + break; + default: + /* Make sure we exit with an error */ + nr = -1; + } + if (nr == -1) + return -1; + } +#endif + if (nr == -1) { + nr = read(f->fd, buffer, MAXBUFSIZ); + } + + if (nr < 0) + return (-1); + + bufrem = nr; + return (0); +} + +static inline int +grep_lnbufgrow(size_t newlen) +{ + + if (lnbuflen < newlen) { + lnbuf = grep_realloc(lnbuf, newlen); + lnbuflen = newlen; + } + + return (0); +} + +char * +grep_fgetln(struct file *f, size_t *lenp) +{ + unsigned char *p; + char *ret; + size_t len; + size_t off; + ptrdiff_t diff; + + /* Fill the buffer, if necessary */ + if (bufrem == 0 && grep_refill(f) != 0) + goto error; + + if (bufrem == 0) { + /* Return zero length to indicate EOF */ + *lenp = 0; + return ((char *)bufpos); + } + + /* Look for a newline in the remaining part of the buffer */ + if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) { + ++p; /* advance over newline */ + ret = (char *)bufpos; + len = p - bufpos; + bufrem -= len; + bufpos = p; + *lenp = len; + return (ret); + } + + /* We have to copy the current buffered data to the line buffer */ + for (len = bufrem, off = 0; ; len += bufrem) { + /* Make sure there is room for more data */ + if (grep_lnbufgrow(len + LNBUFBUMP)) + goto error; + memcpy(lnbuf + off, bufpos, len - off); + off = len; + if (grep_refill(f) != 0) + goto error; + if (bufrem == 0) + /* EOF: return partial line */ + break; + if ((p = memchr(bufpos, line_sep, bufrem)) == NULL) + continue; + /* got it: finish up the line (like code above) */ + ++p; + diff = p - bufpos; + len += diff; + if (grep_lnbufgrow(len)) + goto error; + memcpy(lnbuf + off, bufpos, diff); + bufrem -= diff; + bufpos = p; + break; + } + *lenp = len; + return ((char *)lnbuf); + +error: + *lenp = 0; + return (NULL); +} + +static inline struct file * +grep_file_init(struct file *f) +{ + +#ifndef WITHOUT_GZIP + if (filebehave == FILE_GZIP && + (gzbufdesc = gzdopen(f->fd, "r")) == NULL) + goto error; +#endif + +#ifndef WITHOUT_BZ2 + if (filebehave == FILE_BZIP && + (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) + goto error; +#endif + + /* Fill read buffer, also catches errors early */ + if (grep_refill(f) != 0) + goto error; + + /* Check for binary stuff, if necessary */ + if (!nulldataflag && binbehave != BINFILE_TEXT && + memchr(bufpos, '\0', bufrem) != NULL) + f->binary = true; + + return (f); +error: + close(f->fd); + free(f); + return (NULL); +} + +/* + * Opens a file for processing. + */ +struct file * +grep_open(const char *path) +{ + struct file *f; + + f = grep_malloc(sizeof *f); + memset(f, 0, sizeof *f); + if (path == NULL) { + /* Processing stdin implies --line-buffered. */ + lbflag = true; + f->fd = STDIN_FILENO; + } else if ((f->fd = open(path, O_RDONLY)) == -1) { + free(f); + return (NULL); + } + + return (grep_file_init(f)); +} + +/* + * Closes a file. + */ +void +grep_close(struct file *f) +{ + + close(f->fd); + + /* Reset read buffer and line buffer */ + bufpos = buffer; + bufrem = 0; + + free(lnbuf); + lnbuf = NULL; + lnbuflen = 0; +} diff --git a/usr.bin/grep/grep.1 b/usr.bin/grep/grep.1 new file mode 100644 index 0000000..7446cad --- /dev/null +++ b/usr.bin/grep/grep.1 @@ -0,0 +1,486 @@ +.\" $NetBSD: grep.1,v 1.4 2012/04/08 22:00:38 wiz Exp $ +.\" $FreeBSD: head/usr.bin/grep/grep.1 210652 2010-07-30 14:05:20Z joel $ +.\" $OpenBSD: grep.1,v 1.38 2010/04/05 06:30:59 jmc Exp $ +.\" Copyright (c) 1980, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)grep.1 8.3 (Berkeley) 4/18/94 +.\" +.Dd April 19, 2011 +.Dt GREP 1 +.Os +.Sh NAME +.Nm grep , egrep , fgrep , +.Nm zgrep , zegrep , zfgrep +.Nd file pattern searcher +.Sh SYNOPSIS +.Nm grep +.Op Fl abcdDEFGHhIiJLlmnOopqRSsUVvwxZz +.Op Fl A Ar num +.Op Fl B Ar num +.Op Fl C Ns Op Ar num +.Op Fl e Ar pattern +.Op Fl f Ar file +.Op Fl Fl binary-files Ns = Ns Ar value +.Op Fl Fl color Ns Op = Ns Ar when +.Op Fl Fl colour Ns Op = Ns Ar when +.Op Fl Fl context Ns Op = Ns Ar num +.Op Fl Fl decompress +.Op Fl Fl label +.Op Fl Fl line-buffered +.Op Ar pattern +.Op Ar +.Sh DESCRIPTION +The +.Nm grep +utility searches any given input files, +selecting lines that match one or more patterns. +By default, a pattern matches an input line if the regular expression +(RE) in the pattern matches the input line +without its trailing newline. +An empty expression matches every line. +Each input line that matches at least one of the patterns is written +to the standard output. +.Pp +.Nm grep +is used for simple patterns and +basic regular expressions +.Pq BREs ; +.Nm egrep +can handle extended regular expressions +.Pq EREs . +See +.Xr re_format 7 +for more information on regular expressions. +.Nm fgrep +is quicker than both +.Nm grep +and +.Nm egrep , +but can only handle fixed patterns +(i.e. it does not interpret regular expressions). +Patterns may consist of one or more lines, +allowing any of the pattern lines to match a portion of the input. +.Pp +.Nm zgrep , +.Nm zegrep , +and +.Nm zfgrep +act like +.Nm grep , +.Nm egrep , +and +.Nm fgrep , +respectively, but accept input files compressed with the +.Xr compress 1 +or +.Xr gzip 1 +compression utilities. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl A Ar num , Fl Fl after-context Ns = Ns Ar num +Print +.Ar num +lines of trailing context after each match. +See also the +.Fl B +and +.Fl C +options. +.It Fl a , Fl Fl text +Treat all files as ASCII text. +Normally +.Nm +will simply print +.Dq Binary file ... matches +if files contain binary characters. +Use of this option forces +.Nm +to output lines matching the specified pattern. +.It Fl B Ar num , Fl Fl before-context Ns = Ns Ar num +Print +.Ar num +lines of leading context before each match. +See also the +.Fl A +and +.Fl C +options. +.It Fl b , Fl Fl byte-offset +The offset in bytes of a matched pattern is +displayed in front of the respective matched line. +.It Fl C Ns Op Ar num , Fl Fl context Ns = Ns Ar num +Print +.Ar num +lines of leading and trailing context surrounding each match. +The default is 2 and is equivalent to +.Fl A +.Ar 2 +.Fl B +.Ar 2 . +Note: +no whitespace may be given between the option and its argument. +.It Fl c , Fl Fl count +Only a count of selected lines is written to standard output. +.It Fl Fl colour Ns = Ns Op Ar when , Fl Fl color Ns = Ns Op Ar when +Mark up the matching text with the expression stored in +.Ev GREP_COLOR +environment variable. +The possible values of when can be `never', `always' or `auto'. +.It Fl D Ar action , Fl Fl devices Ns = Ns Ar action +Specify the demanded action for devices, FIFOs and sockets. +The default action is `read', which means, that they are read +as if they were normal files. +If the action is set to `skip', devices will be silently skipped. +.It Fl d Ar action , Fl Fl directories Ns = Ns Ar action +Specify the demanded action for directories. +It is `read' by default, which means that the directories +are read in the same manner as normal files. +Other possible values are `skip' to silently ignore the +directories, and `recurse' to read them recursively, which +has the same effect as the +.Fl R +and +.Fl r +option. +.It Fl E , Fl Fl extended-regexp +Interpret +.Ar pattern +as an extended regular expression +(i.e. force +.Nm grep +to behave as +.Nm egrep ) . +.It Fl e Ar pattern , Fl Fl regexp Ns = Ns Ar pattern +Specify a pattern used during the search of the input: +an input line is selected if it matches any of the specified patterns. +This option is most useful when multiple +.Fl e +options are used to specify multiple patterns, +or when a pattern begins with a dash +.Pq Sq - . +.It Fl Fl exclude +If specified, it excludes files matching the given +filename pattern from the search. +Note that +.Fl Fl exclude +patterns take priority over +.Fl Fl include +patterns, and if no +.Fl Fl include +pattern is specified, all files are searched that are +not excluded. +Patterns are matched to the full path specified, +not only to the filename component. +.It Fl Fl exclude-dir +If +.Fl R +is specified, it excludes directories matching the +given filename pattern from the search. +Note that +.Fl Fl exclude-dir +patterns take priority over +.Fl Fl include-dir +patterns, and if no +.Fl Fl include-dir +pattern is specified, all directories are searched that are +not excluded. +.It Fl F , Fl Fl fixed-strings +Interpret +.Ar pattern +as a set of fixed strings +(i.e. force +.Nm grep +to behave as +.Nm fgrep ) . +.It Fl f Ar file , Fl Fl file Ns = Ns Ar file +Read one or more newline separated patterns from +.Ar file . +Empty pattern lines match every input line. +Newlines are not considered part of a pattern. +If +.Ar file +is empty, nothing is matched. +.It Fl G , Fl Fl basic-regexp +Interpret +.Ar pattern +as a basic regular expression +(i.e. force +.Nm grep +to behave as traditional +.Nm grep ) . +.It Fl H +Always print filename headers with output lines. +.It Fl h , Fl Fl no-filename +Never print filename headers +.Pq i.e. filenames +with output lines. +.It Fl Fl help +Print a brief help message. +.It Fl I +Ignore binary files. +This option is equivalent to +.Fl Fl binary-file Ns = Ns Ar without-match +option. +.It Fl i , Fl Fl ignore-case +Perform case insensitive matching. +By default, +.Nm grep +is case sensitive. +.It Fl Fl include +If specified, only files matching the +given filename pattern are searched. +Note that +.Fl Fl exclude +patterns take priority over +.Fl Fl include +patterns. +Patterns are matched to the full path specified, +not only to the filename component. +.It Fl Fl include-dir +If +.Fl R +is specified, only directories matching the +given filename pattern are searched. +Note that +.Fl Fl exclude-dir +patterns take priority over +.Fl Fl include-dir +patterns. +.It Fl J, Fl Fl bz2decompress +Decompress the +.Xr bzip2 1 +compressed file before looking for the text. +.It Fl L , Fl Fl files-without-match +Only the names of files not containing selected lines are written to +standard output. +Pathnames are listed once per file searched. +If the standard input is searched, the string +.Dq (standard input) +is written. +.It Fl l , Fl Fl files-with-matches +Only the names of files containing selected lines are written to +standard output. +.Nm grep +will only search a file until a match has been found, +making searches potentially less expensive. +Pathnames are listed once per file searched. +If the standard input is searched, the string +.Dq (standard input) +is written. +.It Fl Fl mmap +Use +.Xr mmap 2 +instead of +.Xr read 2 +to read input, which can result in better performance under some +circumstances but can cause undefined behaviour. +.It Fl m Ar num, Fl Fl max-count Ns = Ns Ar num +Stop reading the file after +.Ar num +matches. +.It Fl n , Fl Fl line-number +Each output line is preceded by its relative line number in the file, +starting at line 1. +The line number counter is reset for each file processed. +This option is ignored if +.Fl c , +.Fl L , +.Fl l , +or +.Fl q +is +specified. +.It Fl O +If +.Fl R +is specified, follow symbolic links only if they were explicitly listed +on the command line. +The default is not to follow symbolic links. +.It Fl o, Fl Fl only-matching +Prints only the matching part of the lines. +.It Fl p +If +.Fl R +is specified, no symbolic links are followed. +This is the default. +.It Fl q , Fl Fl quiet , Fl Fl silent +Quiet mode: +suppress normal output. +.Nm grep +will only search a file until a match has been found, +making searches potentially less expensive. +.It Fl R , Fl r , Fl Fl recursive +Recursively search subdirectories listed. +.It Fl S +If +.Fl R +is specified, all symbolic links are followed. +The default is not to follow symbolic links. +.It Fl s , Fl Fl no-messages +Silent mode. +Nonexistent and unreadable files are ignored +(i.e. their error messages are suppressed). +.It Fl U , Fl Fl binary +Search binary files, but do not attempt to print them. +.It Fl V , Fl Fl version +Display version information and exit. +.It Fl v , Fl Fl invert-match +Selected lines are those +.Em not +matching any of the specified patterns. +.It Fl w , Fl Fl word-regexp +The expression is searched for as a word (as if surrounded by +.Sq [[:<:]] +and +.Sq [[:>:]] ; +see +.Xr re_format 7 ) . +.It Fl x , Fl Fl line-regexp +Only input lines selected against an entire fixed string or regular +expression are considered to be matching lines. +.It Fl y +Equivalent to +.Fl i . +Obsoleted. +.It Fl Z , Fl Fl null +Prints a zero-byte after the file name. +.It Fl z , Fl Fl null-data +Use the zero byte (ASCII NUL) as line separator. +.It Fl Fl binary-files Ns = Ns Ar value +Controls searching and printing of binary files. +Options are +.Ar binary , +the default: search binary files but do not print them; +.Ar without-match : +do not search binary files; +and +.Ar text : +treat all files as text. +.It Fl Fl decompress +Detect input files compressed with +.Xr bzip2 1 +or +.Xr gzip 1 +and decompress them dynamically. +This makes +.Nm grep +behave like +.Nm zgrep . +.It Fl Fl line-buffered +Force output to be line buffered. +By default, output is line buffered when standard output is a terminal +and block buffered otherwise. +.Pp +.El +If no file arguments are specified, the standard input is used. +.Sh EXIT STATUS +The +.Nm grep +utility exits with one of the following values: +.Pp +.Bl -tag -width flag -compact +.It Li 0 +One or more lines were selected. +.It Li 1 +No lines were selected. +.It Li \*(Gt1 +An error occurred. +.El +.Sh EXAMPLES +To find all occurrences of the word +.Sq patricia +in a file: +.Pp +.Dl $ grep 'patricia' myfile +.Pp +To find all occurrences of the pattern +.Ql .Pp +at the beginning of a line: +.Pp +.Dl $ grep '^\e.Pp' myfile +.Pp +The apostrophes ensure the entire expression is evaluated by +.Nm grep +instead of by the user's shell. +The caret +.Ql ^ +matches the null string at the beginning of a line, +and the +.Ql \e +escapes the +.Ql \&. , +which would otherwise match any character. +.Pp +To find all lines in a file which do not contain the words +.Sq foo +or +.Sq bar : +.Pp +.Dl $ grep -v -e 'foo' -e 'bar' myfile +.Pp +A simple example of an extended regular expression: +.Pp +.Dl $ egrep '19|20|25' calendar +.Pp +Peruses the file +.Sq calendar +looking for either 19, 20, or 25. +.Sh SEE ALSO +.Xr ed 1 , +.Xr ex 1 , +.Xr gzip 1 , +.Xr sed 1 , +.Xr re_format 7 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl AaBbCDdGHhIJLmoPRSUVwZ +are extensions to that specification, and the behaviour of the +.Fl f +flag when used with an empty pattern file is left undefined. +.Pp +All long options are provided for compatibility with +GNU versions of this utility. +.Pp +Historic versions of the +.Nm grep +utility also supported the flags +.Op Fl ruy . +This implementation supports those options; +however, their use is strongly discouraged. +.Sh HISTORY +The +.Nm grep +command first appeared in +.At v6 . diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c new file mode 100644 index 0000000..bad2a73 --- /dev/null +++ b/usr.bin/grep/grep.c @@ -0,0 +1,722 @@ +/* $NetBSD: grep.c,v 1.15 2018/08/12 09:03:21 christos Exp $ */ +/* $FreeBSD: head/usr.bin/grep/grep.c 211519 2010-08-19 22:55:17Z delphij $ */ +/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * Copyright (C) 2008-2009 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: grep.c,v 1.15 2018/08/12 09:03:21 christos Exp $"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "grep.h" + +#ifndef WITHOUT_NLS +#include +nl_catd catalog; +#endif + +/* + * Default messags to use when NLS is disabled or no catalogue + * is found. + */ +const char *errstr[] = { + "", +/* 1*/ "(standard input)", +/* 2*/ "cannot read bzip2 compressed file", +/* 3*/ "unknown %s option", +/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n", +/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n", +/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n", +/* 7*/ "\t[pattern] [file ...]\n", +/* 8*/ "Binary file %s matches\n", +/* 9*/ "%s (BSD grep) %s\n", +}; + +/* Flags passed to regcomp() and regexec() */ +int cflags = 0; +int eflags = REG_STARTEND; + +/* Searching patterns */ +unsigned int patterns, pattern_sz; +char **pattern; +regex_t *r_pattern; +fastgrep_t *fg_pattern; + +/* Filename exclusion/inclusion patterns */ +unsigned int fpatterns, fpattern_sz; +unsigned int dpatterns, dpattern_sz; +struct epat *dpattern, *fpattern; + +/* For regex errors */ +char re_error[RE_ERROR_BUF + 1]; + +/* Command-line flags */ +unsigned long long Aflag; /* -A x: print x lines trailing each match */ +unsigned long long Bflag; /* -B x: print x lines leading each match */ +bool Hflag; /* -H: always print file name */ +bool Lflag; /* -L: only show names of files with no matches */ +bool bflag; /* -b: show block numbers for each match */ +bool cflag; /* -c: only show a count of matching lines */ +bool hflag; /* -h: don't print filename headers */ +bool iflag; /* -i: ignore case */ +bool lflag; /* -l: only show names of files with matches */ +bool mflag; /* -m x: stop reading the files after x matches */ +unsigned long long mcount; /* count for -m */ +bool nflag; /* -n: show line numbers in front of matching lines */ +bool oflag; /* -o: print only matching part */ +bool qflag; /* -q: quiet mode (don't output anything) */ +bool sflag; /* -s: silent mode (ignore errors) */ +bool vflag; /* -v: only show non-matching lines */ +bool wflag; /* -w: pattern must start and end on word boundaries */ +bool xflag; /* -x: pattern must match entire line */ +bool lbflag; /* --line-buffered */ +bool nullflag; /* --null */ +bool nulldataflag; /* --null-data */ +unsigned char line_sep = '\n'; /* 0 for --null-data */ +char *label; /* --label */ +const char *color; /* --color */ +int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */ +int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */ +int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */ +int devbehave = DEV_READ; /* -D: handling of devices */ +int dirbehave = DIR_READ; /* -dRr: handling of directories */ +int linkbehave = LINK_READ; /* -OpS: handling of symlinks */ + +bool dexclude, dinclude; /* --exclude-dir and --include-dir */ +bool fexclude, finclude; /* --exclude and --include */ + +enum { + BIN_OPT = CHAR_MAX + 1, + COLOR_OPT, + DECOMPRESS_OPT, + HELP_OPT, + MMAP_OPT, + LINEBUF_OPT, + LABEL_OPT, + R_EXCLUDE_OPT, + R_INCLUDE_OPT, + R_DEXCLUDE_OPT, + R_DINCLUDE_OPT +}; + +static inline const char *init_color(const char *); + +/* Housekeeping */ +int tail; /* lines left to print */ +bool notfound; /* file not found */ + +extern char *__progname; + +/* + * Prints usage information and returns 2. + */ +__dead static void +usage(void) +{ + fprintf(stderr, getstr(4), __progname); + fprintf(stderr, "%s", getstr(5)); + fprintf(stderr, "%s", getstr(6)); + fprintf(stderr, "%s", getstr(7)); + exit(2); +} + +static const char optstr[] = + "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxyz"; + +struct option long_options[] = +{ + {"binary-files", required_argument, NULL, BIN_OPT}, +#ifndef WITHOUT_GZIP + {"decompress", no_argument, NULL, DECOMPRESS_OPT}, +#endif + {"help", no_argument, NULL, HELP_OPT}, + {"mmap", no_argument, NULL, MMAP_OPT}, + {"line-buffered", no_argument, NULL, LINEBUF_OPT}, + {"label", required_argument, NULL, LABEL_OPT}, + {"color", optional_argument, NULL, COLOR_OPT}, + {"colour", optional_argument, NULL, COLOR_OPT}, + {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, + {"include", required_argument, NULL, R_INCLUDE_OPT}, + {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, + {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, + {"after-context", required_argument, NULL, 'A'}, + {"text", no_argument, NULL, 'a'}, + {"before-context", required_argument, NULL, 'B'}, + {"byte-offset", no_argument, NULL, 'b'}, + {"context", optional_argument, NULL, 'C'}, + {"count", no_argument, NULL, 'c'}, + {"devices", required_argument, NULL, 'D'}, + {"directories", required_argument, NULL, 'd'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"regexp", required_argument, NULL, 'e'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"file", required_argument, NULL, 'f'}, + {"basic-regexp", no_argument, NULL, 'G'}, + {"no-filename", no_argument, NULL, 'h'}, + {"with-filename", no_argument, NULL, 'H'}, + {"ignore-case", no_argument, NULL, 'i'}, +#ifndef WITHOUT_BZ2 + {"bz2decompress", no_argument, NULL, 'J'}, +#endif + {"files-with-matches", no_argument, NULL, 'l'}, + {"files-without-match", no_argument, NULL, 'L'}, + {"max-count", required_argument, NULL, 'm'}, + {"line-number", no_argument, NULL, 'n'}, + {"only-matching", no_argument, NULL, 'o'}, + {"quiet", no_argument, NULL, 'q'}, + {"silent", no_argument, NULL, 'q'}, + {"recursive", no_argument, NULL, 'r'}, + {"no-messages", no_argument, NULL, 's'}, + {"binary", no_argument, NULL, 'U'}, + {"unix-byte-offsets", no_argument, NULL, 'u'}, + {"invert-match", no_argument, NULL, 'v'}, + {"version", no_argument, NULL, 'V'}, + {"word-regexp", no_argument, NULL, 'w'}, + {"line-regexp", no_argument, NULL, 'x'}, + {"null", no_argument, NULL, 'Z'}, + {"null-data", no_argument, NULL, 'z'}, + {NULL, no_argument, NULL, 0} +}; + +/* + * Adds a searching pattern to the internal array. + */ +static void +add_pattern(char *pat, size_t len) +{ + + /* TODO: Check for empty patterns and shortcut */ + + /* Increase size if necessary */ + if (patterns == pattern_sz) { + pattern_sz *= 2; + pattern = grep_realloc(pattern, ++pattern_sz * + sizeof(*pattern)); + } + if (len > 0 && pat[len - 1] == '\n') + --len; + /* pat may not be NUL-terminated */ + pattern[patterns] = grep_malloc(len + 1); + memcpy(pattern[patterns], pat, len); + pattern[patterns][len] = '\0'; + ++patterns; +} + +/* + * Adds a file include/exclude pattern to the internal array. + */ +static void +add_fpattern(const char *pat, int mode) +{ + + /* Increase size if necessary */ + if (fpatterns == fpattern_sz) { + fpattern_sz *= 2; + fpattern = grep_realloc(fpattern, ++fpattern_sz * + sizeof(struct epat)); + } + fpattern[fpatterns].pat = grep_strdup(pat); + fpattern[fpatterns].mode = mode; + ++fpatterns; +} + +/* + * Adds a directory include/exclude pattern to the internal array. + */ +static void +add_dpattern(const char *pat, int mode) +{ + + /* Increase size if necessary */ + if (dpatterns == dpattern_sz) { + dpattern_sz *= 2; + dpattern = grep_realloc(dpattern, ++dpattern_sz * + sizeof(struct epat)); + } + dpattern[dpatterns].pat = grep_strdup(pat); + dpattern[dpatterns].mode = mode; + ++dpatterns; +} + +/* + * Reads searching patterns from a file and adds them with add_pattern(). + */ +static void +read_patterns(const char *fn) +{ + FILE *f; + char *line; + size_t len; + ssize_t rlen; + + if ((f = fopen(fn, "r")) == NULL) + err(2, "%s", fn); + line = NULL; + len = 0; + while ((rlen = getline(&line, &len, f)) != -1) + add_pattern(line, *line == '\n' ? 0 : (size_t)rlen); + free(line); + if (ferror(f)) + err(2, "%s", fn); + fclose(f); +} + +static inline const char * +init_color(const char *d) +{ + char *c; + + c = getenv("GREP_COLOR"); + return (c != NULL ? c : d); +} + +int +main(int argc, char *argv[]) +{ + char **aargv, **eargv, *eopts; + char *ep; + unsigned long long l; + unsigned int aargc, eargc, i, j; + int c, lastc, needpattern, newarg, prevoptind; + + setlocale(LC_ALL, ""); + +#ifndef WITHOUT_NLS + catalog = catopen("grep", NL_CAT_LOCALE); +#endif + + /* Check what is the program name of the binary. In this + way we can have all the funcionalities in one binary + without the need of scripting and using ugly hacks. */ + switch (__progname[0]) { + case 'e': + grepbehave = GREP_EXTENDED; + break; + case 'f': + grepbehave = GREP_FIXED; + break; + case 'g': + grepbehave = GREP_BASIC; + break; +#ifndef WITHOUT_GZIP + case 'z': + filebehave = FILE_GZIP; + switch(__progname[1]) { + case 'e': + grepbehave = GREP_EXTENDED; + break; + case 'f': + grepbehave = GREP_FIXED; + break; + case 'g': + grepbehave = GREP_BASIC; + break; + } + break; +#endif + } + + lastc = '\0'; + newarg = 1; + prevoptind = 1; + needpattern = 1; + + eopts = getenv("GREP_OPTIONS"); + + /* support for extra arguments in GREP_OPTIONS */ + eargc = 0; + if (eopts != NULL) { + char *str; + + /* make an estimation of how many extra arguments we have */ + for (j = 0; j < strlen(eopts); j++) + if (eopts[j] == ' ') + eargc++; + + eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); + + eargc = 0; + /* parse extra arguments */ + while ((str = strsep(&eopts, " ")) != NULL) + eargv[eargc++] = grep_strdup(str); + + aargv = (char **)grep_calloc(eargc + argc + 1, + sizeof(char *)); + + aargv[0] = argv[0]; + for (i = 0; i < eargc; i++) + aargv[i + 1] = eargv[i]; + for (j = 1; j < (unsigned int)argc; j++, i++) + aargv[i + 1] = argv[j]; + + aargc = eargc + argc; + } else { + aargv = argv; + aargc = argc; + } + + while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != + -1)) { + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (newarg || !isdigit(lastc)) + Aflag = 0; + else if (Aflag > LLONG_MAX / 10) { + errno = ERANGE; + err(2, NULL); + } + Aflag = Bflag = (Aflag * 10) + (c - '0'); + break; + case 'C': + if (optarg == NULL) { + Aflag = Bflag = 2; + break; + } + /* FALLTHROUGH */ + case 'A': + /* FALLTHROUGH */ + case 'B': + errno = 0; + l = strtoull(optarg, &ep, 10); + if (((errno == ERANGE) && (l == ULLONG_MAX)) || + ((errno == EINVAL) && (l == 0))) + err(2, NULL); + else if (ep[0] != '\0') { + errno = EINVAL; + err(2, NULL); + } + if (c == 'A') + Aflag = l; + else if (c == 'B') + Bflag = l; + else + Aflag = Bflag = l; + break; + case 'a': + binbehave = BINFILE_TEXT; + break; + case 'b': + bflag = true; + break; + case 'c': + cflag = true; + break; + case 'D': + if (strcasecmp(optarg, "skip") == 0) + devbehave = DEV_SKIP; + else if (strcasecmp(optarg, "read") == 0) + devbehave = DEV_READ; + else + errx(2, getstr(3), "--devices"); + break; + case 'd': + if (strcasecmp("recurse", optarg) == 0) { + Hflag = true; + dirbehave = DIR_RECURSE; + } else if (strcasecmp("skip", optarg) == 0) + dirbehave = DIR_SKIP; + else if (strcasecmp("read", optarg) == 0) + dirbehave = DIR_READ; + else + errx(2, getstr(3), "--directories"); + break; + case 'E': + grepbehave = GREP_EXTENDED; + break; + case 'e': + add_pattern(optarg, strlen(optarg)); + needpattern = 0; + break; + case 'F': + grepbehave = GREP_FIXED; + break; + case 'f': + read_patterns(optarg); + needpattern = 0; + break; + case 'G': + grepbehave = GREP_BASIC; + break; + case 'H': + Hflag = true; + break; + case 'h': + Hflag = false; + hflag = true; + break; + case 'I': + binbehave = BINFILE_SKIP; + break; + case 'i': + case 'y': + iflag = true; + cflags |= REG_ICASE; + break; +#ifndef WITHOUT_BZ2 + case 'J': + filebehave = FILE_BZIP; + break; +#endif + case 'L': + lflag = false; + Lflag = true; + break; + case 'l': + Lflag = false; + lflag = true; + break; + case 'm': + mflag = true; + errno = 0; + mcount = strtoull(optarg, &ep, 10); + if (((errno == ERANGE) && (mcount == ULLONG_MAX)) || + ((errno == EINVAL) && (mcount == 0))) + err(2, NULL); + else if (ep[0] != '\0') { + errno = EINVAL; + err(2, NULL); + } + break; + case 'n': + nflag = true; + break; + case 'O': + linkbehave = LINK_EXPLICIT; + break; + case 'o': + oflag = true; + break; + case 'p': + linkbehave = LINK_SKIP; + break; + case 'q': + qflag = true; + break; + case 'S': + linkbehave = LINK_READ; + break; + case 'R': + case 'r': + dirbehave = DIR_RECURSE; + Hflag = true; + break; + case 's': + sflag = true; + break; + case 'U': + binbehave = BINFILE_BIN; + break; + case 'u': + case MMAP_OPT: + /* noop, compatibility */ + break; + case 'V': + printf(getstr(9), __progname, VERSION); + exit(0); + case 'v': + vflag = true; + break; + case 'w': + wflag = true; + break; + case 'x': + xflag = true; + break; + case 'Z': + nullflag = true; + break; + case 'z': + nulldataflag = true; + line_sep = '\0'; + break; + case BIN_OPT: + if (strcasecmp("binary", optarg) == 0) + binbehave = BINFILE_BIN; + else if (strcasecmp("without-match", optarg) == 0) + binbehave = BINFILE_SKIP; + else if (strcasecmp("text", optarg) == 0) + binbehave = BINFILE_TEXT; + else + errx(2, getstr(3), "--binary-files"); + break; + case COLOR_OPT: + color = NULL; + if (optarg == NULL || strcasecmp("auto", optarg) == 0 || + strcasecmp("tty", optarg) == 0 || + strcasecmp("if-tty", optarg) == 0) { + char *term; + + term = getenv("TERM"); + if (isatty(STDOUT_FILENO) && term != NULL && + strcasecmp(term, "dumb") != 0) + color = init_color("01;31"); + } else if (strcasecmp("always", optarg) == 0 || + strcasecmp("yes", optarg) == 0 || + strcasecmp("force", optarg) == 0) { + color = init_color("01;31"); + } else if (strcasecmp("never", optarg) != 0 && + strcasecmp("none", optarg) != 0 && + strcasecmp("no", optarg) != 0) + errx(2, getstr(3), "--color"); + break; +#ifndef WITHOUT_GZIP + case DECOMPRESS_OPT: + filebehave = FILE_GZIP; + break; +#endif + case LABEL_OPT: + label = optarg; + break; + case LINEBUF_OPT: + lbflag = true; + break; + case R_INCLUDE_OPT: + finclude = true; + add_fpattern(optarg, INCL_PAT); + break; + case R_EXCLUDE_OPT: + fexclude = true; + add_fpattern(optarg, EXCL_PAT); + break; + case R_DINCLUDE_OPT: + dinclude = true; + add_dpattern(optarg, INCL_PAT); + break; + case R_DEXCLUDE_OPT: + dexclude = true; + add_dpattern(optarg, EXCL_PAT); + break; + case HELP_OPT: + default: + usage(); + } + lastc = c; + newarg = optind != prevoptind; + prevoptind = optind; + } + aargc -= optind; + aargv += optind; + + /* Fail if we don't have any pattern */ + if (aargc == 0 && needpattern) + usage(); + + /* Process patterns from command line */ + if (aargc != 0 && needpattern) { + add_pattern(*aargv, strlen(*aargv)); + --aargc; + ++aargv; + } + + switch (grepbehave) { + case GREP_FIXED: + case GREP_BASIC: + break; + case GREP_EXTENDED: + cflags |= REG_EXTENDED; + break; + default: + /* NOTREACHED */ + usage(); + } + + fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); + r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); +/* + * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance. + * Optimizations should be done there. + */ + /* Check if cheating is allowed (always is for fgrep). */ + if (grepbehave == GREP_FIXED) { + for (i = 0; i < patterns; ++i) + fgrepcomp(&fg_pattern[i], pattern[i]); + } else { + for (i = 0; i < patterns; ++i) { + if (fastcomp(&fg_pattern[i], pattern[i])) { + /* Fall back to full regex library */ + c = regcomp(&r_pattern[i], pattern[i], cflags); + if (c != 0) { + regerror(c, &r_pattern[i], re_error, + RE_ERROR_BUF); + errx(2, "%s", re_error); + } + } + } + } + + if (lbflag) { +#ifdef _IOLBF + setvbuf(stdout, NULL, _IOLBF, 0); +#else + setlinebuf(stdout); +#endif + } + + if ((aargc == 0 || aargc == 1) && !Hflag) + hflag = true; + + if (aargc == 0) + exit(!procfile("-")); + + if (dirbehave == DIR_RECURSE) + c = grep_tree(aargv); + else + for (c = 0; aargc--; ++aargv) { + if ((finclude || fexclude) && !file_matching(*aargv)) + continue; + c+= procfile(*aargv); + } + +#ifndef WITHOUT_NLS + catclose(catalog); +#endif + + /* Find out the correct return value according to the + results and the command line option. */ + exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1)); +} diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h new file mode 100644 index 0000000..b7ef7fa --- /dev/null +++ b/usr.bin/grep/grep.h @@ -0,0 +1,162 @@ +/* $NetBSD: grep.h,v 1.10 2018/08/12 09:03:21 christos Exp $ */ +/* $OpenBSD: grep.h,v 1.15 2010/04/05 03:03:55 tedu Exp $ */ +/* $FreeBSD: head/usr.bin/grep/grep.h 211496 2010-08-19 09:28:59Z des $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * Copyright (c) 2008-2009 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef WITHOUT_BZ2 +#include +#endif +#include +#include +#include +#include +#ifndef WITHOUT_GZIP +#include +#endif + +#ifdef WITHOUT_NLS +#define getstr(n) errstr[n] +#else +#include + +extern nl_catd catalog; +#define getstr(n) catgets(catalog, 1, n, errstr[n]) +#endif + +extern const char *errstr[]; + +#define VERSION "2.5.1-FreeBSD" + +#define GREP_FIXED 0 +#define GREP_BASIC 1 +#define GREP_EXTENDED 2 + +#define BINFILE_BIN 0 +#define BINFILE_SKIP 1 +#define BINFILE_TEXT 2 + +#define FILE_STDIO 0 +#define FILE_GZIP 1 +#define FILE_BZIP 2 + +#define DIR_READ 0 +#define DIR_SKIP 1 +#define DIR_RECURSE 2 + +#define DEV_READ 0 +#define DEV_SKIP 1 + +#define LINK_READ 0 +#define LINK_EXPLICIT 1 +#define LINK_SKIP 2 + +#define EXCL_PAT 0 +#define INCL_PAT 1 + +#define MAX_LINE_MATCHES 32 + +struct file { + int fd; + bool binary; +}; + +struct str { + off_t off; + size_t len; + char *dat; + char *file; + int line_no; +}; + +struct epat { + char *pat; + int mode; +}; + +typedef struct { + size_t len; + unsigned char *pattern; + int qsBc[UCHAR_MAX + 1]; + /* flags */ + bool bol; + bool eol; + bool reversed; + bool word; +} fastgrep_t; + +/* Flags passed to regcomp() and regexec() */ +extern int cflags, eflags; + +/* Command line flags */ +extern bool Eflag, Fflag, Gflag, Hflag, Lflag, + bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, + qflag, sflag, vflag, wflag, xflag; +extern bool dexclude, dinclude, fexclude, finclude, lbflag, nullflag, nulldataflag; +extern unsigned char line_sep; +extern unsigned long long Aflag, Bflag, mcount; +extern char *label; +extern const char *color; +extern int binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave; + +extern bool notfound; +extern int tail; +extern unsigned int dpatterns, fpatterns, patterns; +extern char **pattern; +extern struct epat *dpattern, *fpattern; +extern regex_t *er_pattern, *r_pattern; +extern fastgrep_t *fg_pattern; + +/* For regex errors */ +#define RE_ERROR_BUF 512 +extern char re_error[RE_ERROR_BUF + 1]; /* Seems big enough */ + +/* util.c */ +bool file_matching(const char *fname); +int procfile(const char *fn); +int grep_tree(char **argv); +void *grep_malloc(size_t size); +void *grep_calloc(size_t nmemb, size_t size); +void *grep_realloc(void *ptr, size_t size); +char *grep_strdup(const char *str); +void printline(struct str *line, int sep, regmatch_t *matches, int m); + +/* queue.c */ +void enqueue(struct str *x); +void printqueue(void); +void clearqueue(void); + +/* file.c */ +void grep_close(struct file *f); +struct file *grep_open(const char *path); +char *grep_fgetln(struct file *f, size_t *len); + +/* fastgrep.c */ +int fastcomp(fastgrep_t *, const char *); +void fgrepcomp(fastgrep_t *, const char *); +int grep_search(fastgrep_t *, const unsigned char *, size_t, regmatch_t *); diff --git a/usr.bin/grep/nls/C.msg b/usr.bin/grep/nls/C.msg new file mode 100644 index 0000000..d07aea6 --- /dev/null +++ b/usr.bin/grep/nls/C.msg @@ -0,0 +1,13 @@ +$ $FreeBSD: head/usr.bin/grep/nls/C.msg 210622 2010-07-29 18:02:57Z gabor $ +$ +$set 1 +$quote " +1 "(standard input)" +2 "cannot read bzip2 compressed file" +3 "unknown %s option" +4 "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n" +5 "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n" +6 "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n" +7 "\t[pattern] [file ...]\n" +8 "Binary file %s matches\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/es_ES.ISO8859-1.msg b/usr.bin/grep/nls/es_ES.ISO8859-1.msg new file mode 100644 index 0000000..441ba01 --- /dev/null +++ b/usr.bin/grep/nls/es_ES.ISO8859-1.msg @@ -0,0 +1,14 @@ +$ $NetBSD: es_ES.ISO8859-1.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/es_ES.ISO8859-1.msg 210622 2010-07-29 18:02:57Z gabor $ +$ +$set 1 +$quote " +1 "(entrada estndar)" +2 "no se puede leer el fichero comprimido bzip2" +3 "opcin desconocida de %s" +4 "uso: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A no] [-B no] [-C[no]]\n" +5 "\t[-e pauta] [-f fichero] [--binary-files=valor] [--color=cuando]\n" +6 "\t[--context[=no]] [--directories=accin] [--label] [--line-buffered]\n" +7 "\t[pauta] [fichero ...]\n" +8 "fichero binario %s se ajusta\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/gl_ES.ISO8859-1.msg b/usr.bin/grep/nls/gl_ES.ISO8859-1.msg new file mode 100644 index 0000000..ab50cea --- /dev/null +++ b/usr.bin/grep/nls/gl_ES.ISO8859-1.msg @@ -0,0 +1,14 @@ +$ $NetBSD: gl_ES.ISO8859-1.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/gl_ES.ISO8859-1.msg 210622 2010-07-29 18:02:57Z gabor $ +$ +$set 1 +$quote " +1 "(entrada estndar)" +2 "non se pode ler o ficheiro comprimido bzip2" +3 "opcin descoecida de %s" +4 "uso: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A no] [-B no] [-C[no]]\n" +5 "\t[-e pauta] [-f ficheiro] [--binary-files=valor] [--color=cando]\n" +6 "\t[--context[=no]] [--directories=accin] [--label] [--line-buffered]\n" +7 "\t[pauta] [ficheiro ...]\n" +8 "ficheiro binario %s conforma\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/hu_HU.ISO8859-2.msg b/usr.bin/grep/nls/hu_HU.ISO8859-2.msg new file mode 100644 index 0000000..69dc2ec --- /dev/null +++ b/usr.bin/grep/nls/hu_HU.ISO8859-2.msg @@ -0,0 +1,14 @@ +$ $NetBSD: hu_HU.ISO8859-2.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/hu_HU.ISO8859-2.msg 210622 2010-07-29 18:02:57Z gabor $ +$ +$set 1 +$quote " +1 "(szabvnyos bemenet)" +2 "bzip2 tmrtett fjl nem olvashat" +3 "ismeretlen %s opci" +4 "hasznlat: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A szm] [-B szm] [-C[szm]]\n" +5 "\t[-e minta] [-f fjl] [--binary-files=rtk] [--color=mikor]\n" +6 "\t[--context[=szm]] [--directories=mvelet] [--label] [--line-buffered]\n" +7 "\t[minta] [fjl ...]\n" +8 "%s binris fjl illeszkedik\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/ja_JP.SJIS.msg b/usr.bin/grep/nls/ja_JP.SJIS.msg new file mode 100644 index 0000000..99357b6 --- /dev/null +++ b/usr.bin/grep/nls/ja_JP.SJIS.msg @@ -0,0 +1,14 @@ +$ $NetBSD: ja_JP.SJIS.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/ja_JP.SJIS.msg 210622 2010-07-29 18:02:57Z gabor $ +$ +$set 1 +$quote " +1 "(W)" +2 "bzip2 kt@CǂݍނƂł܂" +3 "%s IvV̎wlɌ肪܂" +4 "g: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A ] [-B ] [-C[]]\n" +5 "\t[-e p^[] [-f t@C] [--binary-files=l] [--color=l]\n" +6 "\t[--context[=]] [--directories=] [--label] [--line-buffered]\n" +7 "\t[p^[] [t@C ...]\n" +8 "oCit@C %s Ƀ}b`܂\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/ja_JP.UTF-8.msg b/usr.bin/grep/nls/ja_JP.UTF-8.msg new file mode 100644 index 0000000..3d4aed3 --- /dev/null +++ b/usr.bin/grep/nls/ja_JP.UTF-8.msg @@ -0,0 +1,14 @@ +$ $NetBSD: ja_JP.UTF-8.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/ja_JP.UTF-8.msg 210622 2010-07-29 18:02:57Z gabor $ +$ +$set 1 +$quote " +1 "(標準入力)" +2 "bzip2 圧縮ファイルを読み込むことができません" +3 "%s オプションの指定値に誤りがあります" +4 "使い方: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A 数字] [-B 数字] [-C[数字]]\n" +5 "\t[-e パターン] [-f ファイル名] [--binary-files=値] [--color=値]\n" +6 "\t[--context[=数字]] [--directories=動作] [--label] [--line-buffered]\n" +7 "\t[パターン] [ファイル名 ...]\n" +8 "バイナリファイル %s にマッチしました\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/ja_JP.eucJP.msg b/usr.bin/grep/nls/ja_JP.eucJP.msg new file mode 100644 index 0000000..08e0e7d --- /dev/null +++ b/usr.bin/grep/nls/ja_JP.eucJP.msg @@ -0,0 +1,14 @@ +$ $NetBSD: ja_JP.eucJP.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/ja_JP.eucJP.msg 210622 2010-07-29 18:02:57Z gabor $ +$ +$set 1 +$quote " +1 "(ɸ)" +2 "bzip2 ̥եɤ߹ळȤǤޤ" +3 "%s ץλͤ˸꤬ޤ" +4 "Ȥ: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A ] [-B ] [-C[]]\n" +5 "\t[-e ѥ] [-f ե̾] [--binary-files=] [--color=]\n" +6 "\t[--context[=]] [--directories=ư] [--label] [--line-buffered]\n" +7 "\t[ѥ] [ե̾ ...]\n" +8 "Хʥե %s ˥ޥåޤ\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/pt_BR.ISO8859-1.msg b/usr.bin/grep/nls/pt_BR.ISO8859-1.msg new file mode 100644 index 0000000..7b6c20e --- /dev/null +++ b/usr.bin/grep/nls/pt_BR.ISO8859-1.msg @@ -0,0 +1,14 @@ +$ $NetBSD: pt_BR.ISO8859-1.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/pt_BR.ISO8859-1.msg 210622 2010-07-29 18:02:57Z gabor $ +$ +$set 1 +$quote " +1 "(entrada padro)" +2 "no se posso ler o fichero comprimido bzip2" +3 "opco no conhecida de %s" +4 "uso: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n" +5 "\t[-e padro] [-f arquivo] [--binary-files=valor] [--color=quando]\n" +6 "\t[--context[=num]] [--directories=ao] [--label] [--line-buffered]\n" +7 "\t[padro] [arquivo ...]\n" +8 "arquivo binrio %s casa com o padro\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/ru_RU.KOI8-R.msg b/usr.bin/grep/nls/ru_RU.KOI8-R.msg new file mode 100644 index 0000000..4c69f73 --- /dev/null +++ b/usr.bin/grep/nls/ru_RU.KOI8-R.msg @@ -0,0 +1,14 @@ +$ $NetBSD: ru_RU.KOI8-R.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/ru_RU.KOI8-R.msg 210622 2010-07-29 18:02:57Z gabor $ +$ +$set 1 +$quote " +1 "( )" +2 " bzip2 " +3 " %s" +4 ": %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A ] [-B ] [-C[]]\n" +5 "\t[-e ] [-f ] [--binary-files=] [--color=]\n" +6 "\t[--context[=]] [--directories=] [--label] [--line-buffered]\n" +7 "\t[] [ ...]\n" +8 " %s \n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/uk_UA.UTF-8.msg b/usr.bin/grep/nls/uk_UA.UTF-8.msg new file mode 100644 index 0000000..e6b9f54 --- /dev/null +++ b/usr.bin/grep/nls/uk_UA.UTF-8.msg @@ -0,0 +1,13 @@ +$ $NetBSD: uk_UA.UTF-8.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/uk_UA.UTF-8.msg 210927 2010-08-06 10:34:48Z gabor $ +$set 1 +$quote " +1 "(стандартний ввід)" +2 "не можу прочитати стиснутий bzip2 файл" +3 "невiдома опція %s" +4 "використання: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A чис] [-B чис] [-C[чис]]\n" +5 "\t[-e шаблон] [-f файл] [--binary-files=значення] [--color=коли]\n" +6 "\t[--context[=чис] [--directories=дія] [--label] [--line-buffered]\n" +7 "\t[шаблон] [файл ...]\n" +8 "двійковий файл %s співпадає\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/nls/zh_CN.UTF-8.msg b/usr.bin/grep/nls/zh_CN.UTF-8.msg new file mode 100644 index 0000000..e9d40cd --- /dev/null +++ b/usr.bin/grep/nls/zh_CN.UTF-8.msg @@ -0,0 +1,14 @@ +$ $NetBSD: zh_CN.UTF-8.msg,v 1.2 2011/04/18 22:46:48 joerg Exp $ +$ $FreeBSD: head/usr.bin/grep/nls/zh_CN.UTF-8.msg 212927 2010-09-20 19:42:52Z delphij $ +$ +$set 1 +$quote " +1 "(标准输入)" +2 "读取 bzip2 压缩文件时出错" +3 "选项 %s 无法识别" +4 "用法: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A 行数] [-B 行数] [-C[行数]]\n" +5 "\t[-e 模式] [-f 文件] [--binary-files=值] [--color=何时]\n" +6 "\t[--context[=行数]] [--directories=动作] [--label] [--line-buffered]\n" +7 "\t[模式] [文件名 ...]\n" +8 "二进制文件 %s 包含模式\n" +9 "%s (BSD grep) %s\n" diff --git a/usr.bin/grep/queue.c b/usr.bin/grep/queue.c new file mode 100644 index 0000000..e3c6be1 --- /dev/null +++ b/usr.bin/grep/queue.c @@ -0,0 +1,116 @@ +/* $NetBSD: queue.c,v 1.5 2011/08/31 16:24:57 plunky Exp $ */ +/* $FreeBSD: head/usr.bin/grep/queue.c 211496 2010-08-19 09:28:59Z des $ */ +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * A really poor man's queue. It does only what it has to and gets out of + * Dodge. It is used in place of to get a better performance. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: queue.c,v 1.5 2011/08/31 16:24:57 plunky Exp $"); + +#include +#include + +#include +#include + +#include "grep.h" + +struct qentry { + STAILQ_ENTRY(qentry) list; + struct str data; +}; + +static STAILQ_HEAD(, qentry) queue = STAILQ_HEAD_INITIALIZER(queue); +static unsigned long long count; + +static struct qentry *dequeue(void); + +void +enqueue(struct str *x) +{ + struct qentry *item; + + item = grep_malloc(sizeof(struct qentry)); + item->data.dat = grep_malloc(sizeof(char) * x->len); + item->data.len = x->len; + item->data.line_no = x->line_no; + item->data.off = x->off; + memcpy(item->data.dat, x->dat, x->len); + item->data.file = x->file; + + STAILQ_INSERT_TAIL(&queue, item, list); + + if (++count > Bflag) { + item = dequeue(); + free(item->data.dat); + free(item); + } +} + +static struct qentry * +dequeue(void) +{ + struct qentry *item; + + item = STAILQ_FIRST(&queue); + if (item == NULL) + return (NULL); + + STAILQ_REMOVE_HEAD(&queue, list); + --count; + return (item); +} + +void +printqueue(void) +{ + struct qentry *item; + + while ((item = dequeue()) != NULL) { + printline(&item->data, '-', NULL, 0); + free(item->data.dat); + free(item); + } +} + +void +clearqueue(void) +{ + struct qentry *item; + + while ((item = dequeue()) != NULL) { + free(item->data.dat); + free(item); + } +} diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c new file mode 100644 index 0000000..a3c9e4c --- /dev/null +++ b/usr.bin/grep/util.c @@ -0,0 +1,500 @@ +/* $NetBSD: util.c,v 1.19 2018/02/05 22:14:26 mrg Exp $ */ +/* $FreeBSD: head/usr.bin/grep/util.c 211496 2010-08-19 09:28:59Z des $ */ +/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * Copyright (C) 2008-2010 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include +__RCSID("$NetBSD: util.c,v 1.19 2018/02/05 22:14:26 mrg Exp $"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "grep.h" + +static bool first, first_global = true; +static unsigned long long since_printed; + +static int procline(struct str *l, int); + +bool +file_matching(const char *fname) +{ + char *fname_base, *fname_copy; + unsigned int i; + bool ret; + + ret = finclude ? false : true; + fname_copy = grep_strdup(fname); + fname_base = basename(fname_copy); + + for (i = 0; i < fpatterns; ++i) { + if (fnmatch(fpattern[i].pat, fname, 0) == 0 || + fnmatch(fpattern[i].pat, fname_base, 0) == 0) { + if (fpattern[i].mode == EXCL_PAT) { + free(fname_copy); + return (false); + } else + ret = true; + } + } + free(fname_copy); + return (ret); +} + +static inline bool +dir_matching(const char *dname) +{ + unsigned int i; + bool ret; + + ret = dinclude ? false : true; + + for (i = 0; i < dpatterns; ++i) { + if (dname != NULL && + fnmatch(dname, dpattern[i].pat, 0) == 0) { + if (dpattern[i].mode == EXCL_PAT) + return (false); + else + ret = true; + } + } + return (ret); +} + +/* + * Processes a directory when a recursive search is performed with + * the -R option. Each appropriate file is passed to procfile(). + */ +int +grep_tree(char **argv) +{ + FTS *fts; + FTSENT *p; + char *d, *dir = NULL; + int c, fts_flags; + bool ok; + + c = fts_flags = 0; + + switch(linkbehave) { + case LINK_EXPLICIT: + fts_flags = FTS_COMFOLLOW; + break; + case LINK_SKIP: + fts_flags = FTS_PHYSICAL; + break; + default: + fts_flags = FTS_LOGICAL; + + } + + fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; + + if (!(fts = fts_open(argv, fts_flags, NULL))) + err(2, "fts_open"); + while ((p = fts_read(fts)) != NULL) { + switch (p->fts_info) { + case FTS_DNR: + /* FALLTHROUGH */ + case FTS_ERR: + errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno)); + break; + case FTS_D: + /* FALLTHROUGH */ + case FTS_DP: + break; + case FTS_DC: + /* Print a warning for recursive directory loop */ + warnx("warning: %s: recursive directory loop", + p->fts_path); + break; + default: + /* Check for file exclusion/inclusion */ + ok = true; + if (dexclude || dinclude) { + if ((d = strrchr(p->fts_path, '/')) != NULL) { + dir = grep_malloc(sizeof(char) * + (d - p->fts_path + 1)); + memcpy(dir, p->fts_path, + d - p->fts_path); + dir[d - p->fts_path] = '\0'; + } + ok = dir_matching(dir); + free(dir); + dir = NULL; + } + if (fexclude || finclude) + ok &= file_matching(p->fts_path); + + if (ok) + c += procfile(p->fts_path); + break; + } + } + + fts_close(fts); + return (c); +} + +/* + * Opens a file and processes it. Each file is processed line-by-line + * passing the lines to procline(). + */ +int +procfile(const char *fn) +{ + struct file *f; + struct stat sb; + struct str ln; + mode_t s; + int c, t; + + if (mflag && (mcount <= 0)) + return (0); + + if (strcmp(fn, "-") == 0) { + fn = label != NULL ? label : getstr(1); + f = grep_open(NULL); + } else { + if (!stat(fn, &sb)) { + /* Check if we need to process the file */ + s = sb.st_mode & S_IFMT; + if (s == S_IFDIR && dirbehave == DIR_SKIP) + return (0); + if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK + || s == S_IFSOCK) && devbehave == DEV_SKIP) + return (0); + } + f = grep_open(fn); + } + if (f == NULL) { + if (!sflag) + warn("%s", fn); + if (errno == ENOENT) + notfound = true; + return (0); + } + + ln.file = grep_malloc(strlen(fn) + 1); + strcpy(ln.file, fn); + ln.line_no = 0; + ln.len = 0; + tail = 0; + ln.off = -1; + + for (first = true, c = 0; c == 0 || !(lflag || qflag); ) { + ln.off += ln.len + 1; + if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) + break; + if (ln.len > 0 && ln.dat[ln.len - 1] == line_sep) + --ln.len; + ln.line_no++; + + /* Return if we need to skip a binary file */ + if (f->binary && binbehave == BINFILE_SKIP) { + grep_close(f); + free(ln.file); + free(f); + return (0); + } + /* Process the file line-by-line */ + t = procline(&ln, f->binary); + c += t; + + /* Count the matches if we have a match limit */ + if (mflag) { + mcount -= t; + if (mcount <= 0) + break; + } + } + if (Bflag > 0) + clearqueue(); + grep_close(f); + + if (cflag) { + if (!hflag) + printf("%s:", ln.file); + printf("%u%c", c, line_sep); + } + if (lflag && !qflag && c != 0) + printf("%s%c", fn, line_sep); + if (Lflag && !qflag && c == 0) + printf("%s%c", fn, line_sep); + if (c && !cflag && !lflag && !Lflag && + binbehave == BINFILE_BIN && f->binary && !qflag) + printf(getstr(8), fn); + + free(ln.file); + free(f); + return (c); +} + +#define iswword(x) (iswalnum((x)) || (x) == L'_') + +/* + * Processes a line comparing it with the specified patterns. Each pattern + * is looped to be compared along with the full string, saving each and every + * match, which is necessary to colorize the output and to count the + * matches. The matching lines are passed to printline() to display the + * appropriate output. + */ +static int +procline(struct str *l, int nottext) +{ + regmatch_t matches[MAX_LINE_MATCHES]; + regmatch_t pmatch; + size_t st = 0; + unsigned int i; + int c = 0, m = 0, r = 0; + + /* Loop to process the whole line */ + while (st <= l->len) { + pmatch.rm_so = st; + pmatch.rm_eo = l->len; + + /* Loop to compare with all the patterns */ + for (i = 0; i < patterns; i++) { +/* + * XXX: grep_search() is a workaround for speed up and should be + * removed in the future. See fastgrep.c. + */ + if (fg_pattern[i].pattern) { + r = grep_search(&fg_pattern[i], + (unsigned char *)l->dat, + l->len, &pmatch); + r = (r == 0) ? 0 : REG_NOMATCH; + st = pmatch.rm_eo; + } else { + r = regexec(&r_pattern[i], l->dat, 1, + &pmatch, eflags); + r = (r == 0) ? 0 : REG_NOMATCH; + st = pmatch.rm_eo; + } + if (r == REG_NOMATCH) + continue; + /* Check for full match */ + if (xflag && + (pmatch.rm_so != 0 || + (size_t)pmatch.rm_eo != l->len)) + continue; + /* Check for whole word match */ + if (fg_pattern[i].word && pmatch.rm_so != 0) { + wchar_t wbegin, wend; + + wbegin = wend = L' '; + if (pmatch.rm_so != 0 && + sscanf(&l->dat[pmatch.rm_so - 1], + "%lc", &wbegin) != 1) + continue; + if ((size_t)pmatch.rm_eo != l->len && + sscanf(&l->dat[pmatch.rm_eo], + "%lc", &wend) != 1) + continue; + if (iswword(wbegin) || iswword(wend)) + continue; + } + c = 1; + if (m < MAX_LINE_MATCHES) + matches[m++] = pmatch; + /* matches - skip further patterns */ + if ((color != NULL && !oflag) || qflag || lflag) + break; + } + + if (vflag) { + c = !c; + break; + } + /* One pass if we are not recording matches */ + if ((color != NULL && !oflag) || qflag || lflag) + break; + + if (st == (size_t)pmatch.rm_so) + break; /* No matches */ + } + + if (c && binbehave == BINFILE_BIN && nottext) + return (c); /* Binary file */ + + /* Dealing with the context */ + if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) { + if (c) { + if ((Aflag || Bflag) && !first_global && + (first || since_printed > Bflag)) + printf("--\n"); + tail = Aflag; + if (Bflag > 0) + printqueue(); + printline(l, ':', matches, m); + } else { + printline(l, '-', matches, m); + tail--; + } + first = false; + first_global = false; + since_printed = 0; + } else { + if (Bflag) + enqueue(l); + since_printed++; + } + return (c); +} + +/* + * Safe malloc() for internal use. + */ +void * +grep_malloc(size_t size) +{ + void *ptr; + + if ((ptr = malloc(size)) == NULL) + err(2, "malloc"); + return (ptr); +} + +/* + * Safe calloc() for internal use. + */ +void * +grep_calloc(size_t nmemb, size_t size) +{ + void *ptr; + + if ((ptr = calloc(nmemb, size)) == NULL) + err(2, "calloc"); + return (ptr); +} + +/* + * Safe realloc() for internal use. + */ +void * +grep_realloc(void *ptr, size_t size) +{ + + if ((ptr = realloc(ptr, size)) == NULL) + err(2, "realloc"); + return (ptr); +} + +/* + * Safe strdup() for internal use. + */ +char * +grep_strdup(const char *str) +{ + char *ret; + + if ((ret = strdup(str)) == NULL) + err(2, "strdup"); + return (ret); +} + +/* + * Prints a matching line according to the command line options. + */ +void +printline(struct str *line, int sep, regmatch_t *matches, int m) +{ + size_t a = 0; + int i, n = 0; + + if (!hflag) { + if (nullflag == 0) + fputs(line->file, stdout); + else { + printf("%s", line->file); + putchar(0); + } + ++n; + } + if (nflag) { + if (n > 0) + putchar(sep); + printf("%d", line->line_no); + ++n; + } + if (bflag) { + if (n > 0) + putchar(sep); + printf("%lld", (long long)line->off); + ++n; + } + if (n) + putchar(sep); + /* --color and -o */ + if ((oflag || color) && m > 0) { + for (i = 0; i < m; i++) { + if (!oflag) + fwrite(line->dat + a, matches[i].rm_so - a, 1, + stdout); + if (color) + fprintf(stdout, "\33[%sm\33[K", color); + + fwrite(line->dat + matches[i].rm_so, + matches[i].rm_eo - matches[i].rm_so, 1, + stdout); + + if (color) + fprintf(stdout, "\33[m\33[K"); + a = matches[i].rm_eo; + if (oflag) + putchar('\n'); + } + if (!oflag) { + if (line->len - a > 0) + fwrite(line->dat + a, line->len - a, 1, stdout); + putchar(line_sep); + } + } else { + fwrite(line->dat, line->len, 1, stdout); + putchar(line_sep); + } +} -- cgit v1.2.3-70-g09d2